digitalhub-0.7.0b2-py3-none-any.whl → digitalhub-0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic.
- digitalhub/__init__.py +63 -93
- digitalhub/client/__init__.py +0 -0
- digitalhub/client/_base/__init__.py +0 -0
- digitalhub/client/_base/client.py +56 -0
- digitalhub/client/api.py +63 -0
- digitalhub/client/builder.py +50 -0
- digitalhub/client/dhcore/__init__.py +0 -0
- digitalhub/client/dhcore/client.py +669 -0
- digitalhub/client/dhcore/env.py +21 -0
- digitalhub/client/dhcore/models.py +46 -0
- digitalhub/client/dhcore/utils.py +111 -0
- digitalhub/client/local/__init__.py +0 -0
- digitalhub/client/local/client.py +533 -0
- digitalhub/context/__init__.py +0 -0
- digitalhub/context/api.py +93 -0
- digitalhub/context/builder.py +94 -0
- digitalhub/context/context.py +136 -0
- digitalhub/datastores/__init__.py +0 -0
- digitalhub/datastores/_base/__init__.py +0 -0
- digitalhub/datastores/_base/datastore.py +85 -0
- digitalhub/datastores/api.py +37 -0
- digitalhub/datastores/builder.py +110 -0
- digitalhub/datastores/local/__init__.py +0 -0
- digitalhub/datastores/local/datastore.py +50 -0
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +31 -0
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +46 -0
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +68 -0
- digitalhub/entities/__init__.py +0 -0
- digitalhub/entities/_base/__init__.py +0 -0
- digitalhub/entities/_base/_base/__init__.py +0 -0
- digitalhub/entities/_base/_base/entity.py +82 -0
- digitalhub/entities/_base/api_utils.py +620 -0
- digitalhub/entities/_base/context/__init__.py +0 -0
- digitalhub/entities/_base/context/entity.py +118 -0
- digitalhub/entities/_base/crud.py +468 -0
- digitalhub/entities/_base/entity/__init__.py +0 -0
- digitalhub/entities/_base/entity/_constructors/__init__.py +0 -0
- digitalhub/entities/_base/entity/_constructors/metadata.py +44 -0
- digitalhub/entities/_base/entity/_constructors/name.py +31 -0
- digitalhub/entities/_base/entity/_constructors/spec.py +33 -0
- digitalhub/entities/_base/entity/_constructors/status.py +52 -0
- digitalhub/entities/_base/entity/_constructors/uuid.py +26 -0
- digitalhub/entities/_base/entity/builder.py +175 -0
- digitalhub/entities/_base/entity/entity.py +106 -0
- digitalhub/entities/_base/entity/metadata.py +59 -0
- digitalhub/entities/_base/entity/spec.py +58 -0
- digitalhub/entities/_base/entity/status.py +43 -0
- digitalhub/entities/_base/executable/__init__.py +0 -0
- digitalhub/entities/_base/executable/entity.py +405 -0
- digitalhub/entities/_base/material/__init__.py +0 -0
- digitalhub/entities/_base/material/entity.py +214 -0
- digitalhub/entities/_base/material/spec.py +22 -0
- digitalhub/entities/_base/material/status.py +49 -0
- digitalhub/entities/_base/runtime_entity/__init__.py +0 -0
- digitalhub/entities/_base/runtime_entity/builder.py +106 -0
- digitalhub/entities/_base/unversioned/__init__.py +0 -0
- digitalhub/entities/_base/unversioned/builder.py +66 -0
- digitalhub/entities/_base/unversioned/entity.py +49 -0
- digitalhub/entities/_base/versioned/__init__.py +0 -0
- digitalhub/entities/_base/versioned/builder.py +68 -0
- digitalhub/entities/_base/versioned/entity.py +53 -0
- digitalhub/entities/artifact/__init__.py +0 -0
- digitalhub/entities/artifact/_base/__init__.py +0 -0
- digitalhub/entities/artifact/_base/builder.py +86 -0
- digitalhub/entities/artifact/_base/entity.py +39 -0
- digitalhub/entities/artifact/_base/spec.py +15 -0
- digitalhub/entities/artifact/_base/status.py +9 -0
- digitalhub/entities/artifact/artifact/__init__.py +0 -0
- digitalhub/entities/artifact/artifact/builder.py +18 -0
- digitalhub/entities/artifact/artifact/entity.py +32 -0
- digitalhub/entities/artifact/artifact/spec.py +27 -0
- digitalhub/entities/artifact/artifact/status.py +15 -0
- digitalhub/entities/artifact/crud.py +332 -0
- digitalhub/entities/builders.py +63 -0
- digitalhub/entities/dataitem/__init__.py +0 -0
- digitalhub/entities/dataitem/_base/__init__.py +0 -0
- digitalhub/entities/dataitem/_base/builder.py +86 -0
- digitalhub/entities/dataitem/_base/entity.py +75 -0
- digitalhub/entities/dataitem/_base/spec.py +15 -0
- digitalhub/entities/dataitem/_base/status.py +20 -0
- digitalhub/entities/dataitem/crud.py +372 -0
- digitalhub/entities/dataitem/dataitem/__init__.py +0 -0
- digitalhub/entities/dataitem/dataitem/builder.py +18 -0
- digitalhub/entities/dataitem/dataitem/entity.py +32 -0
- digitalhub/entities/dataitem/dataitem/spec.py +15 -0
- digitalhub/entities/dataitem/dataitem/status.py +9 -0
- digitalhub/entities/dataitem/iceberg/__init__.py +0 -0
- digitalhub/entities/dataitem/iceberg/builder.py +18 -0
- digitalhub/entities/dataitem/iceberg/entity.py +32 -0
- digitalhub/entities/dataitem/iceberg/spec.py +15 -0
- digitalhub/entities/dataitem/iceberg/status.py +9 -0
- digitalhub/entities/dataitem/table/__init__.py +0 -0
- digitalhub/entities/dataitem/table/builder.py +18 -0
- digitalhub/entities/dataitem/table/entity.py +146 -0
- digitalhub/entities/dataitem/table/models.py +62 -0
- digitalhub/entities/dataitem/table/spec.py +25 -0
- digitalhub/entities/dataitem/table/status.py +9 -0
- digitalhub/entities/function/__init__.py +0 -0
- digitalhub/entities/function/_base/__init__.py +0 -0
- digitalhub/entities/function/_base/builder.py +79 -0
- digitalhub/entities/function/_base/entity.py +98 -0
- digitalhub/entities/function/_base/models.py +118 -0
- digitalhub/entities/function/_base/spec.py +15 -0
- digitalhub/entities/function/_base/status.py +9 -0
- digitalhub/entities/function/crud.py +279 -0
- digitalhub/entities/model/__init__.py +0 -0
- digitalhub/entities/model/_base/__init__.py +0 -0
- digitalhub/entities/model/_base/builder.py +86 -0
- digitalhub/entities/model/_base/entity.py +34 -0
- digitalhub/entities/model/_base/spec.py +49 -0
- digitalhub/entities/model/_base/status.py +9 -0
- digitalhub/entities/model/crud.py +331 -0
- digitalhub/entities/model/huggingface/__init__.py +0 -0
- digitalhub/entities/model/huggingface/builder.py +18 -0
- digitalhub/entities/model/huggingface/entity.py +32 -0
- digitalhub/entities/model/huggingface/spec.py +36 -0
- digitalhub/entities/model/huggingface/status.py +9 -0
- digitalhub/entities/model/mlflow/__init__.py +0 -0
- digitalhub/entities/model/mlflow/builder.py +18 -0
- digitalhub/entities/model/mlflow/entity.py +32 -0
- digitalhub/entities/model/mlflow/models.py +26 -0
- digitalhub/entities/model/mlflow/spec.py +44 -0
- digitalhub/entities/model/mlflow/status.py +9 -0
- digitalhub/entities/model/mlflow/utils.py +81 -0
- digitalhub/entities/model/model/__init__.py +0 -0
- digitalhub/entities/model/model/builder.py +18 -0
- digitalhub/entities/model/model/entity.py +32 -0
- digitalhub/entities/model/model/spec.py +15 -0
- digitalhub/entities/model/model/status.py +9 -0
- digitalhub/entities/model/sklearn/__init__.py +0 -0
- digitalhub/entities/model/sklearn/builder.py +18 -0
- digitalhub/entities/model/sklearn/entity.py +32 -0
- digitalhub/entities/model/sklearn/spec.py +15 -0
- digitalhub/entities/model/sklearn/status.py +9 -0
- digitalhub/entities/project/__init__.py +0 -0
- digitalhub/entities/project/_base/__init__.py +0 -0
- digitalhub/entities/project/_base/builder.py +128 -0
- digitalhub/entities/project/_base/entity.py +2078 -0
- digitalhub/entities/project/_base/spec.py +50 -0
- digitalhub/entities/project/_base/status.py +9 -0
- digitalhub/entities/project/crud.py +357 -0
- digitalhub/entities/run/__init__.py +0 -0
- digitalhub/entities/run/_base/__init__.py +0 -0
- digitalhub/entities/run/_base/builder.py +94 -0
- digitalhub/entities/run/_base/entity.py +307 -0
- digitalhub/entities/run/_base/spec.py +50 -0
- digitalhub/entities/run/_base/status.py +9 -0
- digitalhub/entities/run/crud.py +219 -0
- digitalhub/entities/secret/__init__.py +0 -0
- digitalhub/entities/secret/_base/__init__.py +0 -0
- digitalhub/entities/secret/_base/builder.py +81 -0
- digitalhub/entities/secret/_base/entity.py +74 -0
- digitalhub/entities/secret/_base/spec.py +35 -0
- digitalhub/entities/secret/_base/status.py +9 -0
- digitalhub/entities/secret/crud.py +290 -0
- digitalhub/entities/task/__init__.py +0 -0
- digitalhub/entities/task/_base/__init__.py +0 -0
- digitalhub/entities/task/_base/builder.py +91 -0
- digitalhub/entities/task/_base/entity.py +136 -0
- digitalhub/entities/task/_base/models.py +208 -0
- digitalhub/entities/task/_base/spec.py +53 -0
- digitalhub/entities/task/_base/status.py +9 -0
- digitalhub/entities/task/crud.py +228 -0
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +346 -0
- digitalhub/entities/utils/entity_types.py +19 -0
- digitalhub/entities/utils/state.py +31 -0
- digitalhub/entities/utils/utils.py +202 -0
- digitalhub/entities/workflow/__init__.py +0 -0
- digitalhub/entities/workflow/_base/__init__.py +0 -0
- digitalhub/entities/workflow/_base/builder.py +79 -0
- digitalhub/entities/workflow/_base/entity.py +74 -0
- digitalhub/entities/workflow/_base/spec.py +15 -0
- digitalhub/entities/workflow/_base/status.py +9 -0
- digitalhub/entities/workflow/crud.py +278 -0
- digitalhub/factory/__init__.py +0 -0
- digitalhub/factory/api.py +277 -0
- digitalhub/factory/factory.py +268 -0
- digitalhub/factory/utils.py +90 -0
- digitalhub/readers/__init__.py +0 -0
- digitalhub/readers/_base/__init__.py +0 -0
- digitalhub/readers/_base/builder.py +26 -0
- digitalhub/readers/_base/reader.py +70 -0
- digitalhub/readers/api.py +80 -0
- digitalhub/readers/factory.py +133 -0
- digitalhub/readers/pandas/__init__.py +0 -0
- digitalhub/readers/pandas/builder.py +29 -0
- digitalhub/readers/pandas/reader.py +207 -0
- digitalhub/runtimes/__init__.py +0 -0
- digitalhub/runtimes/_base.py +102 -0
- digitalhub/runtimes/builder.py +32 -0
- digitalhub/stores/__init__.py +0 -0
- digitalhub/stores/_base/__init__.py +0 -0
- digitalhub/stores/_base/store.py +189 -0
- digitalhub/stores/api.py +54 -0
- digitalhub/stores/builder.py +211 -0
- digitalhub/stores/local/__init__.py +0 -0
- digitalhub/stores/local/store.py +230 -0
- digitalhub/stores/remote/__init__.py +0 -0
- digitalhub/stores/remote/store.py +143 -0
- digitalhub/stores/s3/__init__.py +0 -0
- digitalhub/stores/s3/store.py +563 -0
- digitalhub/stores/sql/__init__.py +0 -0
- digitalhub/stores/sql/store.py +328 -0
- digitalhub/utils/__init__.py +0 -0
- digitalhub/utils/data_utils.py +127 -0
- digitalhub/utils/exceptions.py +67 -0
- digitalhub/utils/file_utils.py +204 -0
- digitalhub/utils/generic_utils.py +183 -0
- digitalhub/utils/git_utils.py +148 -0
- digitalhub/utils/io_utils.py +116 -0
- digitalhub/utils/logger.py +17 -0
- digitalhub/utils/s3_utils.py +58 -0
- digitalhub/utils/uri_utils.py +56 -0
- {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/METADATA +30 -13
- digitalhub-0.8.0.dist-info/RECORD +231 -0
- {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/WHEEL +1 -1
- test/local/CRUD/test_artifacts.py +96 -0
- test/local/CRUD/test_dataitems.py +96 -0
- test/local/CRUD/test_models.py +95 -0
- test/test_crud_functions.py +1 -1
- test/test_crud_runs.py +1 -1
- test/test_crud_tasks.py +1 -1
- digitalhub-0.7.0b2.dist-info/RECORD +0 -14
- test/test_crud_artifacts.py +0 -96
- test/test_crud_dataitems.py +0 -96
- {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/top_level.txt +0 -0
- /test/{test_imports.py → local/imports/test_imports.py} +0 -0

digitalhub/stores/sql/store.py
@@ -0,0 +1,328 @@
from __future__ import annotations

from pathlib import Path

import pyarrow as pa
import pyarrow.parquet as pq
from sqlalchemy import MetaData, Table, create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.engine.row import LegacyRow
from sqlalchemy.exc import SQLAlchemyError

from digitalhub.stores._base.store import Store, StoreConfig
from digitalhub.utils.exceptions import StoreError


class SQLStoreConfig(StoreConfig):
    """
    SQL store configuration class.
    """

    host: str
    """SQL host."""

    port: int
    """SQL port."""

    user: str
    """SQL user."""

    password: str
    """SQL password."""

    database: str
    """SQL database name."""


class SqlStore(Store):
    """
    SQL store class. It implements the Store interface and provides methods to fetch and persist
    artifacts on SQL based storage.
    """

    def __init__(self, name: str, store_type: str, config: SQLStoreConfig) -> None:
        super().__init__(name, store_type)
        self.config = config

    ##############################
    # IO methods
    ##############################

    def download(
        self,
        root: str,
        dst: Path,
        src: list[str],
        overwrite: bool = False,
    ) -> str:
        """
        Download artifacts from storage.

        Parameters
        ----------
        root : str
            The root path of the artifact.
        dst : str
            The destination of the artifact on local filesystem.
        src : list[str]
            List of sources.
        overwrite : bool
            Specify if overwrite existing file(s).

        Returns
        -------
        str
            Destination path of the downloaded artifact.
        """
        table_name = self._get_table_name(root) + ".parquet"
        # Case where dst is not provided
        if dst is None:
            dst = Path(self._build_temp("sql")) / table_name
        else:
            self._check_local_dst(str(dst))
            path = Path(dst)

            # Case where dst is a directory
            if path.suffix == "":
                dst = path / table_name

            # Case where dst is a file
            elif path.suffix != ".parquet":
                raise StoreError("The destination path must be a directory or a parquet file.")

            self._check_overwrite(dst, overwrite)
            self._build_path(dst)

        schema = self._get_schema(root)
        table = self._get_table_name(root)
        return self._download_table(schema, table, str(dst))

    def upload(self, src: str | list[str], dst: str | None = None) -> list[tuple[str, str]]:
        """
        Upload an artifact to storage.

        Raises
        ------
        StoreError
            This method is not implemented.
        """
        raise StoreError("SQL store does not support upload.")

    def get_file_info(self, paths: list[str]) -> list[dict]:
        """
        Get file information from SQL based storage.

        Raises
        ------
        NotImplementedError
            This method is not implemented.
        """
        raise NotImplementedError("SQL store does not support upload.")

    ##############################
    # Private helper methods
    ##############################

    def _get_connection_string(self) -> str:
        """
        Get the connection string.

        Returns
        -------
        str
            The connection string.
        """
        return (
            f"postgresql://{self.config.user}:{self.config.password}@"
            f"{self.config.host}:{self.config.port}/{self.config.database}"
        )

    def _get_engine(self, schema: str | None = None) -> Engine:
        """
        Create engine from connection string.

        Parameters
        ----------
        schema : str
            The schema.

        Returns
        -------
        Engine
            An SQLAlchemy engine.
        """
        connection_string = self._get_connection_string()
        if not isinstance(connection_string, str):
            raise StoreError("Connection string must be a string.")
        try:
            connect_args = {"connect_timeout": 30}
            if schema is not None:
                connect_args["options"] = f"-csearch_path={schema}"
            return create_engine(connection_string, connect_args=connect_args)
        except Exception as ex:
            raise StoreError(f"Something wrong with connection string. Arguments: {str(ex.args)}")

    def _check_factory(self, schema: str | None = None) -> Engine:
        """
        Check if the database is accessible and return the engine.

        Parameters
        ----------
        schema : str
            The schema.

        Returns
        -------
        Engine
            The database engine.
        """
        engine = self._get_engine(schema)
        self._check_access_to_storage(engine)
        return engine

    @staticmethod
    def _parse_path(path: str) -> dict:
        """
        Parse the path and return the components.

        Parameters
        ----------
        path : str
            The path.

        Returns
        -------
        dict
            A dictionary containing the components of the path.
        """
        # Parse path
        err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
        protocol, pth = path.split("://")
        components = pth.split("/")
        if protocol != "sql" or not (2 <= len(components) <= 3):
            raise ValueError(err_msg)

        # Get components
        database = components[0]
        table = components[-1]
        schema = components[1] if len(components) == 3 else "public"
        return {"database": database, "schema": schema, "table": table}

    def _get_schema(self, uri: str) -> str:
        """
        Get the name of the SQL schema from the URI.

        Parameters
        ----------
        uri : str
            The URI.

        Returns
        -------
        str
            The name of the SQL schema.
        """
        return str(self._parse_path(uri).get("schema"))

    def _get_table_name(self, uri: str) -> str:
        """
        Get the name of the table from the URI.

        Parameters
        ----------
        uri : str
            The URI.

        Returns
        -------
        str
            The name of the table
        """
        return str(self._parse_path(uri).get("table"))

    @staticmethod
    def _check_access_to_storage(engine: Engine) -> None:
        """
        Check if there is access to the storage.

        Parameters
        ----------
        engine : Engine
            An SQLAlchemy engine.

        Returns
        -------
        None

        Raises
        ------
        StoreError
            If there is no access to the storage.
        """
        try:
            engine.connect()
        except SQLAlchemyError:
            engine.dispose()
            raise StoreError("No access to db!")

    def _download_table(self, schema: str, table: str, dst: str) -> str:
        """
        Download a table from SQL based storage.

        Parameters
        ----------
        schema : str
            The origin schema.
        table : str
            The origin table.
        dst : str
            The destination path.

        Returns
        -------
        str
            The destination path.
        """
        engine = self._check_factory(schema=schema)

        # Read the table from the database
        sa_table = Table(table, MetaData(), autoload_with=engine)
        query = sa_table.select()
        with engine.begin() as conn:
            result: list[LegacyRow] = conn.execute(query).fetchall()

        # Parse the result
        data = self._parse_result(result)

        # Convert the result to a pyarrow table and
        # write the pyarrow table to a Parquet file
        arrow_table = pa.Table.from_pydict(data)
        pq.write_table(arrow_table, dst)

        engine.dispose()

        return dst

    @staticmethod
    def _parse_result(result: list[LegacyRow]) -> dict:
        """
        Convert a list of list of tuples to a dict.

        Parameters
        ----------
        result : list[LegacyRow]
            The data to convert.

        Returns
        -------
        dict
            The converted data.
        """
        data_list = [row.items() for row in result]
        data = {}
        for row in data_list:
            for column_name, value in row:
                if column_name not in data:
                    data[column_name] = []
                data[column_name].append(value)
        return data
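
A quick sketch of the sql:// path convention the new store parses (not part of the package, illustrative values only): SqlStore._parse_path splits a URI into database, schema and table, falling back to the public schema when only two components are given.

# Illustrative only: exercising the static path parser from the hunk above.
from digitalhub.stores.sql.store import SqlStore

print(SqlStore._parse_path("sql://mydb/myschema/mytable"))
# {'database': 'mydb', 'schema': 'myschema', 'table': 'mytable'}

print(SqlStore._parse_path("sql://mydb/mytable"))
# {'database': 'mydb', 'schema': 'public', 'table': 'mytable'}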

digitalhub/utils/__init__.py: File without changes

digitalhub/utils/data_utils.py
@@ -0,0 +1,127 @@
from __future__ import annotations

import json


def build_data_preview(preview: list[dict] | None = None, rows_count: int | None = None) -> dict:
    """
    Build data preview.

    Parameters
    ----------
    preview : list[dict] | None
        Preview.
    rows_count : int | None
        Row count.

    Returns
    -------
    dict
        Data preview.
    """
    dict_ = {}
    if preview is not None:
        dict_["cols"] = preview
    if rows_count is not None:
        dict_["rows"] = rows_count
    return dict_


def get_data_preview(columns: list, data: list[list], columnar: bool = False) -> list[dict]:
    """
    Prepare preview.

    Parameters
    ----------
    columns : list
        Columns names.
    data : list[list]
        Data to preview.
    columnar : bool
        If data are arranged in columns. If False, data are arranged in rows.

    Returns
    -------
    list[dict]
        Data preview.
    """
    # Reduce data to 10 rows
    if not columnar:
        if len(data) > 10:
            data = data[:10]
    else:
        data = [d[:10] for d in data]

    # Transpose data if needed
    if not columnar:
        data = list(map(list, list(zip(*data))))

    # Prepare the preview
    data_dict = prepare_preview(columns, data)

    # Filter memoryview values
    filtered_memview = filter_memoryview(data_dict)

    # Check the size of the preview data
    return check_preview_size(filtered_memview)


def prepare_preview(column_names: list, data: list[list]) -> list[dict]:
    """
    Get preview.

    Parameters
    ----------
    data : pd.DataFrame
        Data.

    Returns
    -------
    list[dict]
        Preview.
    """
    if len(column_names) != len(data):
        raise ValueError("Column names and data must have the same length")
    return [{"name": column, "value": values} for column, values in zip(column_names, data)]


def filter_memoryview(data: list[dict]) -> list[dict]:
    """
    Find memoryview values.

    Parameters
    ----------
    data : pd.DataFrame
        Data.

    Returns
    -------
    list[str]
        Column to filter out from preview.
    """
    key_to_filter = []
    for i in data:
        if any(isinstance(v, memoryview) for v in i["value"]):
            key_to_filter.append(i["name"])
    for i in key_to_filter:
        data = [d for d in data if d["name"] != i]
    return data


def check_preview_size(preview: list[dict]) -> list:
    """
    Check preview size. If it's too big, return empty list.

    Parameters
    ----------
    preview : list[dict]
        Preview.

    Returns
    -------
    list
        Preview.
    """
    if len(json.dumps(preview).encode("utf-8")) >= 64000:
        return []
    return preview
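
As an illustration of how these helpers fit together (a sketch with made-up column names, not taken from the package docs): get_data_preview turns row-oriented data into per-column entries that build_data_preview then wraps together with a row count.

# Illustrative only: previewing two rows of tabular data.
from digitalhub.utils.data_utils import build_data_preview, get_data_preview

columns = ["id", "label"]
rows = [[1, "cat"], [2, "dog"]]          # row-oriented, so columnar=False

cols_preview = get_data_preview(columns, rows)
# [{'name': 'id', 'value': [1, 2]}, {'name': 'label', 'value': ['cat', 'dog']}]

print(build_data_preview(preview=cols_preview, rows_count=len(rows)))
# {'cols': [{'name': 'id', ...}, {'name': 'label', ...}], 'rows': 2}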

digitalhub/utils/exceptions.py
@@ -0,0 +1,67 @@
from __future__ import annotations


class BuilderError(Exception):
    """
    Raised when incontered errors on builders.
    """


class StoreError(Exception):
    """
    Raised when incontered errors on stores.
    """


class BackendError(Exception):
    """
    Raised when incontered errors from backend.
    """


class EntityNotExistsError(BackendError):
    """
    Raised when entity not found.
    """


class EntityAlreadyExistsError(BackendError):
    """
    Raised when entity already exists.
    """


class MissingSpecError(BackendError):
    """
    Raised when spec is missing in backend.
    """


class UnauthorizedError(BackendError):
    """
    Raised when unauthorized.
    """


class ForbiddenError(BackendError):
    """
    Raised when forbidden.
    """


class BadRequestError(BackendError):
    """
    Raised when bad request.
    """


class EntityError(Exception):
    """
    Raised when incontered errors on entities.
    """


class ContextError(Exception):
    """
    Raised when context errors.
    """
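
A minimal sketch (not from the package) of why the HTTP-style errors subclass BackendError: a single handler can catch any of them.

# Illustrative only: EntityNotExistsError is caught as a BackendError.
from digitalhub.utils.exceptions import BackendError, EntityNotExistsError

try:
    raise EntityNotExistsError("entity not found")
except BackendError as err:
    print(type(err).__name__, err)   # EntityNotExistsError entity not found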

digitalhub/utils/file_utils.py
@@ -0,0 +1,204 @@
from __future__ import annotations

from datetime import datetime
from hashlib import sha256
from mimetypes import guess_type
from pathlib import Path

from pydantic import BaseModel


class FileInfo(BaseModel):
    """
    File info class.
    """

    path: str = None
    name: str = None
    content_type: str = None
    size: int = None
    hash: str = None
    last_modified: str = None


def calculate_blob_hash(data_path: str) -> str:
    """
    Calculate the hash of a file.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The hash of the file.
    """
    with open(data_path, "rb") as f:
        data = f.read()
        return f"sha256:{sha256(data).hexdigest()}"


def get_file_size(data_path: str) -> int:
    """
    Get the size of a file.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    int
        The size of the file.
    """
    return Path(data_path).stat().st_size


def get_file_mime_type(data_path: str) -> str:
    """
    Get the mime type of a file.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The mime type of the file.
    """
    return guess_type(data_path)[0]


def get_path_name(data_path: str) -> str:
    """
    Get the name of a file.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The name of the file.
    """
    return Path(data_path).name


def get_last_modified(data_path: str) -> str:
    """
    Get the last modified date of a file.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The last modified date of the file.
    """
    path = Path(data_path)
    timestamp = path.stat().st_mtime
    return datetime.fromtimestamp(timestamp).astimezone().isoformat()


def get_s3_path(src_path: str) -> str:
    """
    Get the S3 path of a file.

    Parameters
    ----------
    src_path : str
        Path to the file.

    Returns
    -------
    str
        The S3 path of the file.
    """
    return Path(src_path).as_uri()


def get_file_info_from_local(path: str, src_path: str) -> None | dict:
    """
    Get file info from path.

    Parameters
    ----------
    path : str
        Target path of the object.
    src_path : str
        Local path of some source.

    Returns
    -------
    dict
        File info.
    """
    try:
        name = get_path_name(path)
        content_type = get_file_mime_type(path)
        size = get_file_size(path)
        hash = calculate_blob_hash(path)
        last_modified = get_last_modified(path)

        return FileInfo(
            path=src_path,
            name=name,
            content_type=content_type,
            size=size,
            hash=hash,
            last_modified=last_modified,
        ).dict()
    except Exception:
        return None


def get_file_info_from_s3(path: str, metadata: dict) -> None | dict:
    """
    Get file info from path.

    Parameters
    ----------
    path : str
        Object source path.
    metadata : dict
        Metadata of the object from S3.

    Returns
    -------
    dict
        File info.
    """
    try:
        size = metadata["ContentLength"]
        file_hash = metadata["ETag"][1:-1]

        file_size_limit_multipart = 20 * 1024 * 1024
        if size < file_size_limit_multipart:
            file_hash = "md5:" + file_hash
        else:
            file_hash = "LiteralETag:" + file_hash

        name = get_path_name(path)
        content_type = metadata["ContentType"]
        last_modified = metadata["LastModified"].isoformat()

        return FileInfo(
            path=path,
            name=name,
            content_type=content_type,
            size=size,
            hash=file_hash,
            last_modified=last_modified,
        ).dict()
    except Exception:
        return None
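
A minimal sketch of the local file-info helper (illustrative paths, not from the package docs): on success it returns the FileInfo fields as a plain dict, and on any failure it returns None.

# Illustrative only: inspecting a local file.
from digitalhub.utils.file_utils import get_file_info_from_local

info = get_file_info_from_local("./data.csv", "./data.csv")
# e.g. {'path': './data.csv', 'name': 'data.csv', 'content_type': 'text/csv',
#       'size': 1234, 'hash': 'sha256:...', 'last_modified': '...'}
# or None if the file could not be inspected.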