digitalhub 0.8.0b0__py3-none-any.whl → 0.8.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +62 -94
- digitalhub/client/__init__.py +0 -0
- digitalhub/client/builder.py +105 -0
- digitalhub/client/objects/__init__.py +0 -0
- digitalhub/client/objects/base.py +56 -0
- digitalhub/client/objects/dhcore.py +681 -0
- digitalhub/client/objects/local.py +533 -0
- digitalhub/context/__init__.py +0 -0
- digitalhub/context/builder.py +178 -0
- digitalhub/context/context.py +136 -0
- digitalhub/datastores/__init__.py +0 -0
- digitalhub/datastores/builder.py +134 -0
- digitalhub/datastores/objects/__init__.py +0 -0
- digitalhub/datastores/objects/base.py +85 -0
- digitalhub/datastores/objects/local.py +42 -0
- digitalhub/datastores/objects/remote.py +23 -0
- digitalhub/datastores/objects/s3.py +38 -0
- digitalhub/datastores/objects/sql.py +60 -0
- digitalhub/entities/__init__.py +0 -0
- digitalhub/entities/_base/__init__.py +0 -0
- digitalhub/entities/_base/api.py +346 -0
- digitalhub/entities/_base/base.py +82 -0
- digitalhub/entities/_base/crud.py +610 -0
- digitalhub/entities/_base/entity/__init__.py +0 -0
- digitalhub/entities/_base/entity/base.py +132 -0
- digitalhub/entities/_base/entity/context.py +118 -0
- digitalhub/entities/_base/entity/executable.py +380 -0
- digitalhub/entities/_base/entity/material.py +214 -0
- digitalhub/entities/_base/entity/unversioned.py +87 -0
- digitalhub/entities/_base/entity/versioned.py +94 -0
- digitalhub/entities/_base/metadata.py +59 -0
- digitalhub/entities/_base/spec/__init__.py +0 -0
- digitalhub/entities/_base/spec/base.py +58 -0
- digitalhub/entities/_base/spec/material.py +22 -0
- digitalhub/entities/_base/state.py +31 -0
- digitalhub/entities/_base/status/__init__.py +0 -0
- digitalhub/entities/_base/status/base.py +32 -0
- digitalhub/entities/_base/status/material.py +49 -0
- digitalhub/entities/_builders/__init__.py +0 -0
- digitalhub/entities/_builders/metadata.py +60 -0
- digitalhub/entities/_builders/name.py +31 -0
- digitalhub/entities/_builders/spec.py +43 -0
- digitalhub/entities/_builders/status.py +62 -0
- digitalhub/entities/_builders/uuid.py +33 -0
- digitalhub/entities/artifact/__init__.py +0 -0
- digitalhub/entities/artifact/builder.py +133 -0
- digitalhub/entities/artifact/crud.py +358 -0
- digitalhub/entities/artifact/entity/__init__.py +0 -0
- digitalhub/entities/artifact/entity/_base.py +39 -0
- digitalhub/entities/artifact/entity/artifact.py +9 -0
- digitalhub/entities/artifact/spec.py +39 -0
- digitalhub/entities/artifact/status.py +15 -0
- digitalhub/entities/dataitem/__init__.py +0 -0
- digitalhub/entities/dataitem/builder.py +144 -0
- digitalhub/entities/dataitem/crud.py +395 -0
- digitalhub/entities/dataitem/entity/__init__.py +0 -0
- digitalhub/entities/dataitem/entity/_base.py +75 -0
- digitalhub/entities/dataitem/entity/dataitem.py +9 -0
- digitalhub/entities/dataitem/entity/iceberg.py +7 -0
- digitalhub/entities/dataitem/entity/table.py +125 -0
- digitalhub/entities/dataitem/models.py +62 -0
- digitalhub/entities/dataitem/spec.py +61 -0
- digitalhub/entities/dataitem/status.py +38 -0
- digitalhub/entities/entity_types.py +19 -0
- digitalhub/entities/function/__init__.py +0 -0
- digitalhub/entities/function/builder.py +86 -0
- digitalhub/entities/function/crud.py +305 -0
- digitalhub/entities/function/entity.py +101 -0
- digitalhub/entities/function/models.py +118 -0
- digitalhub/entities/function/spec.py +81 -0
- digitalhub/entities/function/status.py +9 -0
- digitalhub/entities/model/__init__.py +0 -0
- digitalhub/entities/model/builder.py +152 -0
- digitalhub/entities/model/crud.py +358 -0
- digitalhub/entities/model/entity/__init__.py +0 -0
- digitalhub/entities/model/entity/_base.py +34 -0
- digitalhub/entities/model/entity/huggingface.py +9 -0
- digitalhub/entities/model/entity/mlflow.py +90 -0
- digitalhub/entities/model/entity/model.py +9 -0
- digitalhub/entities/model/entity/sklearn.py +9 -0
- digitalhub/entities/model/models.py +26 -0
- digitalhub/entities/model/spec.py +146 -0
- digitalhub/entities/model/status.py +33 -0
- digitalhub/entities/project/__init__.py +0 -0
- digitalhub/entities/project/builder.py +82 -0
- digitalhub/entities/project/crud.py +350 -0
- digitalhub/entities/project/entity.py +2060 -0
- digitalhub/entities/project/spec.py +50 -0
- digitalhub/entities/project/status.py +9 -0
- digitalhub/entities/registries.py +48 -0
- digitalhub/entities/run/__init__.py +0 -0
- digitalhub/entities/run/builder.py +77 -0
- digitalhub/entities/run/crud.py +232 -0
- digitalhub/entities/run/entity.py +461 -0
- digitalhub/entities/run/spec.py +153 -0
- digitalhub/entities/run/status.py +114 -0
- digitalhub/entities/secret/__init__.py +0 -0
- digitalhub/entities/secret/builder.py +93 -0
- digitalhub/entities/secret/crud.py +294 -0
- digitalhub/entities/secret/entity.py +73 -0
- digitalhub/entities/secret/spec.py +35 -0
- digitalhub/entities/secret/status.py +9 -0
- digitalhub/entities/task/__init__.py +0 -0
- digitalhub/entities/task/builder.py +74 -0
- digitalhub/entities/task/crud.py +241 -0
- digitalhub/entities/task/entity.py +135 -0
- digitalhub/entities/task/models.py +199 -0
- digitalhub/entities/task/spec.py +51 -0
- digitalhub/entities/task/status.py +9 -0
- digitalhub/entities/utils.py +184 -0
- digitalhub/entities/workflow/__init__.py +0 -0
- digitalhub/entities/workflow/builder.py +91 -0
- digitalhub/entities/workflow/crud.py +304 -0
- digitalhub/entities/workflow/entity.py +77 -0
- digitalhub/entities/workflow/spec.py +15 -0
- digitalhub/entities/workflow/status.py +9 -0
- digitalhub/readers/__init__.py +0 -0
- digitalhub/readers/builder.py +54 -0
- digitalhub/readers/objects/__init__.py +0 -0
- digitalhub/readers/objects/base.py +70 -0
- digitalhub/readers/objects/pandas.py +207 -0
- digitalhub/readers/registry.py +15 -0
- digitalhub/registry/__init__.py +0 -0
- digitalhub/registry/models.py +87 -0
- digitalhub/registry/registry.py +74 -0
- digitalhub/registry/utils.py +150 -0
- digitalhub/runtimes/__init__.py +0 -0
- digitalhub/runtimes/base.py +164 -0
- digitalhub/runtimes/builder.py +53 -0
- digitalhub/runtimes/kind_registry.py +170 -0
- digitalhub/stores/__init__.py +0 -0
- digitalhub/stores/builder.py +257 -0
- digitalhub/stores/objects/__init__.py +0 -0
- digitalhub/stores/objects/base.py +189 -0
- digitalhub/stores/objects/local.py +230 -0
- digitalhub/stores/objects/remote.py +143 -0
- digitalhub/stores/objects/s3.py +563 -0
- digitalhub/stores/objects/sql.py +328 -0
- digitalhub/utils/__init__.py +0 -0
- digitalhub/utils/data_utils.py +127 -0
- digitalhub/utils/env_utils.py +123 -0
- digitalhub/utils/exceptions.py +55 -0
- digitalhub/utils/file_utils.py +204 -0
- digitalhub/utils/generic_utils.py +207 -0
- digitalhub/utils/git_utils.py +148 -0
- digitalhub/utils/io_utils.py +79 -0
- digitalhub/utils/logger.py +17 -0
- digitalhub/utils/uri_utils.py +56 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/METADATA +27 -12
- digitalhub-0.8.0b2.dist-info/RECORD +161 -0
- test/test_crud_artifacts.py +1 -1
- test/test_crud_dataitems.py +1 -1
- test/test_crud_functions.py +1 -1
- test/test_crud_runs.py +1 -1
- test/test_crud_tasks.py +1 -1
- digitalhub-0.8.0b0.dist-info/RECORD +0 -14
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pyarrow as pa
|
|
6
|
+
import pyarrow.parquet as pq
|
|
7
|
+
from sqlalchemy import MetaData, Table, create_engine
|
|
8
|
+
from sqlalchemy.engine import Engine
|
|
9
|
+
from sqlalchemy.engine.row import LegacyRow
|
|
10
|
+
from sqlalchemy.exc import SQLAlchemyError
|
|
11
|
+
|
|
12
|
+
from digitalhub.stores.objects.base import Store, StoreConfig
|
|
13
|
+
from digitalhub.utils.exceptions import StoreError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SQLStoreConfig(StoreConfig):
    """
    SQL store configuration class.

    Connection parameters for a PostgreSQL database; consumed by
    SqlStore to build the connection string
    (postgresql://user:password@host:port/database).
    """

    host: str
    """SQL host."""

    port: int
    """SQL port."""

    user: str
    """SQL user."""

    password: str
    """SQL password."""

    database: str
    """SQL database name."""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SqlStore(Store):
    """
    SQL store class. It implements the Store interface and provides methods to fetch
    artifacts from SQL based storage as parquet files.

    Upload and file-info retrieval are not supported by this store.
    """

    def __init__(self, name: str, store_type: str, config: SQLStoreConfig) -> None:
        super().__init__(name, store_type)
        # Connection parameters (host, port, credentials, database).
        self.config = config

    ##############################
    # IO methods
    ##############################

    def download(
        self,
        root: str,
        dst: Path | None,
        src: list[str],
        overwrite: bool = False,
    ) -> str:
        """
        Download a table from storage as a parquet file.

        Parameters
        ----------
        root : str
            The root path of the artifact
            (sql://<database>/<schema>/<table> or sql://<database>/<table>).
        dst : Path | None
            The destination of the artifact on the local filesystem.
            If None, a temporary destination is built.
        src : list[str]
            List of sources. Not used by the SQL store.
        overwrite : bool
            Specify if overwrite existing file(s).

        Returns
        -------
        str
            Destination path of the downloaded artifact.

        Raises
        ------
        StoreError
            If dst points to a file that is not a parquet file.
        """
        table_name = self._get_table_name(root) + ".parquet"

        # Case where dst is not provided
        if dst is None:
            dst = Path(self._build_temp("sql")) / table_name
        else:
            self._check_local_dst(str(dst))
            path = Path(dst)

            # Case where dst is a directory
            if path.suffix == "":
                dst = path / table_name

            # Case where dst is a file
            elif path.suffix != ".parquet":
                raise StoreError("The destination path must be a directory or a parquet file.")

            self._check_overwrite(dst, overwrite)
            self._build_path(dst)

        schema = self._get_schema(root)
        table = self._get_table_name(root)
        return self._download_table(schema, table, str(dst))

    def upload(self, src: str | list[str], dst: str | None = None) -> list[tuple[str, str]]:
        """
        Upload an artifact to storage.

        Raises
        ------
        StoreError
            Always raised: SQL store does not support upload.
        """
        raise StoreError("SQL store does not support upload.")

    def get_file_info(self, paths: list[str]) -> list[dict]:
        """
        Get file information from SQL based storage.

        Raises
        ------
        NotImplementedError
            Always raised: SQL store does not support file info retrieval.
        """
        # NOTE: the message previously said "does not support upload"
        # (copy-paste from upload()); fixed to describe this method.
        raise NotImplementedError("SQL store does not support get_file_info.")

    ##############################
    # Private helper methods
    ##############################

    def _get_connection_string(self) -> str:
        """
        Build the PostgreSQL connection string from the store config.

        Returns
        -------
        str
            The connection string.
        """
        return (
            f"postgresql://{self.config.user}:{self.config.password}@"
            f"{self.config.host}:{self.config.port}/{self.config.database}"
        )

    def _get_engine(self, schema: str | None = None) -> Engine:
        """
        Create engine from connection string.

        Parameters
        ----------
        schema : str
            The schema used as search path, if provided.

        Returns
        -------
        Engine
            An SQLAlchemy engine.

        Raises
        ------
        StoreError
            If the connection string is invalid or engine creation fails.
        """
        connection_string = self._get_connection_string()
        if not isinstance(connection_string, str):
            raise StoreError("Connection string must be a string.")
        try:
            connect_args = {"connect_timeout": 30}
            if schema is not None:
                # Restrict the search path so unqualified table names
                # resolve inside the requested schema.
                connect_args["options"] = f"-csearch_path={schema}"
            return create_engine(connection_string, connect_args=connect_args)
        except Exception as ex:
            # Chain the original exception for easier debugging.
            raise StoreError(f"Something wrong with connection string. Arguments: {str(ex.args)}") from ex

    def _check_factory(self, schema: str | None = None) -> Engine:
        """
        Check if the database is accessible and return the engine.

        Parameters
        ----------
        schema : str
            The schema.

        Returns
        -------
        Engine
            The database engine.
        """
        engine = self._get_engine(schema)
        self._check_access_to_storage(engine)
        return engine

    @staticmethod
    def _parse_path(path: str) -> dict:
        """
        Parse an sql:// path and return its components.

        Parameters
        ----------
        path : str
            The path (sql://<database>/<schema>/<table> or
            sql://<database>/<table>).

        Returns
        -------
        dict
            Dictionary with "database", "schema" and "table" keys.
            The schema defaults to "public" when not given.

        Raises
        ------
        ValueError
            If the path is not a valid SQL path.
        """
        err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
        protocol, pth = path.split("://")
        components = pth.split("/")
        if protocol != "sql" or not (2 <= len(components) <= 3):
            raise ValueError(err_msg)

        database = components[0]
        table = components[-1]
        schema = components[1] if len(components) == 3 else "public"
        return {"database": database, "schema": schema, "table": table}

    def _get_schema(self, uri: str) -> str:
        """
        Get the name of the SQL schema from the URI.

        Parameters
        ----------
        uri : str
            The URI.

        Returns
        -------
        str
            The name of the SQL schema.
        """
        return str(self._parse_path(uri).get("schema"))

    def _get_table_name(self, uri: str) -> str:
        """
        Get the name of the table from the URI.

        Parameters
        ----------
        uri : str
            The URI.

        Returns
        -------
        str
            The name of the table.
        """
        return str(self._parse_path(uri).get("table"))

    @staticmethod
    def _check_access_to_storage(engine: Engine) -> None:
        """
        Check if there is access to the storage.

        Parameters
        ----------
        engine : Engine
            An SQLAlchemy engine.

        Returns
        -------
        None

        Raises
        ------
        StoreError
            If there is no access to the storage.
        """
        try:
            # Close the probe connection instead of leaking it.
            engine.connect().close()
        except SQLAlchemyError:
            engine.dispose()
            raise StoreError("No access to db!")

    def _download_table(self, schema: str, table: str, dst: str) -> str:
        """
        Download a table from SQL based storage into a parquet file.

        Parameters
        ----------
        schema : str
            The origin schema.
        table : str
            The origin table.
        dst : str
            The destination path.

        Returns
        -------
        str
            The destination path.
        """
        engine = self._check_factory(schema=schema)
        try:
            # Reflect the table and fetch all of its rows.
            sa_table = Table(table, MetaData(), autoload_with=engine)
            query = sa_table.select()
            with engine.begin() as conn:
                result: list[LegacyRow] = conn.execute(query).fetchall()

            # Convert the rows to a columnar dict, then write it
            # as a parquet file through pyarrow.
            data = self._parse_result(result)
            arrow_table = pa.Table.from_pydict(data)
            pq.write_table(arrow_table, dst)
        finally:
            # Always release pooled connections, even on failure.
            engine.dispose()

        return dst

    @staticmethod
    def _parse_result(result: list[LegacyRow]) -> dict:
        """
        Convert a list of rows to a columnar dict.

        Parameters
        ----------
        result : list[LegacyRow]
            The rows to convert.

        Returns
        -------
        dict
            Mapping of column name to the list of its values.
        """
        data: dict = {}
        for row in result:
            for column_name, value in row.items():
                data.setdefault(column_name, []).append(value)
        return data
|
|
File without changes
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def build_data_preview(preview: list[dict] | None = None, rows_count: int | None = None) -> dict:
    """
    Assemble a data-preview dictionary.

    Parameters
    ----------
    preview : list[dict] | None
        Column previews, stored under the "cols" key when given.
    rows_count : int | None
        Row count, stored under the "rows" key when given.

    Returns
    -------
    dict
        Data preview containing only the keys that were provided.
    """
    pairs = (("cols", preview), ("rows", rows_count))
    return {key: value for key, value in pairs if value is not None}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_data_preview(columns: list, data: list[list], columnar: bool = False) -> list[dict]:
    """
    Prepare a preview of at most 10 rows of data.

    Parameters
    ----------
    columns : list
        Columns names.
    data : list[list]
        Data to preview.
    columnar : bool
        If True, data are arranged in columns; if False, in rows.

    Returns
    -------
    list[dict]
        Data preview (empty when the serialized preview is too large).
    """
    if columnar:
        # Keep at most the first 10 values of each column.
        truncated = [column[:10] for column in data]
    else:
        # Keep at most the first 10 rows, then transpose to columns.
        truncated = list(map(list, zip(*data[:10])))

    # Pair names with values, drop non-serializable memoryview
    # columns, and enforce the preview size limit.
    preview = prepare_preview(columns, truncated)
    without_memoryviews = filter_memoryview(preview)
    return check_preview_size(without_memoryviews)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def prepare_preview(column_names: list, data: list[list]) -> list[dict]:
    """
    Pair each column name with its values.

    Parameters
    ----------
    column_names : list
        Column names.
    data : list[list]
        Column-oriented values, one list per column.

    Returns
    -------
    list[dict]
        One {"name": ..., "value": ...} dict per column.

    Raises
    ------
    ValueError
        If the number of names does not match the number of columns.
    """
    if len(column_names) != len(data):
        raise ValueError("Column names and data must have the same length")
    preview = []
    for name, values in zip(column_names, data):
        preview.append({"name": name, "value": values})
    return preview
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def filter_memoryview(data: list[dict]) -> list[dict]:
    """
    Drop columns whose values contain memoryview objects.

    Memoryview values are not JSON serializable, so columns holding
    them cannot appear in the preview. Filtering is done by column
    name, matching the original name-based removal semantics.

    Parameters
    ----------
    data : list[dict]
        Column previews ({"name": ..., "value": ...} dicts).

    Returns
    -------
    list[dict]
        Column previews without memoryview-bearing columns.
    """
    # Collect offending names once, then filter in a single O(n) pass
    # (the previous implementation rebuilt the list per filtered name).
    names_to_filter = {
        column["name"]
        for column in data
        if any(isinstance(value, memoryview) for value in column["value"])
    }
    return [column for column in data if column["name"] not in names_to_filter]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def check_preview_size(preview: list[dict]) -> list:
    """
    Discard the preview when its JSON serialization is too large.

    Parameters
    ----------
    preview : list[dict]
        Preview.

    Returns
    -------
    list
        The preview itself, or an empty list when its UTF-8 encoded
        JSON form reaches 64000 bytes.
    """
    serialized = json.dumps(preview).encode("utf-8")
    too_big = len(serialized) >= 64000
    return [] if too_big else preview
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
from digitalhub.client.builder import check_client_exists, get_client
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from digitalhub.client.objects.dhcore import ClientDHCore
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def set_dhcore_env(
    endpoint: str | None = None,
    user: str | None = None,
    password: str | None = None,
    access_token: str | None = None,
    refresh_token: str | None = None,
    client_id: str | None = None,
) -> None:
    """
    Set environment variables for the DHCore configuration.

    Any environment variable already set is overwritten. If a
    non-local client already exists, its configuration is also
    updated from the new environment values.

    Parameters
    ----------
    endpoint : str
        The endpoint of DHCore.
    user : str
        The user of DHCore.
    password : str
        The password of DHCore.
    access_token : str
        The access token of DHCore.
    refresh_token : str
        The refresh token of DHCore.
    client_id : str
        The client id of DHCore.

    Returns
    -------
    None
    """
    env_map = {
        "DHCORE_ENDPOINT": endpoint,
        "DHCORE_USER": user,
        "DHCORE_PASSWORD": password,
        "DHCORE_ACCESS_TOKEN": access_token,
        "DHCORE_REFRESH_TOKEN": refresh_token,
        "DHCORE_CLIENT_ID": client_id,
    }
    for variable, value in env_map.items():
        if value is not None:
            os.environ[variable] = value

    # Propagate the new values to an already-built remote client.
    if check_client_exists(local=False):
        update_client_from_env()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def update_client_from_env() -> None:
    """
    Update the non-local client configuration from environment variables.

    The core endpoint is refreshed first. For authentication, an
    access token (with optional refresh token and client id) takes
    precedence and selects OAuth2; otherwise user and password, when
    both present, select basic auth.

    Returns
    -------
    None
    """
    client: ClientDHCore = get_client(local=False)

    # Refresh the core endpoint when provided.
    endpoint = os.getenv("DHCORE_ENDPOINT")
    if endpoint is not None:
        client._endpoint_core = endpoint

    # Token-based auth overrides every other auth option.
    access_token = os.getenv("DHCORE_ACCESS_TOKEN")
    if access_token is not None:
        refresh_token = os.getenv("DHCORE_REFRESH_TOKEN")
        if refresh_token is not None:
            client._refresh_token = refresh_token
        client_id = os.getenv("DHCORE_CLIENT_ID")
        if client_id is not None:
            client._client_id = client_id
        client._access_token = access_token
        client._auth_type = "oauth2"
        return

    # Fall back to basic auth when both credentials are available.
    user = os.getenv("DHCORE_USER")
    pwd = os.getenv("DHCORE_PASSWORD")
    if user is not None and pwd is not None:
        client._user = user
        client._password = pwd
        client._auth_type = "basic"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def refresh_token() -> None:
    """
    Ask the non-local client to fetch a new access token.

    Returns
    -------
    None
    """
    dhcore_client: ClientDHCore = get_client(local=False)
    dhcore_client._get_new_access_token()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_s3_bucket() -> str:
    """
    Function to get S3 bucket name.

    Reads the S3_BUCKET_NAME environment variable, falling back to
    "datalake" when it is not set, so the result is never None
    (the previous return annotation of str | None was misleading).

    Returns
    -------
    str
        The S3 bucket name.
    """
    return os.getenv("S3_BUCKET_NAME", "datalake")
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class StoreError(Exception):
    """
    Raised when errors are encountered on stores.
    """
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BackendError(Exception):
    """
    Raised when errors are encountered from the backend.
    """
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class EntityNotExistsError(BackendError):
    """
    Raised when an entity is not found.
    """
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EntityAlreadyExistsError(BackendError):
    """
    Raised when an entity already exists.
    """
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MissingSpecError(BackendError):
    """
    Raised when the spec is missing in the backend.
    """
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class UnauthorizedError(BackendError):
    """
    Raised when a request is unauthorized.
    """
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ForbiddenError(BackendError):
    """
    Raised when a request is forbidden.
    """
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BadRequestError(BackendError):
    """
    Raised when a request is malformed.
    """
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class EntityError(Exception):
    """
    Raised when errors are encountered on entities.
    """
|