digitalhub 0.7.0b2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of digitalhub might be problematic. Click here for more details.

Files changed (232) hide show
  1. digitalhub/__init__.py +63 -93
  2. digitalhub/client/__init__.py +0 -0
  3. digitalhub/client/_base/__init__.py +0 -0
  4. digitalhub/client/_base/client.py +56 -0
  5. digitalhub/client/api.py +63 -0
  6. digitalhub/client/builder.py +50 -0
  7. digitalhub/client/dhcore/__init__.py +0 -0
  8. digitalhub/client/dhcore/client.py +669 -0
  9. digitalhub/client/dhcore/env.py +21 -0
  10. digitalhub/client/dhcore/models.py +46 -0
  11. digitalhub/client/dhcore/utils.py +111 -0
  12. digitalhub/client/local/__init__.py +0 -0
  13. digitalhub/client/local/client.py +533 -0
  14. digitalhub/context/__init__.py +0 -0
  15. digitalhub/context/api.py +93 -0
  16. digitalhub/context/builder.py +94 -0
  17. digitalhub/context/context.py +136 -0
  18. digitalhub/datastores/__init__.py +0 -0
  19. digitalhub/datastores/_base/__init__.py +0 -0
  20. digitalhub/datastores/_base/datastore.py +85 -0
  21. digitalhub/datastores/api.py +37 -0
  22. digitalhub/datastores/builder.py +110 -0
  23. digitalhub/datastores/local/__init__.py +0 -0
  24. digitalhub/datastores/local/datastore.py +50 -0
  25. digitalhub/datastores/remote/__init__.py +0 -0
  26. digitalhub/datastores/remote/datastore.py +31 -0
  27. digitalhub/datastores/s3/__init__.py +0 -0
  28. digitalhub/datastores/s3/datastore.py +46 -0
  29. digitalhub/datastores/sql/__init__.py +0 -0
  30. digitalhub/datastores/sql/datastore.py +68 -0
  31. digitalhub/entities/__init__.py +0 -0
  32. digitalhub/entities/_base/__init__.py +0 -0
  33. digitalhub/entities/_base/_base/__init__.py +0 -0
  34. digitalhub/entities/_base/_base/entity.py +82 -0
  35. digitalhub/entities/_base/api_utils.py +620 -0
  36. digitalhub/entities/_base/context/__init__.py +0 -0
  37. digitalhub/entities/_base/context/entity.py +118 -0
  38. digitalhub/entities/_base/crud.py +468 -0
  39. digitalhub/entities/_base/entity/__init__.py +0 -0
  40. digitalhub/entities/_base/entity/_constructors/__init__.py +0 -0
  41. digitalhub/entities/_base/entity/_constructors/metadata.py +44 -0
  42. digitalhub/entities/_base/entity/_constructors/name.py +31 -0
  43. digitalhub/entities/_base/entity/_constructors/spec.py +33 -0
  44. digitalhub/entities/_base/entity/_constructors/status.py +52 -0
  45. digitalhub/entities/_base/entity/_constructors/uuid.py +26 -0
  46. digitalhub/entities/_base/entity/builder.py +175 -0
  47. digitalhub/entities/_base/entity/entity.py +106 -0
  48. digitalhub/entities/_base/entity/metadata.py +59 -0
  49. digitalhub/entities/_base/entity/spec.py +58 -0
  50. digitalhub/entities/_base/entity/status.py +43 -0
  51. digitalhub/entities/_base/executable/__init__.py +0 -0
  52. digitalhub/entities/_base/executable/entity.py +405 -0
  53. digitalhub/entities/_base/material/__init__.py +0 -0
  54. digitalhub/entities/_base/material/entity.py +214 -0
  55. digitalhub/entities/_base/material/spec.py +22 -0
  56. digitalhub/entities/_base/material/status.py +49 -0
  57. digitalhub/entities/_base/runtime_entity/__init__.py +0 -0
  58. digitalhub/entities/_base/runtime_entity/builder.py +106 -0
  59. digitalhub/entities/_base/unversioned/__init__.py +0 -0
  60. digitalhub/entities/_base/unversioned/builder.py +66 -0
  61. digitalhub/entities/_base/unversioned/entity.py +49 -0
  62. digitalhub/entities/_base/versioned/__init__.py +0 -0
  63. digitalhub/entities/_base/versioned/builder.py +68 -0
  64. digitalhub/entities/_base/versioned/entity.py +53 -0
  65. digitalhub/entities/artifact/__init__.py +0 -0
  66. digitalhub/entities/artifact/_base/__init__.py +0 -0
  67. digitalhub/entities/artifact/_base/builder.py +86 -0
  68. digitalhub/entities/artifact/_base/entity.py +39 -0
  69. digitalhub/entities/artifact/_base/spec.py +15 -0
  70. digitalhub/entities/artifact/_base/status.py +9 -0
  71. digitalhub/entities/artifact/artifact/__init__.py +0 -0
  72. digitalhub/entities/artifact/artifact/builder.py +18 -0
  73. digitalhub/entities/artifact/artifact/entity.py +32 -0
  74. digitalhub/entities/artifact/artifact/spec.py +27 -0
  75. digitalhub/entities/artifact/artifact/status.py +15 -0
  76. digitalhub/entities/artifact/crud.py +332 -0
  77. digitalhub/entities/builders.py +63 -0
  78. digitalhub/entities/dataitem/__init__.py +0 -0
  79. digitalhub/entities/dataitem/_base/__init__.py +0 -0
  80. digitalhub/entities/dataitem/_base/builder.py +86 -0
  81. digitalhub/entities/dataitem/_base/entity.py +75 -0
  82. digitalhub/entities/dataitem/_base/spec.py +15 -0
  83. digitalhub/entities/dataitem/_base/status.py +20 -0
  84. digitalhub/entities/dataitem/crud.py +372 -0
  85. digitalhub/entities/dataitem/dataitem/__init__.py +0 -0
  86. digitalhub/entities/dataitem/dataitem/builder.py +18 -0
  87. digitalhub/entities/dataitem/dataitem/entity.py +32 -0
  88. digitalhub/entities/dataitem/dataitem/spec.py +15 -0
  89. digitalhub/entities/dataitem/dataitem/status.py +9 -0
  90. digitalhub/entities/dataitem/iceberg/__init__.py +0 -0
  91. digitalhub/entities/dataitem/iceberg/builder.py +18 -0
  92. digitalhub/entities/dataitem/iceberg/entity.py +32 -0
  93. digitalhub/entities/dataitem/iceberg/spec.py +15 -0
  94. digitalhub/entities/dataitem/iceberg/status.py +9 -0
  95. digitalhub/entities/dataitem/table/__init__.py +0 -0
  96. digitalhub/entities/dataitem/table/builder.py +18 -0
  97. digitalhub/entities/dataitem/table/entity.py +146 -0
  98. digitalhub/entities/dataitem/table/models.py +62 -0
  99. digitalhub/entities/dataitem/table/spec.py +25 -0
  100. digitalhub/entities/dataitem/table/status.py +9 -0
  101. digitalhub/entities/function/__init__.py +0 -0
  102. digitalhub/entities/function/_base/__init__.py +0 -0
  103. digitalhub/entities/function/_base/builder.py +79 -0
  104. digitalhub/entities/function/_base/entity.py +98 -0
  105. digitalhub/entities/function/_base/models.py +118 -0
  106. digitalhub/entities/function/_base/spec.py +15 -0
  107. digitalhub/entities/function/_base/status.py +9 -0
  108. digitalhub/entities/function/crud.py +279 -0
  109. digitalhub/entities/model/__init__.py +0 -0
  110. digitalhub/entities/model/_base/__init__.py +0 -0
  111. digitalhub/entities/model/_base/builder.py +86 -0
  112. digitalhub/entities/model/_base/entity.py +34 -0
  113. digitalhub/entities/model/_base/spec.py +49 -0
  114. digitalhub/entities/model/_base/status.py +9 -0
  115. digitalhub/entities/model/crud.py +331 -0
  116. digitalhub/entities/model/huggingface/__init__.py +0 -0
  117. digitalhub/entities/model/huggingface/builder.py +18 -0
  118. digitalhub/entities/model/huggingface/entity.py +32 -0
  119. digitalhub/entities/model/huggingface/spec.py +36 -0
  120. digitalhub/entities/model/huggingface/status.py +9 -0
  121. digitalhub/entities/model/mlflow/__init__.py +0 -0
  122. digitalhub/entities/model/mlflow/builder.py +18 -0
  123. digitalhub/entities/model/mlflow/entity.py +32 -0
  124. digitalhub/entities/model/mlflow/models.py +26 -0
  125. digitalhub/entities/model/mlflow/spec.py +44 -0
  126. digitalhub/entities/model/mlflow/status.py +9 -0
  127. digitalhub/entities/model/mlflow/utils.py +81 -0
  128. digitalhub/entities/model/model/__init__.py +0 -0
  129. digitalhub/entities/model/model/builder.py +18 -0
  130. digitalhub/entities/model/model/entity.py +32 -0
  131. digitalhub/entities/model/model/spec.py +15 -0
  132. digitalhub/entities/model/model/status.py +9 -0
  133. digitalhub/entities/model/sklearn/__init__.py +0 -0
  134. digitalhub/entities/model/sklearn/builder.py +18 -0
  135. digitalhub/entities/model/sklearn/entity.py +32 -0
  136. digitalhub/entities/model/sklearn/spec.py +15 -0
  137. digitalhub/entities/model/sklearn/status.py +9 -0
  138. digitalhub/entities/project/__init__.py +0 -0
  139. digitalhub/entities/project/_base/__init__.py +0 -0
  140. digitalhub/entities/project/_base/builder.py +128 -0
  141. digitalhub/entities/project/_base/entity.py +2078 -0
  142. digitalhub/entities/project/_base/spec.py +50 -0
  143. digitalhub/entities/project/_base/status.py +9 -0
  144. digitalhub/entities/project/crud.py +357 -0
  145. digitalhub/entities/run/__init__.py +0 -0
  146. digitalhub/entities/run/_base/__init__.py +0 -0
  147. digitalhub/entities/run/_base/builder.py +94 -0
  148. digitalhub/entities/run/_base/entity.py +307 -0
  149. digitalhub/entities/run/_base/spec.py +50 -0
  150. digitalhub/entities/run/_base/status.py +9 -0
  151. digitalhub/entities/run/crud.py +219 -0
  152. digitalhub/entities/secret/__init__.py +0 -0
  153. digitalhub/entities/secret/_base/__init__.py +0 -0
  154. digitalhub/entities/secret/_base/builder.py +81 -0
  155. digitalhub/entities/secret/_base/entity.py +74 -0
  156. digitalhub/entities/secret/_base/spec.py +35 -0
  157. digitalhub/entities/secret/_base/status.py +9 -0
  158. digitalhub/entities/secret/crud.py +290 -0
  159. digitalhub/entities/task/__init__.py +0 -0
  160. digitalhub/entities/task/_base/__init__.py +0 -0
  161. digitalhub/entities/task/_base/builder.py +91 -0
  162. digitalhub/entities/task/_base/entity.py +136 -0
  163. digitalhub/entities/task/_base/models.py +208 -0
  164. digitalhub/entities/task/_base/spec.py +53 -0
  165. digitalhub/entities/task/_base/status.py +9 -0
  166. digitalhub/entities/task/crud.py +228 -0
  167. digitalhub/entities/utils/__init__.py +0 -0
  168. digitalhub/entities/utils/api.py +346 -0
  169. digitalhub/entities/utils/entity_types.py +19 -0
  170. digitalhub/entities/utils/state.py +31 -0
  171. digitalhub/entities/utils/utils.py +202 -0
  172. digitalhub/entities/workflow/__init__.py +0 -0
  173. digitalhub/entities/workflow/_base/__init__.py +0 -0
  174. digitalhub/entities/workflow/_base/builder.py +79 -0
  175. digitalhub/entities/workflow/_base/entity.py +74 -0
  176. digitalhub/entities/workflow/_base/spec.py +15 -0
  177. digitalhub/entities/workflow/_base/status.py +9 -0
  178. digitalhub/entities/workflow/crud.py +278 -0
  179. digitalhub/factory/__init__.py +0 -0
  180. digitalhub/factory/api.py +277 -0
  181. digitalhub/factory/factory.py +268 -0
  182. digitalhub/factory/utils.py +90 -0
  183. digitalhub/readers/__init__.py +0 -0
  184. digitalhub/readers/_base/__init__.py +0 -0
  185. digitalhub/readers/_base/builder.py +26 -0
  186. digitalhub/readers/_base/reader.py +70 -0
  187. digitalhub/readers/api.py +80 -0
  188. digitalhub/readers/factory.py +133 -0
  189. digitalhub/readers/pandas/__init__.py +0 -0
  190. digitalhub/readers/pandas/builder.py +29 -0
  191. digitalhub/readers/pandas/reader.py +207 -0
  192. digitalhub/runtimes/__init__.py +0 -0
  193. digitalhub/runtimes/_base.py +102 -0
  194. digitalhub/runtimes/builder.py +32 -0
  195. digitalhub/stores/__init__.py +0 -0
  196. digitalhub/stores/_base/__init__.py +0 -0
  197. digitalhub/stores/_base/store.py +189 -0
  198. digitalhub/stores/api.py +54 -0
  199. digitalhub/stores/builder.py +211 -0
  200. digitalhub/stores/local/__init__.py +0 -0
  201. digitalhub/stores/local/store.py +230 -0
  202. digitalhub/stores/remote/__init__.py +0 -0
  203. digitalhub/stores/remote/store.py +143 -0
  204. digitalhub/stores/s3/__init__.py +0 -0
  205. digitalhub/stores/s3/store.py +563 -0
  206. digitalhub/stores/sql/__init__.py +0 -0
  207. digitalhub/stores/sql/store.py +328 -0
  208. digitalhub/utils/__init__.py +0 -0
  209. digitalhub/utils/data_utils.py +127 -0
  210. digitalhub/utils/exceptions.py +67 -0
  211. digitalhub/utils/file_utils.py +204 -0
  212. digitalhub/utils/generic_utils.py +183 -0
  213. digitalhub/utils/git_utils.py +148 -0
  214. digitalhub/utils/io_utils.py +116 -0
  215. digitalhub/utils/logger.py +17 -0
  216. digitalhub/utils/s3_utils.py +58 -0
  217. digitalhub/utils/uri_utils.py +56 -0
  218. {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/METADATA +30 -13
  219. digitalhub-0.8.0.dist-info/RECORD +231 -0
  220. {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/WHEEL +1 -1
  221. test/local/CRUD/test_artifacts.py +96 -0
  222. test/local/CRUD/test_dataitems.py +96 -0
  223. test/local/CRUD/test_models.py +95 -0
  224. test/test_crud_functions.py +1 -1
  225. test/test_crud_runs.py +1 -1
  226. test/test_crud_tasks.py +1 -1
  227. digitalhub-0.7.0b2.dist-info/RECORD +0 -14
  228. test/test_crud_artifacts.py +0 -96
  229. test/test_crud_dataitems.py +0 -96
  230. {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/LICENSE.txt +0 -0
  231. {digitalhub-0.7.0b2.dist-info → digitalhub-0.8.0.dist-info}/top_level.txt +0 -0
  232. /test/{test_imports.py → local/imports/test_imports.py} +0 -0
@@ -0,0 +1,328 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ import pyarrow as pa
6
+ import pyarrow.parquet as pq
7
+ from sqlalchemy import MetaData, Table, create_engine
8
+ from sqlalchemy.engine import Engine
9
+ from sqlalchemy.engine.row import LegacyRow
10
+ from sqlalchemy.exc import SQLAlchemyError
11
+
12
+ from digitalhub.stores._base.store import Store, StoreConfig
13
+ from digitalhub.utils.exceptions import StoreError
14
+
15
+
16
class SQLStoreConfig(StoreConfig):
    """
    SQL store configuration class.

    Holds the connection parameters that the SQL store reads when it
    assembles its database connection string (user, password, host,
    port and database name).
    """

    host: str
    """SQL host."""

    port: int
    """SQL port."""

    user: str
    """SQL user."""

    password: str
    """SQL password."""

    database: str
    """SQL database name."""
35
+
36
+
37
class SqlStore(Store):
    """
    SQL store class. It implements the Store interface and provides methods to fetch
    artifacts from SQL based storage.

    Tables are read through SQLAlchemy and materialised locally as Parquet files.
    Upload and file-info retrieval are not supported by this store.
    """

    def __init__(self, name: str, store_type: str, config: SQLStoreConfig) -> None:
        super().__init__(name, store_type)
        self.config = config

    ##############################
    # IO methods
    ##############################

    def download(
        self,
        root: str,
        dst: Path,
        src: list[str],
        overwrite: bool = False,
    ) -> str:
        """
        Download a table from storage into a local Parquet file.

        Parameters
        ----------
        root : str
            The SQL URI of the table (``sql://<database>/<schema>/<table>``
            or ``sql://<database>/<table>``).
        dst : Path
            The destination on the local filesystem. May be None (a temporary
            location is used), a directory (the file is named after the table)
            or a path ending in ``.parquet``.
        src : list[str]
            List of sources. Not used by this store.
        overwrite : bool
            Specify if overwrite existing file(s).

        Returns
        -------
        str
            Destination path of the downloaded artifact.

        Raises
        ------
        StoreError
            If the destination is a file whose suffix is not ``.parquet``.
        """
        table_name = self._get_table_name(root) + ".parquet"

        # Case where dst is not provided
        if dst is None:
            dst = Path(self._build_temp("sql")) / table_name
        else:
            self._check_local_dst(str(dst))
            path = Path(dst)

            # Case where dst is a directory
            if path.suffix == "":
                dst = path / table_name

            # Case where dst is a file
            elif path.suffix != ".parquet":
                raise StoreError("The destination path must be a directory or a parquet file.")

            self._check_overwrite(dst, overwrite)
            self._build_path(dst)

        schema = self._get_schema(root)
        table = self._get_table_name(root)
        return self._download_table(schema, table, str(dst))

    def upload(self, src: str | list[str], dst: str | None = None) -> list[tuple[str, str]]:
        """
        Upload an artifact to storage.

        Raises
        ------
        StoreError
            This method is not implemented.
        """
        raise StoreError("SQL store does not support upload.")

    def get_file_info(self, paths: list[str]) -> list[dict]:
        """
        Get file information from SQL based storage.

        Raises
        ------
        NotImplementedError
            This method is not implemented.
        """
        # The message previously said "upload" (copy-paste from upload()).
        raise NotImplementedError("SQL store does not support get_file_info.")

    ##############################
    # Private helper methods
    ##############################

    def _get_connection_string(self) -> str:
        """
        Get the connection string.

        Returns
        -------
        str
            The connection string.
        """
        # NOTE(review): user and password are interpolated verbatim; values
        # containing characters such as "@" or "/" must already be URL-escaped
        # by whoever builds the configuration.
        return (
            f"postgresql://{self.config.user}:{self.config.password}@"
            f"{self.config.host}:{self.config.port}/{self.config.database}"
        )

    def _get_engine(self, schema: str | None = None) -> Engine:
        """
        Create engine from connection string.

        Parameters
        ----------
        schema : str
            The schema. When given, it is passed to the driver as the
            ``search_path`` option.

        Returns
        -------
        Engine
            An SQLAlchemy engine.

        Raises
        ------
        StoreError
            If the connection string is not a string or the engine cannot be created.
        """
        connection_string = self._get_connection_string()
        if not isinstance(connection_string, str):
            raise StoreError("Connection string must be a string.")
        try:
            connect_args = {"connect_timeout": 30}
            if schema is not None:
                connect_args["options"] = f"-csearch_path={schema}"
            return create_engine(connection_string, connect_args=connect_args)
        except Exception as ex:
            # Chain the original exception so the root cause stays in the traceback.
            raise StoreError(f"Something wrong with connection string. Arguments: {str(ex.args)}") from ex

    def _check_factory(self, schema: str | None = None) -> Engine:
        """
        Check if the database is accessible and return the engine.

        Parameters
        ----------
        schema : str
            The schema.

        Returns
        -------
        Engine
            The database engine.
        """
        engine = self._get_engine(schema)
        self._check_access_to_storage(engine)
        return engine

    @staticmethod
    def _parse_path(path: str) -> dict:
        """
        Parse the path and return the components.

        Parameters
        ----------
        path : str
            The path.

        Returns
        -------
        dict
            A dictionary containing the components of the path
            (keys ``database``, ``schema`` and ``table``).

        Raises
        ------
        ValueError
            If the path is not a valid SQL path.
        """
        err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"

        # partition() never fails to unpack, so a path without "://" reaches
        # the explicit validation below instead of a tuple-unpacking error.
        protocol, separator, pth = path.partition("://")
        components = pth.split("/")
        if (
            not separator
            or protocol != "sql"
            or not (2 <= len(components) <= 3)
            or not all(components)  # reject empty database/schema/table names
        ):
            raise ValueError(err_msg)

        # Get components
        database = components[0]
        table = components[-1]
        schema = components[1] if len(components) == 3 else "public"
        return {"database": database, "schema": schema, "table": table}

    def _get_schema(self, uri: str) -> str:
        """
        Get the name of the SQL schema from the URI.

        Parameters
        ----------
        uri : str
            The URI.

        Returns
        -------
        str
            The name of the SQL schema.
        """
        return str(self._parse_path(uri).get("schema"))

    def _get_table_name(self, uri: str) -> str:
        """
        Get the name of the table from the URI.

        Parameters
        ----------
        uri : str
            The URI.

        Returns
        -------
        str
            The name of the table
        """
        return str(self._parse_path(uri).get("table"))

    @staticmethod
    def _check_access_to_storage(engine: Engine) -> None:
        """
        Check if there is access to the storage.

        Parameters
        ----------
        engine : Engine
            An SQLAlchemy engine.

        Returns
        -------
        None

        Raises
        ------
        StoreError
            If there is no access to the storage.
        """
        try:
            # Open and immediately release a connection: the probe must not
            # leave a checked-out connection behind.
            with engine.connect():
                pass
        except SQLAlchemyError as err:
            engine.dispose()
            raise StoreError("No access to db!") from err

    def _download_table(self, schema: str, table: str, dst: str) -> str:
        """
        Download a table from SQL based storage.

        Parameters
        ----------
        schema : str
            The origin schema.
        table : str
            The origin table.
        dst : str
            The destination path.

        Returns
        -------
        str
            The destination path.
        """
        engine = self._check_factory(schema=schema)
        try:
            # Read the table from the database
            sa_table = Table(table, MetaData(), autoload_with=engine)
            query = sa_table.select()
            with engine.begin() as conn:
                result: list[LegacyRow] = conn.execute(query).fetchall()

            # Parse the result
            data = self._parse_result(result)

            # Convert the result to a pyarrow table and
            # write the pyarrow table to a Parquet file
            arrow_table = pa.Table.from_pydict(data)
            pq.write_table(arrow_table, dst)
        finally:
            # Release pooled connections even if reflection, query or write fails.
            engine.dispose()

        return dst

    @staticmethod
    def _parse_result(result: list[LegacyRow]) -> dict:
        """
        Convert a list of rows to a column-oriented dict.

        Parameters
        ----------
        result : list[LegacyRow]
            The data to convert.

        Returns
        -------
        dict
            The converted data, mapping each column name to the list of its
            values in row order. Empty when ``result`` is empty.
        """
        data: dict = {}
        for row in result:
            for column_name, value in row.items():
                data.setdefault(column_name, []).append(value)
        return data
File without changes
@@ -0,0 +1,127 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+
5
+
6
+ def build_data_preview(preview: list[dict] | None = None, rows_count: int | None = None) -> dict:
7
+ """
8
+ Build data preview.
9
+
10
+ Parameters
11
+ ----------
12
+ preview : list[dict] | None
13
+ Preview.
14
+ rows_count : int | None
15
+ Row count.
16
+
17
+ Returns
18
+ -------
19
+ dict
20
+ Data preview.
21
+ """
22
+ dict_ = {}
23
+ if preview is not None:
24
+ dict_["cols"] = preview
25
+ if rows_count is not None:
26
+ dict_["rows"] = rows_count
27
+ return dict_
28
+
29
+
30
def get_data_preview(columns: list, data: list[list], columnar: bool = False) -> list[dict]:
    """
    Prepare preview.

    Parameters
    ----------
    columns : list
        Columns names.
    data : list[list]
        Data to preview.
    columnar : bool
        If data are arranged in columns. If False, data are arranged in rows.

    Returns
    -------
    list[dict]
        Data preview: one ``{"name": ..., "value": [...]}`` entry per column,
        limited to the first 10 rows, or an empty list when the preview is
        too large.

    Raises
    ------
    ValueError
        If the number of columns in the data does not match ``columns``.
    """
    if columnar:
        # Data is already column-oriented: keep the first 10 values of each column.
        data = [d[:10] for d in data]
    else:
        # Keep the first 10 rows, then transpose rows into columns.
        rows = data[:10]
        if len(rows) == 0:
            # zip(*[]) yields nothing, which would lose the column count and
            # make an empty table fail the length check in prepare_preview.
            data = [[] for _ in columns]
        else:
            data = [list(col) for col in zip(*rows)]

    # Prepare the preview
    data_dict = prepare_preview(columns, data)

    # Filter memoryview values
    filtered_memview = filter_memoryview(data_dict)

    # Check the size of the preview data
    return check_preview_size(filtered_memview)
67
+
68
+
69
def prepare_preview(column_names: list, data: list[list]) -> list[dict]:
    """
    Pair each column name with its values.

    Parameters
    ----------
    column_names : list
        Names of the columns.
    data : list[list]
        Column-oriented data, one inner list per column.

    Returns
    -------
    list[dict]
        One ``{"name": ..., "value": ...}`` dictionary per column.

    Raises
    ------
    ValueError
        If ``column_names`` and ``data`` differ in length.
    """
    n_columns = len(column_names)
    if n_columns != len(data):
        raise ValueError("Column names and data must have the same length")

    preview = []
    for idx in range(n_columns):
        preview.append({"name": column_names[idx], "value": data[idx]})
    return preview
86
+
87
+
88
def filter_memoryview(data: list[dict]) -> list[dict]:
    """
    Drop the columns that contain memoryview values.

    Parameters
    ----------
    data : list[dict]
        Preview entries, each with a "name" and an iterable "value".

    Returns
    -------
    list[dict]
        The preview without the columns whose values include at least one
        memoryview. Every entry sharing a flagged name is removed. When
        nothing is flagged, the input list is returned unchanged.
    """
    # Collect the flagged names once, then filter in a single pass instead of
    # rebuilding the whole list for every flagged column.
    names_to_drop = {
        column["name"]
        for column in data
        if any(isinstance(v, memoryview) for v in column["value"])
    }
    if not names_to_drop:
        return data
    return [column for column in data if column["name"] not in names_to_drop]
109
+
110
+
111
def check_preview_size(preview: list[dict]) -> list:
    """
    Return the preview only if its JSON encoding is small enough.

    Parameters
    ----------
    preview : list[dict]
        Preview.

    Returns
    -------
    list
        The preview itself, or an empty list when its UTF-8 encoded JSON
        representation is 64000 bytes or larger.
    """
    encoded = json.dumps(preview).encode("utf-8")
    return preview if len(encoded) < 64000 else []
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+
4
class BuilderError(Exception):
    """
    Raised when errors are encountered on builders.
    """
8
+
9
+
10
class StoreError(Exception):
    """
    Raised when errors are encountered on stores.
    """
14
+
15
+
16
class BackendError(Exception):
    """
    Raised when errors are encountered from the backend.

    Base class of the more specific backend exceptions defined in this module.
    """
20
+
21
+
22
class EntityNotExistsError(BackendError):
    """
    Raised when an entity is not found.
    """
26
+
27
+
28
class EntityAlreadyExistsError(BackendError):
    """
    Raised when an entity already exists.
    """
32
+
33
+
34
class MissingSpecError(BackendError):
    """
    Raised when a spec is missing in the backend.
    """
38
+
39
+
40
class UnauthorizedError(BackendError):
    """
    Raised when the request is unauthorized.
    """
44
+
45
+
46
class ForbiddenError(BackendError):
    """
    Raised when the request is forbidden.
    """
50
+
51
+
52
class BadRequestError(BackendError):
    """
    Raised on a bad request.
    """
56
+
57
+
58
class EntityError(Exception):
    """
    Raised when errors are encountered on entities.
    """
62
+
63
+
64
class ContextError(Exception):
    """
    Raised on context errors.
    """
@@ -0,0 +1,204 @@
1
+ from __future__ import annotations
2
+
3
from datetime import datetime
from hashlib import sha256
from mimetypes import guess_type
from pathlib import Path
from typing import Optional

from pydantic import BaseModel
9
+
10
+
11
class FileInfo(BaseModel):
    """
    File info class.

    Every field is optional and defaults to None. The annotations say so
    explicitly so that passing None (for example an unknown content type)
    validates regardless of the pydantic major version.
    """

    # Path (or URI) the file info refers to.
    path: Optional[str] = None
    # File name (last path component).
    name: Optional[str] = None
    # MIME type, when it could be determined.
    content_type: Optional[str] = None
    # Size in bytes.
    size: Optional[int] = None
    # Hash string, prefixed with the algorithm label.
    hash: Optional[str] = None
    # Last-modified timestamp in ISO 8601 format.
    last_modified: Optional[str] = None
22
+
23
+
24
def calculate_blob_hash(data_path: str) -> str:
    """
    Calculate the SHA-256 hash of a file.

    The file is read in fixed-size chunks so that large blobs are hashed
    without loading them entirely into memory.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The hash of the file, formatted as ``sha256:<hexdigest>``.
    """
    digest = sha256()
    with open(data_path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    return f"sha256:{digest.hexdigest()}"
41
+
42
+
43
def get_file_size(data_path: str) -> int:
    """
    Return the size of a file in bytes.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    int
        The size of the file, as reported by the filesystem.
    """
    file_stat = Path(data_path).stat()
    return file_stat.st_size
58
+
59
+
60
def get_file_mime_type(data_path: str) -> str:
    """
    Guess the mime type of a file from its name.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The guessed mime type. None is returned when the type cannot be
        guessed from the path.
    """
    mime_type, _encoding = guess_type(data_path)
    return mime_type
75
+
76
+
77
def get_path_name(data_path: str) -> str:
    """
    Return the final component of a path.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The name of the file (last path component).
    """
    file_path = Path(data_path)
    return file_path.name
92
+
93
+
94
def get_last_modified(data_path: str) -> str:
    """
    Return the last modification time of a file.

    Parameters
    ----------
    data_path : str
        Path to the file.

    Returns
    -------
    str
        The modification time as an ISO 8601 string carrying the local
        timezone offset.
    """
    mtime = Path(data_path).stat().st_mtime
    # astimezone() without arguments attaches the system local timezone.
    local_dt = datetime.fromtimestamp(mtime).astimezone()
    return local_dt.isoformat()
111
+
112
+
113
def get_s3_path(src_path: str) -> str:
    """
    Convert a local path to its URI form.

    Parameters
    ----------
    src_path : str
        Path to the file.

    Returns
    -------
    str
        The path rendered as a ``file://`` URI via ``Path.as_uri()``.
        NOTE(review): despite the function name, no S3 scheme is produced
        here - confirm against callers.
    """
    local_path = Path(src_path)
    return local_path.as_uri()
128
+
129
+
130
def get_file_info_from_local(path: str, src_path: str) -> None | dict:
    """
    Collect file info for a local file.

    Parameters
    ----------
    path : str
        Local path of the file that is inspected (name, mime type, size,
        hash and modification time are all read from it).
    src_path : str
        Path stored in the "path" field of the resulting info.

    Returns
    -------
    dict
        File info as a dictionary, or None if any step fails.
    """
    try:
        info = FileInfo(
            path=src_path,
            name=get_path_name(path),
            content_type=get_file_mime_type(path),
            size=get_file_size(path),
            hash=calculate_blob_hash(path),
            last_modified=get_last_modified(path),
        )
        return info.dict()
    except Exception:
        # Best effort: any failure yields no file info.
        return None
163
+
164
+
165
def get_file_info_from_s3(path: str, metadata: dict) -> None | dict:
    """
    Build file info from S3 object metadata.

    Parameters
    ----------
    path : str
        Object source path.
    metadata : dict
        Metadata of the object from S3. The keys "ContentLength", "ETag",
        "ContentType" and "LastModified" are read.

    Returns
    -------
    dict
        File info as a dictionary, or None if any step fails.
    """
    try:
        size = metadata["ContentLength"]
        # Drop the first and last character of the ETag (presumably the
        # surrounding quotes - confirm against the S3 client output).
        etag = metadata["ETag"][1:-1]

        # Objects below 20 MiB are labelled as plain MD5; larger ones keep
        # the literal ETag.
        multipart_threshold = 20 * 1024 * 1024
        prefix = "md5:" if size < multipart_threshold else "LiteralETag:"

        info = FileInfo(
            path=path,
            name=get_path_name(path),
            content_type=metadata["ContentType"],
            size=size,
            hash=prefix + etag,
            last_modified=metadata["LastModified"].isoformat(),
        )
        return info.dict()
    except Exception:
        # Best effort: any failure yields no file info.
        return None