digitalhub 0.9.2__py3-none-any.whl → 0.10.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +2 -3
- digitalhub/client/_base/client.py +3 -2
- digitalhub/client/dhcore/api_builder.py +5 -0
- digitalhub/client/dhcore/client.py +27 -399
- digitalhub/client/dhcore/configurator.py +339 -0
- digitalhub/client/dhcore/error_parser.py +107 -0
- digitalhub/client/dhcore/models.py +13 -23
- digitalhub/client/dhcore/utils.py +4 -44
- digitalhub/client/local/api_builder.py +9 -17
- digitalhub/client/local/client.py +12 -2
- digitalhub/client/local/enums.py +11 -0
- digitalhub/configurator/api.py +31 -0
- digitalhub/configurator/configurator.py +194 -0
- digitalhub/configurator/credentials_store.py +65 -0
- digitalhub/configurator/ini_module.py +74 -0
- digitalhub/entities/_base/_base/entity.py +2 -2
- digitalhub/entities/_base/material/entity.py +19 -6
- digitalhub/entities/_base/material/utils.py +2 -2
- digitalhub/entities/_commons/enums.py +1 -0
- digitalhub/entities/_commons/models.py +9 -0
- digitalhub/entities/_commons/utils.py +25 -0
- digitalhub/entities/_operations/processor.py +103 -107
- digitalhub/entities/artifact/crud.py +3 -3
- digitalhub/entities/artifact/utils.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +2 -2
- digitalhub/entities/dataitem/crud.py +3 -3
- digitalhub/entities/dataitem/table/entity.py +2 -2
- digitalhub/{utils/data_utils.py → entities/dataitem/table/utils.py} +43 -51
- digitalhub/entities/dataitem/utils.py +6 -3
- digitalhub/entities/model/_base/entity.py +172 -0
- digitalhub/entities/model/_base/spec.py +0 -10
- digitalhub/entities/model/_base/status.py +10 -0
- digitalhub/entities/model/crud.py +3 -3
- digitalhub/entities/model/huggingface/spec.py +6 -3
- digitalhub/entities/model/mlflow/models.py +2 -2
- digitalhub/entities/model/mlflow/spec.py +1 -3
- digitalhub/entities/model/mlflow/utils.py +44 -5
- digitalhub/entities/run/_base/entity.py +149 -0
- digitalhub/entities/run/_base/status.py +12 -0
- digitalhub/entities/task/_base/spec.py +2 -0
- digitalhub/entities/task/crud.py +4 -0
- digitalhub/readers/{_commons → pandas}/enums.py +4 -0
- digitalhub/readers/pandas/reader.py +58 -10
- digitalhub/stores/_base/store.py +1 -49
- digitalhub/stores/api.py +8 -33
- digitalhub/stores/builder.py +44 -161
- digitalhub/stores/local/store.py +4 -18
- digitalhub/stores/remote/store.py +3 -10
- digitalhub/stores/s3/configurator.py +107 -0
- digitalhub/stores/s3/enums.py +17 -0
- digitalhub/stores/s3/models.py +21 -0
- digitalhub/stores/s3/store.py +8 -28
- digitalhub/{utils/s3_utils.py → stores/s3/utils.py} +7 -3
- digitalhub/stores/sql/configurator.py +88 -0
- digitalhub/stores/sql/enums.py +16 -0
- digitalhub/stores/sql/models.py +24 -0
- digitalhub/stores/sql/store.py +14 -57
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/generic_utils.py +9 -8
- digitalhub/utils/uri_utils.py +1 -1
- {digitalhub-0.9.2.dist-info → digitalhub-0.10.0b1.dist-info}/METADATA +5 -6
- {digitalhub-0.9.2.dist-info → digitalhub-0.10.0b1.dist-info}/RECORD +67 -54
- test/local/imports/test_imports.py +0 -1
- digitalhub/client/dhcore/env.py +0 -23
- /digitalhub/{readers/_commons → configurator}/__init__.py +0 -0
- {digitalhub-0.9.2.dist-info → digitalhub-0.10.0b1.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.9.2.dist-info → digitalhub-0.10.0b1.dist-info}/WHEEL +0 -0
- {digitalhub-0.9.2.dist-info → digitalhub-0.10.0b1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
from digitalhub.client.api import get_client
|
|
6
7
|
from digitalhub.context.api import delete_context, get_context
|
|
@@ -713,46 +714,8 @@ class OperationsProcessor:
|
|
|
713
714
|
entity_id=entity_id,
|
|
714
715
|
**kwargs,
|
|
715
716
|
)
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
def read_material_entity(
|
|
719
|
-
self,
|
|
720
|
-
identifier: str,
|
|
721
|
-
entity_type: str | None = None,
|
|
722
|
-
project: str | None = None,
|
|
723
|
-
entity_id: str | None = None,
|
|
724
|
-
**kwargs,
|
|
725
|
-
) -> MaterialEntity:
|
|
726
|
-
"""
|
|
727
|
-
Read object from backend.
|
|
728
|
-
|
|
729
|
-
Parameters
|
|
730
|
-
----------
|
|
731
|
-
identifier : str
|
|
732
|
-
Entity key (store://...) or entity name.
|
|
733
|
-
entity_type : str
|
|
734
|
-
Entity type.
|
|
735
|
-
project : str
|
|
736
|
-
Project name.
|
|
737
|
-
entity_id : str
|
|
738
|
-
Entity ID.
|
|
739
|
-
**kwargs : dict
|
|
740
|
-
Parameters to pass to the API call.
|
|
741
|
-
|
|
742
|
-
Returns
|
|
743
|
-
-------
|
|
744
|
-
MaterialEntity
|
|
745
|
-
Object instance.
|
|
746
|
-
"""
|
|
747
|
-
obj: MaterialEntity = self.read_context_entity(
|
|
748
|
-
identifier,
|
|
749
|
-
entity_type=entity_type,
|
|
750
|
-
project=project,
|
|
751
|
-
entity_id=entity_id,
|
|
752
|
-
**kwargs,
|
|
753
|
-
)
|
|
754
|
-
obj._get_files_info()
|
|
755
|
-
return obj
|
|
717
|
+
entity = build_entity_from_dict(obj)
|
|
718
|
+
return self._post_process_get(entity)
|
|
756
719
|
|
|
757
720
|
def read_unversioned_entity(
|
|
758
721
|
self,
|
|
@@ -1000,42 +963,6 @@ class OperationsProcessor:
|
|
|
1000
963
|
List of object instances.
|
|
1001
964
|
"""
|
|
1002
965
|
context = self._get_context_from_identifier(identifier, project)
|
|
1003
|
-
obj = self._read_context_entity_versions(
|
|
1004
|
-
context,
|
|
1005
|
-
identifier,
|
|
1006
|
-
entity_type=entity_type,
|
|
1007
|
-
project=project,
|
|
1008
|
-
**kwargs,
|
|
1009
|
-
)
|
|
1010
|
-
return [build_entity_from_dict(o) for o in obj]
|
|
1011
|
-
|
|
1012
|
-
def read_material_entity_versions(
|
|
1013
|
-
self,
|
|
1014
|
-
identifier: str,
|
|
1015
|
-
entity_type: str | None = None,
|
|
1016
|
-
project: str | None = None,
|
|
1017
|
-
**kwargs,
|
|
1018
|
-
) -> list[MaterialEntity]:
|
|
1019
|
-
"""
|
|
1020
|
-
Read object versions from backend.
|
|
1021
|
-
|
|
1022
|
-
Parameters
|
|
1023
|
-
----------
|
|
1024
|
-
identifier : str
|
|
1025
|
-
Entity key (store://...) or entity name.
|
|
1026
|
-
entity_type : str
|
|
1027
|
-
Entity type.
|
|
1028
|
-
project : str
|
|
1029
|
-
Project name.
|
|
1030
|
-
**kwargs : dict
|
|
1031
|
-
Parameters to pass to the API call.
|
|
1032
|
-
|
|
1033
|
-
Returns
|
|
1034
|
-
-------
|
|
1035
|
-
list[MaterialEntity]
|
|
1036
|
-
List of object instances.
|
|
1037
|
-
"""
|
|
1038
|
-
context = self._get_context_from_identifier(identifier, project)
|
|
1039
966
|
objs = self._read_context_entity_versions(
|
|
1040
967
|
context,
|
|
1041
968
|
identifier,
|
|
@@ -1045,8 +972,8 @@ class OperationsProcessor:
|
|
|
1045
972
|
)
|
|
1046
973
|
objects = []
|
|
1047
974
|
for o in objs:
|
|
1048
|
-
entity:
|
|
1049
|
-
entity.
|
|
975
|
+
entity: ContextEntity = build_entity_from_dict(o)
|
|
976
|
+
entity = self._post_process_get(entity)
|
|
1050
977
|
objects.append(entity)
|
|
1051
978
|
return objects
|
|
1052
979
|
|
|
@@ -1106,37 +1033,10 @@ class OperationsProcessor:
|
|
|
1106
1033
|
"""
|
|
1107
1034
|
context = self._get_context(project)
|
|
1108
1035
|
objs = self._list_context_entities(context, entity_type, **kwargs)
|
|
1109
|
-
return [build_entity_from_dict(obj) for obj in objs]
|
|
1110
|
-
|
|
1111
|
-
def list_material_entities(
|
|
1112
|
-
self,
|
|
1113
|
-
project: str,
|
|
1114
|
-
entity_type: str,
|
|
1115
|
-
**kwargs,
|
|
1116
|
-
) -> list[MaterialEntity]:
|
|
1117
|
-
"""
|
|
1118
|
-
List all latest version objects from backend.
|
|
1119
|
-
|
|
1120
|
-
Parameters
|
|
1121
|
-
----------
|
|
1122
|
-
project : str
|
|
1123
|
-
Project name.
|
|
1124
|
-
entity_type : str
|
|
1125
|
-
Entity type.
|
|
1126
|
-
**kwargs : dict
|
|
1127
|
-
Parameters to pass to the API call.
|
|
1128
|
-
|
|
1129
|
-
Returns
|
|
1130
|
-
-------
|
|
1131
|
-
list[MaterialEntity]
|
|
1132
|
-
List of object instances.
|
|
1133
|
-
"""
|
|
1134
|
-
context = self._get_context(project)
|
|
1135
|
-
objs = self._list_context_entities(context, entity_type, **kwargs)
|
|
1136
1036
|
objects = []
|
|
1137
1037
|
for o in objs:
|
|
1138
|
-
entity:
|
|
1139
|
-
entity.
|
|
1038
|
+
entity: ContextEntity = build_entity_from_dict(o)
|
|
1039
|
+
entity = self._post_process_get(entity)
|
|
1140
1040
|
objects.append(entity)
|
|
1141
1041
|
return objects
|
|
1142
1042
|
|
|
@@ -1324,6 +1224,26 @@ class OperationsProcessor:
|
|
|
1324
1224
|
**kwargs,
|
|
1325
1225
|
)
|
|
1326
1226
|
|
|
1227
|
+
def _post_process_get(self, entity: ContextEntity) -> ContextEntity:
|
|
1228
|
+
"""
|
|
1229
|
+
Post process get (files, metrics).
|
|
1230
|
+
|
|
1231
|
+
Parameters
|
|
1232
|
+
----------
|
|
1233
|
+
entity : ContextEntity
|
|
1234
|
+
Entity to post process.
|
|
1235
|
+
|
|
1236
|
+
Returns
|
|
1237
|
+
-------
|
|
1238
|
+
ContextEntity
|
|
1239
|
+
Post processed entity.
|
|
1240
|
+
"""
|
|
1241
|
+
if hasattr(entity.status, "metrics"):
|
|
1242
|
+
entity._get_metrics()
|
|
1243
|
+
if hasattr(entity.status, "files"):
|
|
1244
|
+
entity._get_files_info()
|
|
1245
|
+
return entity
|
|
1246
|
+
|
|
1327
1247
|
##############################
|
|
1328
1248
|
# Context entity operations
|
|
1329
1249
|
##############################
|
|
@@ -1644,6 +1564,82 @@ class OperationsProcessor:
|
|
|
1644
1564
|
)
|
|
1645
1565
|
return context.client.update_object(api, entity_list, **kwargs)
|
|
1646
1566
|
|
|
1567
|
+
def read_metrics(
|
|
1568
|
+
self,
|
|
1569
|
+
project: str,
|
|
1570
|
+
entity_type: str,
|
|
1571
|
+
entity_id: str,
|
|
1572
|
+
metric_name: str | None = None,
|
|
1573
|
+
**kwargs,
|
|
1574
|
+
) -> dict:
|
|
1575
|
+
"""
|
|
1576
|
+
Get metrics from backend.
|
|
1577
|
+
|
|
1578
|
+
Parameters
|
|
1579
|
+
----------
|
|
1580
|
+
project : str
|
|
1581
|
+
Project name.
|
|
1582
|
+
entity_type : str
|
|
1583
|
+
Entity type.
|
|
1584
|
+
entity_id : str
|
|
1585
|
+
Entity ID.
|
|
1586
|
+
**kwargs : dict
|
|
1587
|
+
Parameters to pass to the API call.
|
|
1588
|
+
|
|
1589
|
+
Returns
|
|
1590
|
+
-------
|
|
1591
|
+
dict
|
|
1592
|
+
Response from backend.
|
|
1593
|
+
"""
|
|
1594
|
+
context = self._get_context(project)
|
|
1595
|
+
api = context.client.build_api(
|
|
1596
|
+
ApiCategories.CONTEXT.value,
|
|
1597
|
+
BackendOperations.METRICS.value,
|
|
1598
|
+
project=context.name,
|
|
1599
|
+
entity_type=entity_type,
|
|
1600
|
+
entity_id=entity_id,
|
|
1601
|
+
metric_name=metric_name,
|
|
1602
|
+
)
|
|
1603
|
+
return context.client.read_object(api, **kwargs)
|
|
1604
|
+
|
|
1605
|
+
def update_metric(
|
|
1606
|
+
self,
|
|
1607
|
+
project: str,
|
|
1608
|
+
entity_type: str,
|
|
1609
|
+
entity_id: str,
|
|
1610
|
+
metric_name: str,
|
|
1611
|
+
metric_value: Any,
|
|
1612
|
+
**kwargs,
|
|
1613
|
+
) -> None:
|
|
1614
|
+
"""
|
|
1615
|
+
Get single metric from backend.
|
|
1616
|
+
|
|
1617
|
+
Parameters
|
|
1618
|
+
----------
|
|
1619
|
+
project : str
|
|
1620
|
+
Project name.
|
|
1621
|
+
entity_type : str
|
|
1622
|
+
Entity type.
|
|
1623
|
+
entity_id : str
|
|
1624
|
+
Entity ID.
|
|
1625
|
+
**kwargs : dict
|
|
1626
|
+
Parameters to pass to the API call.
|
|
1627
|
+
|
|
1628
|
+
Returns
|
|
1629
|
+
-------
|
|
1630
|
+
None
|
|
1631
|
+
"""
|
|
1632
|
+
context = self._get_context(project)
|
|
1633
|
+
api = context.client.build_api(
|
|
1634
|
+
ApiCategories.CONTEXT.value,
|
|
1635
|
+
BackendOperations.METRICS.value,
|
|
1636
|
+
project=context.name,
|
|
1637
|
+
entity_type=entity_type,
|
|
1638
|
+
entity_id=entity_id,
|
|
1639
|
+
metric_name=metric_name,
|
|
1640
|
+
)
|
|
1641
|
+
context.client.update_object(api, metric_value, **kwargs)
|
|
1642
|
+
|
|
1647
1643
|
def _search(
|
|
1648
1644
|
self,
|
|
1649
1645
|
project: str,
|
|
@@ -158,7 +158,7 @@ def get_artifact(
|
|
|
158
158
|
>>> project="my-project",
|
|
159
159
|
>>> entity_id="my-artifact-id")
|
|
160
160
|
"""
|
|
161
|
-
return processor.
|
|
161
|
+
return processor.read_context_entity(
|
|
162
162
|
identifier=identifier,
|
|
163
163
|
entity_type=ENTITY_TYPE,
|
|
164
164
|
project=project,
|
|
@@ -198,7 +198,7 @@ def get_artifact_versions(
|
|
|
198
198
|
>>> obj = get_artifact_versions("my-artifact-name"
|
|
199
199
|
>>> project="my-project")
|
|
200
200
|
"""
|
|
201
|
-
return processor.
|
|
201
|
+
return processor.read_context_entity_versions(
|
|
202
202
|
identifier=identifier,
|
|
203
203
|
entity_type=ENTITY_TYPE,
|
|
204
204
|
project=project,
|
|
@@ -226,7 +226,7 @@ def list_artifacts(project: str, **kwargs) -> list[Artifact]:
|
|
|
226
226
|
--------
|
|
227
227
|
>>> objs = list_artifacts(project="my-project")
|
|
228
228
|
"""
|
|
229
|
-
return processor.
|
|
229
|
+
return processor.list_context_entities(
|
|
230
230
|
project=project,
|
|
231
231
|
entity_type=ENTITY_TYPE,
|
|
232
232
|
**kwargs,
|
|
@@ -56,7 +56,7 @@ def process_kwargs(
|
|
|
56
56
|
if path is None:
|
|
57
57
|
uuid = build_uuid()
|
|
58
58
|
kwargs["uuid"] = uuid
|
|
59
|
-
kwargs["path"] = build_log_path_from_source(project, EntityTypes.
|
|
59
|
+
kwargs["path"] = build_log_path_from_source(project, EntityTypes.ARTIFACT.value, name, uuid, source)
|
|
60
60
|
else:
|
|
61
61
|
kwargs["path"] = path
|
|
62
62
|
return kwargs
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
from digitalhub.entities._base.material.entity import MaterialEntity
|
|
7
7
|
from digitalhub.entities._commons.enums import EntityTypes
|
|
8
|
-
from digitalhub.
|
|
8
|
+
from digitalhub.entities.dataitem.utils import DEFAULT_EXTENSION
|
|
9
9
|
from digitalhub.utils.exceptions import EntityError
|
|
10
10
|
from digitalhub.utils.uri_utils import has_sql_scheme
|
|
11
11
|
|
|
@@ -67,7 +67,7 @@ class Dataitem(MaterialEntity):
|
|
|
67
67
|
return file_format
|
|
68
68
|
|
|
69
69
|
if has_sql_scheme(path):
|
|
70
|
-
return
|
|
70
|
+
return DEFAULT_EXTENSION
|
|
71
71
|
|
|
72
72
|
ext = Path(path).suffix[1:]
|
|
73
73
|
if ext is not None:
|
|
@@ -165,7 +165,7 @@ def get_dataitem(
|
|
|
165
165
|
>>> project="my-project",
|
|
166
166
|
>>> entity_id="my-dataitem-id")
|
|
167
167
|
"""
|
|
168
|
-
return processor.
|
|
168
|
+
return processor.read_context_entity(
|
|
169
169
|
identifier=identifier,
|
|
170
170
|
entity_type=ENTITY_TYPE,
|
|
171
171
|
project=project,
|
|
@@ -205,7 +205,7 @@ def get_dataitem_versions(
|
|
|
205
205
|
>>> objs = get_dataitem_versions("my-dataitem-name",
|
|
206
206
|
>>> project="my-project")
|
|
207
207
|
"""
|
|
208
|
-
return processor.
|
|
208
|
+
return processor.read_context_entity_versions(
|
|
209
209
|
identifier=identifier,
|
|
210
210
|
entity_type=ENTITY_TYPE,
|
|
211
211
|
project=project,
|
|
@@ -233,7 +233,7 @@ def list_dataitems(project: str, **kwargs) -> list[Dataitem]:
|
|
|
233
233
|
--------
|
|
234
234
|
>>> objs = list_dataitems(project="my-project")
|
|
235
235
|
"""
|
|
236
|
-
return processor.
|
|
236
|
+
return processor.list_context_entities(
|
|
237
237
|
project=project,
|
|
238
238
|
entity_type=ENTITY_TYPE,
|
|
239
239
|
**kwargs,
|
|
@@ -85,7 +85,7 @@ class DataitemTable(Dataitem):
|
|
|
85
85
|
checker = data_path
|
|
86
86
|
|
|
87
87
|
extension = self._get_extension(checker, file_format)
|
|
88
|
-
return get_store("").read_df(data_path, extension, engine, **kwargs)
|
|
88
|
+
return get_store(self.project, "").read_df(data_path, extension, engine, **kwargs)
|
|
89
89
|
|
|
90
90
|
except Exception as e:
|
|
91
91
|
raise e
|
|
@@ -119,7 +119,7 @@ class DataitemTable(Dataitem):
|
|
|
119
119
|
str
|
|
120
120
|
Path to the written dataframe.
|
|
121
121
|
"""
|
|
122
|
-
return get_store(self.spec.path).write_df(df, self.spec.path, extension=extension, **kwargs)
|
|
122
|
+
return get_store(self.project, self.spec.path).write_df(df, self.spec.path, extension=extension, **kwargs)
|
|
123
123
|
|
|
124
124
|
@staticmethod
|
|
125
125
|
def _clean_tmp_path(pth: Path | None, clean: bool) -> None:
|
|
@@ -1,49 +1,23 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from digitalhub.utils.generic_utils import dump_json
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def
|
|
6
|
+
def prepare_data(data: list[list], columnar: bool = False) -> list[list]:
|
|
7
7
|
"""
|
|
8
|
-
|
|
8
|
+
Prepare data.
|
|
9
9
|
|
|
10
10
|
Parameters
|
|
11
11
|
----------
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
Row count.
|
|
16
|
-
|
|
17
|
-
Returns
|
|
18
|
-
-------
|
|
19
|
-
dict
|
|
20
|
-
Data preview.
|
|
21
|
-
"""
|
|
22
|
-
dict_ = {}
|
|
23
|
-
if preview is not None:
|
|
24
|
-
dict_["cols"] = preview
|
|
25
|
-
if rows_count is not None:
|
|
26
|
-
dict_["rows"] = rows_count
|
|
27
|
-
return dict_
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def get_data_preview(columns: list, data: list[list], columnar: bool = False) -> list[dict]:
|
|
31
|
-
"""
|
|
32
|
-
Prepare preview.
|
|
33
|
-
|
|
34
|
-
Parameters
|
|
35
|
-
----------
|
|
36
|
-
columns : list
|
|
37
|
-
Columns names.
|
|
38
|
-
data : list[list]
|
|
39
|
-
Data to preview.
|
|
40
|
-
columnar : bool
|
|
12
|
+
data : list
|
|
13
|
+
Data.
|
|
14
|
+
columnar : bool | None
|
|
41
15
|
If data are arranged in columns. If False, data are arranged in rows.
|
|
42
16
|
|
|
43
17
|
Returns
|
|
44
18
|
-------
|
|
45
|
-
list[
|
|
46
|
-
|
|
19
|
+
list[list]
|
|
20
|
+
Prepared data.
|
|
47
21
|
"""
|
|
48
22
|
# Reduce data to 10 rows
|
|
49
23
|
if not columnar:
|
|
@@ -56,17 +30,10 @@ def get_data_preview(columns: list, data: list[list], columnar: bool = False) ->
|
|
|
56
30
|
if not columnar:
|
|
57
31
|
data = list(map(list, list(zip(*data))))
|
|
58
32
|
|
|
59
|
-
|
|
60
|
-
data_dict = prepare_preview(columns, data)
|
|
61
|
-
|
|
62
|
-
# Filter memoryview values
|
|
63
|
-
filtered_memview = filter_memoryview(data_dict)
|
|
64
|
-
|
|
65
|
-
# Check the size of the preview data
|
|
66
|
-
return check_preview_size(filtered_memview)
|
|
33
|
+
return data
|
|
67
34
|
|
|
68
35
|
|
|
69
|
-
def prepare_preview(
|
|
36
|
+
def prepare_preview(columns: list, data: list[list]) -> list[dict]:
|
|
70
37
|
"""
|
|
71
38
|
Get preview.
|
|
72
39
|
|
|
@@ -80,9 +47,10 @@ def prepare_preview(column_names: list, data: list[list]) -> list[dict]:
|
|
|
80
47
|
list[dict]
|
|
81
48
|
Preview.
|
|
82
49
|
"""
|
|
83
|
-
if len(
|
|
50
|
+
if len(columns) != len(data):
|
|
84
51
|
raise ValueError("Column names and data must have the same length")
|
|
85
|
-
|
|
52
|
+
preview = [{"name": column, "value": values} for column, values in zip(columns, data)]
|
|
53
|
+
return filter_memoryview(preview)
|
|
86
54
|
|
|
87
55
|
|
|
88
56
|
def filter_memoryview(data: list[dict]) -> list[dict]:
|
|
@@ -91,13 +59,13 @@ def filter_memoryview(data: list[dict]) -> list[dict]:
|
|
|
91
59
|
|
|
92
60
|
Parameters
|
|
93
61
|
----------
|
|
94
|
-
data :
|
|
62
|
+
data : list[dict]
|
|
95
63
|
Data.
|
|
96
64
|
|
|
97
65
|
Returns
|
|
98
66
|
-------
|
|
99
|
-
list[
|
|
100
|
-
|
|
67
|
+
list[dict]
|
|
68
|
+
Preview.
|
|
101
69
|
"""
|
|
102
70
|
key_to_filter = []
|
|
103
71
|
for i in data:
|
|
@@ -108,13 +76,13 @@ def filter_memoryview(data: list[dict]) -> list[dict]:
|
|
|
108
76
|
return data
|
|
109
77
|
|
|
110
78
|
|
|
111
|
-
def check_preview_size(preview:
|
|
79
|
+
def check_preview_size(preview: dict) -> list:
|
|
112
80
|
"""
|
|
113
81
|
Check preview size. If it's too big, return empty list.
|
|
114
82
|
|
|
115
83
|
Parameters
|
|
116
84
|
----------
|
|
117
|
-
preview :
|
|
85
|
+
preview : dict
|
|
118
86
|
Preview.
|
|
119
87
|
|
|
120
88
|
Returns
|
|
@@ -122,6 +90,30 @@ def check_preview_size(preview: list[dict]) -> list:
|
|
|
122
90
|
list
|
|
123
91
|
Preview.
|
|
124
92
|
"""
|
|
125
|
-
if len(
|
|
93
|
+
if len(dump_json(preview)) >= 64000:
|
|
126
94
|
return []
|
|
127
95
|
return preview
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def finalize_preview(preview: list[dict] | None = None, rows_count: int | None = None) -> dict:
|
|
99
|
+
"""
|
|
100
|
+
Finalize preview.
|
|
101
|
+
|
|
102
|
+
Parameters
|
|
103
|
+
----------
|
|
104
|
+
preview : list[dict] | None
|
|
105
|
+
Preview.
|
|
106
|
+
rows_count : int | None
|
|
107
|
+
Row count.
|
|
108
|
+
|
|
109
|
+
Returns
|
|
110
|
+
-------
|
|
111
|
+
dict
|
|
112
|
+
Data preview.
|
|
113
|
+
"""
|
|
114
|
+
data = {}
|
|
115
|
+
if preview is not None:
|
|
116
|
+
data["cols"] = preview
|
|
117
|
+
if rows_count is not None:
|
|
118
|
+
data["rows"] = rows_count
|
|
119
|
+
return data
|
|
@@ -8,7 +8,6 @@ from digitalhub.context.api import get_context
|
|
|
8
8
|
from digitalhub.entities._base.entity._constructors.uuid import build_uuid
|
|
9
9
|
from digitalhub.entities._base.material.utils import build_log_path_from_source, eval_local_source
|
|
10
10
|
from digitalhub.entities._commons.enums import EntityKinds, EntityTypes
|
|
11
|
-
from digitalhub.readers._commons.enums import Extensions
|
|
12
11
|
from digitalhub.readers.api import get_reader_by_object
|
|
13
12
|
from digitalhub.utils.generic_utils import slugify_string
|
|
14
13
|
|
|
@@ -16,6 +15,9 @@ if typing.TYPE_CHECKING:
|
|
|
16
15
|
from digitalhub.entities.dataitem._base.entity import Dataitem
|
|
17
16
|
|
|
18
17
|
|
|
18
|
+
DEFAULT_EXTENSION = "parquet"
|
|
19
|
+
|
|
20
|
+
|
|
19
21
|
def eval_source(
|
|
20
22
|
source: str | list[str] | None = None,
|
|
21
23
|
data: Any | None = None,
|
|
@@ -39,11 +41,12 @@ def eval_source(
|
|
|
39
41
|
raise ValueError("You must provide source or data.")
|
|
40
42
|
|
|
41
43
|
if source is not None:
|
|
42
|
-
|
|
44
|
+
eval_local_source(source)
|
|
45
|
+
return source
|
|
43
46
|
|
|
44
47
|
if kind == EntityKinds.DATAITEM_TABLE.value:
|
|
45
48
|
ctx = get_context(project)
|
|
46
|
-
pth = ctx.root / f"{slugify_string(name)}.{
|
|
49
|
+
pth = ctx.root / f"{slugify_string(name)}.{DEFAULT_EXTENSION}"
|
|
47
50
|
reader = get_reader_by_object(data)
|
|
48
51
|
reader.write_parquet(data, pth)
|
|
49
52
|
return str(pth)
|