datachain 0.30.2__py3-none-any.whl → 0.30.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datachain has been flagged as possibly problematic.
- datachain/catalog/catalog.py +86 -29
- datachain/cli/commands/datasets.py +3 -2
- datachain/data_storage/metastore.py +34 -9
- datachain/delta.py +23 -12
- datachain/func/string.py +8 -0
- datachain/lib/dc/database.py +50 -6
- datachain/lib/dc/datachain.py +31 -9
- datachain/lib/dc/datasets.py +9 -4
- datachain/listing.py +5 -9
- datachain/model/ultralytics/bbox.py +14 -12
- datachain/model/ultralytics/pose.py +14 -12
- datachain/model/ultralytics/segment.py +14 -12
- datachain/query/dataset.py +20 -10
- {datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/METADATA +2 -2
- {datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/RECORD +19 -19
- {datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/WHEEL +0 -0
- {datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/entry_points.txt +0 -0
- {datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
@@ -680,8 +680,9 @@ class Catalog:
         ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
         assert ds_namespace
         assert ds_project
-
-
+        dataset = self.get_dataset(
+            ds_name, namespace_name=ds_namespace, project_name=ds_project
+        )
         if not ds_version:
             ds_version = dataset.latest_version
         dataset_sources = self.warehouse.get_dataset_sources(
@@ -807,7 +808,11 @@ class Catalog:
         )
         default_version = DEFAULT_DATASET_VERSION
         try:
-            dataset = self.get_dataset(
+            dataset = self.get_dataset(
+                name,
+                namespace_name=project.namespace.name if project else None,
+                project_name=project.name if project else None,
+            )
             default_version = dataset.next_version_patch
             if update_version == "major":
                 default_version = dataset.next_version_major
@@ -1016,7 +1021,11 @@ class Catalog:
             dc.save(name)
         except Exception as e:  # noqa: BLE001
             try:
-                ds = self.get_dataset(
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=project.namespace.name,
+                    project_name=project.name,
+                )
                 self.metastore.update_dataset_status(
                     ds,
                     DatasetStatus.FAILED,
@@ -1033,7 +1042,11 @@ class Catalog:
            except DatasetNotFoundError:
                raise e from None

-        ds = self.get_dataset(
+        ds = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

        self.update_dataset_version_with_warehouse_info(
            ds,
@@ -1041,7 +1054,11 @@ class Catalog:
            sources="\n".join(sources),
        )

-        return self.get_dataset(
+        return self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

     def get_full_dataset_name(
         self,
@@ -1077,22 +1094,23 @@ class Catalog:
         return namespace_name, project_name, name

     def get_dataset(
-        self,
+        self,
+        name: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
     ) -> DatasetRecord:
         from datachain.lib.listing import is_listing_dataset

-
+        namespace_name = namespace_name or self.metastore.default_namespace_name
+        project_name = project_name or self.metastore.default_project_name

         if is_listing_dataset(name):
-
+            namespace_name = self.metastore.system_namespace_name
+            project_name = self.metastore.listing_project_name

-
-
-
-            raise DatasetNotFoundError(
-                f"Dataset {name} not found in namespace {project.namespace.name}"
-                f" and project {project.name}"
-            ) from None
+        return self.metastore.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )

     def get_dataset_with_remote_fallback(
         self,
@@ -1113,8 +1131,11 @@ class Catalog:

         if self.metastore.is_local_dataset(namespace_name) or not update:
             try:
-
-
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=namespace_name,
+                    project_name=project_name,
+                )
                 if not version or ds.has_version(version):
                     return ds
             except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
@@ -1139,7 +1160,9 @@ class Catalog:
                 local_ds_version=version,
             )
             return self.get_dataset(
-                name,
+                name,
+                namespace_name=namespace_name,
+                project_name=project_name,
             )

         return self.get_remote_dataset(namespace_name, project_name, name)
@@ -1148,7 +1171,11 @@ class Catalog:
         """Returns dataset that contains version with specific uuid"""
         for dataset in self.ls_datasets():
             if dataset.has_version_with_uuid(uuid):
-                return self.get_dataset(
+                return self.get_dataset(
+                    dataset.name,
+                    namespace_name=dataset.project.namespace.name,
+                    project_name=dataset.project.name,
+                )
         raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")

     def get_remote_dataset(
@@ -1171,9 +1198,18 @@ class Catalog:
         return DatasetRecord.from_dict(dataset_info)

     def get_dataset_dependencies(
-        self,
+        self,
+        name: str,
+        version: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        indirect=False,
     ) -> list[Optional[DatasetDependency]]:
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=namespace_name,
+            project_name=project_name,
+        )

         direct_dependencies = self.metastore.get_direct_dataset_dependencies(
             dataset, version
@@ -1187,10 +1223,13 @@ class Catalog:
                 # dependency has been removed
                 continue
             if d.is_dataset:
-                project = self.metastore.get_project(d.project, d.namespace)
                 # only datasets can have dependencies
                 d.dependencies = self.get_dataset_dependencies(
-                    d.name,
+                    d.name,
+                    d.version,
+                    namespace_name=d.namespace,
+                    project_name=d.project,
+                    indirect=indirect,
                 )

         return direct_dependencies
@@ -1340,7 +1379,11 @@ class Catalog:
         project: Optional[Project] = None,
         client_config=None,
     ) -> list[str]:
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )

         return self.warehouse.export_dataset_table(
             bucket_uri, dataset, version, client_config
@@ -1349,7 +1392,11 @@ class Catalog:
     def dataset_table_export_file_names(
         self, name: str, version: str, project: Optional[Project] = None
     ) -> list[str]:
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         return self.warehouse.dataset_table_export_file_names(dataset, version)

     def remove_dataset(
@@ -1359,7 +1406,11 @@ class Catalog:
         version: Optional[str] = None,
         force: Optional[bool] = False,
     ):
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         if not version and not force:
             raise ValueError(f"Missing dataset version from input for dataset {name}")
         if version and not dataset.has_version(version):
@@ -1395,7 +1446,11 @@ class Catalog:
         if attrs is not None:
             update_data["attrs"] = attrs  # type: ignore[assignment]

-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         return self.update_dataset(dataset, **update_data)

     def ls(
@@ -1549,7 +1604,9 @@ class Catalog:
         )

         try:
-            local_dataset = self.get_dataset(
+            local_dataset = self.get_dataset(
+                local_ds_name, namespace_name=namespace.name, project_name=project.name
+            )
             if local_dataset and local_dataset.has_version(local_ds_version):
                 raise DataChainError(
                     f"Local dataset {local_ds_uri} already exists with different uuid,"
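The thread running through all of these catalog.py hunks is a single API change: `Catalog.get_dataset` now takes optional `namespace_name`/`project_name` keyword arguments instead of a `Project` object, and every call site was updated to match. A hedged sketch of the new call pattern (the `catalog` handle and all names below are illustrative placeholders, not values from this diff):

```py
# Resolve a dataset by explicit namespace/project names (0.30.3 style).
dataset = catalog.get_dataset(
    "my_dataset",
    namespace_name="my-namespace",
    project_name="my-project",
)
# Both kwargs are optional: omitting them falls back to
# catalog.metastore.default_namespace_name / default_project_name,
# as the new get_dataset body above shows.
print(dataset.latest_version)
```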
datachain/cli/commands/datasets.py
CHANGED
@@ -107,8 +107,9 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
 def list_datasets_local_versions(catalog: "Catalog", name: str):
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)

-
-
+    ds = catalog.get_dataset(
+        name, namespace_name=namespace_name, project_name=project_name
+    )
     for v in ds.versions:
         yield (name, v.version)
datachain/data_storage/metastore.py
CHANGED
@@ -301,7 +301,13 @@ class AbstractMetastore(ABC, Serializable):
     """

     @abstractmethod
-    def get_dataset(
+    def get_dataset(
+        self,
+        name: str,  # normal, not full dataset name
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        conn=None,
+    ) -> DatasetRecord:
         """Gets a single dataset by name."""

     @abstractmethod
@@ -912,11 +918,14 @@ class AbstractDBMetastore(AbstractMetastore):
         **kwargs,  # TODO registered = True / False
     ) -> DatasetRecord:
         """Creates new dataset."""
-
+        if not project_id:
+            project = self.default_project
+        else:
+            project = self.get_project_by_id(project_id)

         query = self._datasets_insert().values(
             name=name,
-            project_id=
+            project_id=project.id,
             status=status,
             feature_schema=json.dumps(feature_schema or {}),
             created_at=datetime.now(timezone.utc),
@@ -935,7 +944,9 @@ class AbstractDBMetastore(AbstractMetastore):
         query = query.on_conflict_do_nothing(index_elements=["project_id", "name"])
         self.db.execute(query)

-        return self.get_dataset(
+        return self.get_dataset(
+            name, namespace_name=project.namespace.name, project_name=project.name
+        )

     def create_dataset_version(  # noqa: PLR0913
         self,
@@ -992,7 +1003,12 @@ class AbstractDBMetastore(AbstractMetastore):
         )
         self.db.execute(query, conn=conn)

-        return self.get_dataset(
+        return self.get_dataset(
+            dataset.name,
+            namespace_name=dataset.project.namespace.name,
+            project_name=dataset.project.name,
+            conn=conn,
+        )

     def remove_dataset(self, dataset: DatasetRecord) -> None:
         """Removes dataset."""
@@ -1216,21 +1232,30 @@ class AbstractDBMetastore(AbstractMetastore):
     def get_dataset(
         self,
         name: str,  # normal, not full dataset name
-
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
         conn=None,
     ) -> DatasetRecord:
         """
         Gets a single dataset in project by dataset name.
         """
-
+        namespace_name = namespace_name or self.default_namespace_name
+        project_name = project_name or self.default_project_name

         d = self._datasets
+        n = self._namespaces
+        p = self._projects
         query = self._base_dataset_query()
-        query = query.where(
+        query = query.where(
+            d.c.name == name,
+            n.c.name == namespace_name,
+            p.c.name == project_name,
+        )  # type: ignore [attr-defined]
         ds = self._parse_dataset(self.db.execute(query, conn=conn))
         if not ds:
             raise DatasetNotFoundError(
-                f"Dataset {name} not found in
+                f"Dataset {name} not found in namespace {namespace_name}"
+                f" and project {project_name}"
             )

         return ds
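Note that the abstract and concrete `get_dataset` signatures now agree, and both apply the same name-defaulting rule. A minimal standalone sketch of that fallback, mirroring the two added lines (the `metastore` argument is a stand-in for any metastore implementation):

```py
def resolve_dataset_scope(metastore, namespace_name=None, project_name=None):
    # Same or-fallback the diff adds at the top of get_dataset().
    namespace_name = namespace_name or metastore.default_namespace_name
    project_name = project_name or metastore.default_project_name
    return namespace_name, project_name
```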
datachain/delta.py
CHANGED
@@ -77,7 +77,8 @@ def _get_delta_chain(

 def _get_retry_chain(
     name: str,
-
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     source_ds_name: str,
     source_ds_project: Project,
@@ -96,8 +97,8 @@ def _get_retry_chain(
     # Read the latest version of the result dataset for retry logic
     result_dataset = datachain.read_dataset(
         name,
-        namespace=
-        project=
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     source_dc = datachain.read_dataset(
@@ -128,7 +129,8 @@ def _get_retry_chain(

 def _get_source_info(
     name: str,
-
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     catalog,
 ) -> tuple[
@@ -145,7 +147,11 @@ def _get_source_info(
     Returns (None, None, None, None) if source dataset was removed.
     """
     dependencies = catalog.get_dataset_dependencies(
-        name,
+        name,
+        latest_version,
+        namespace_name=namespace_name,
+        project_name=project_name,
+        indirect=False,
     )

     dep = dependencies[0]
@@ -157,7 +163,9 @@ def _get_source_info(
     source_ds_name = dep.name
     source_ds_version = dep.version
     source_ds_latest_version = catalog.get_dataset(
-        source_ds_name,
+        source_ds_name,
+        namespace_name=source_ds_project.namespace.name,
+        project_name=source_ds_project.name,
     ).latest_version

     return (
@@ -211,12 +219,14 @@ def delta_retry_update(
     """

     catalog = dc.session.catalog
-    project = catalog.metastore.get_project(project_name, namespace_name)
+    # project = catalog.metastore.get_project(project_name, namespace_name)
     dc._query.apply_listing_pre_step()

     # Check if dataset exists
     try:
-        dataset = catalog.get_dataset(
+        dataset = catalog.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )
         latest_version = dataset.latest_version
     except DatasetNotFoundError:
         # First creation of result dataset
@@ -234,7 +244,7 @@ def delta_retry_update(
         source_ds_version,
         source_ds_latest_version,
         dependencies,
-    ) = _get_source_info(name,
+    ) = _get_source_info(name, namespace_name, project_name, latest_version, catalog)

     # If source_ds_name is None, starting dataset was removed
     if source_ds_name is None:
@@ -264,7 +274,8 @@ def delta_retry_update(
     if delta_retry:
         retry_chain = _get_retry_chain(
             name,
-
+            namespace_name,
+            project_name,
             latest_version,
             source_ds_name,
             source_ds_project,
@@ -290,8 +301,8 @@ def delta_retry_update(

     latest_dataset = datachain.read_dataset(
         name,
-        namespace=
-        project=
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     compared_chain = latest_dataset.diff(
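On the public side, the delta/retry helpers now thread plain strings into `datachain.read_dataset`, which the diff shows accepting `namespace` and `project` keywords directly. A usage sketch (the dataset name, namespace, project, and version below are placeholders):

```py
import datachain

# Read a specific version of a dataset in an explicit namespace/project,
# matching the call shape used inside _get_retry_chain above.
result = datachain.read_dataset(
    "my_dataset",
    namespace="my-namespace",
    project="my-project",
    version="1.0.1",
)
```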
datachain/func/string.py
CHANGED
datachain/lib/dc/database.py
CHANGED
@@ -77,12 +77,15 @@ def to_database(
     on_conflict: Optional[str] = None,
     conflict_columns: Optional[list[str]] = None,
     column_mapping: Optional[dict[str, Optional[str]]] = None,
-) ->
+) -> int:
     """
     Implementation function for exporting DataChain to database tables.

     This is the core implementation that handles the actual database operations.
     For user-facing documentation, see DataChain.to_database() method.
+
+    Returns:
+        int: Number of rows affected (inserted/updated).
     """
     if on_conflict and on_conflict not in ("ignore", "update"):
         raise ValueError(
@@ -101,11 +104,16 @@ def to_database(
         all_columns, normalized_column_mapping
     )

+    normalized_conflict_columns = _normalize_conflict_columns(
+        conflict_columns, normalized_column_mapping
+    )
+
     with _connect(connection) as conn:
         metadata = sqlalchemy.MetaData()
         table = sqlalchemy.Table(table_name, metadata, *columns)

         table_existed_before = False
+        total_rows_affected = 0
         try:
             with conn.begin():
                 # Check if table exists to determine if we should clean up on error.
@@ -117,14 +125,18 @@ def to_database(

                 rows_iter = chain._leaf_values()
                 for batch in batched(rows_iter, batch_rows):
-                    _process_batch(
+                    rows_affected = _process_batch(
                         conn,
                         table,
                         batch,
                         on_conflict,
-
+                        normalized_conflict_columns,
                         column_indices_and_names,
                     )
+                    if rows_affected < 0 or total_rows_affected < 0:
+                        total_rows_affected = -1
+                    else:
+                        total_rows_affected += rows_affected
         except Exception:
             if not table_existed_before:
                 try:
@@ -134,6 +146,8 @@ def to_database(
                     pass
             raise

+    return total_rows_affected
+

 def _normalize_column_mapping(
     column_mapping: dict[str, Optional[str]],
@@ -174,6 +188,30 @@ def _normalize_column_mapping(
     return normalized_mapping


+def _normalize_conflict_columns(
+    conflict_columns: Optional[list[str]], column_mapping: dict[str, Optional[str]]
+) -> Optional[list[str]]:
+    """
+    Normalize conflict_columns by converting DataChain format to database format
+    and applying column mapping.
+    """
+    if not conflict_columns:
+        return None
+
+    normalized_columns = []
+    for col in conflict_columns:
+        db_col = ColumnMeta.to_db_name(col)
+
+        if db_col in column_mapping or hasattr(column_mapping, "default_factory"):
+            mapped_name = column_mapping[db_col]
+            if mapped_name:
+                normalized_columns.append(mapped_name)
+        else:
+            normalized_columns.append(db_col)
+
+    return normalized_columns
+
+
 def _prepare_columns(all_columns, column_mapping):
     """Prepare column mapping and column definitions."""
     column_indices_and_names = []  # List of (index, target_name) tuples
@@ -192,8 +230,12 @@ def _prepare_columns(all_columns, column_mapping):

 def _process_batch(
     conn, table, batch, on_conflict, conflict_columns, column_indices_and_names
-):
-    """Process a batch of rows with conflict resolution.
+) -> int:
+    """Process a batch of rows with conflict resolution.
+
+    Returns:
+        int: Number of rows affected by the insert operation.
+    """

     def prepare_row(row_values):
         """Convert a row tuple to a dictionary with proper DB column names."""
@@ -206,6 +248,7 @@ def _process_batch(

     supports_conflict = on_conflict and conn.engine.name in ("postgresql", "sqlite")

+    insert_stmt: Any  # Can be PostgreSQL, SQLite, or regular insert statement
     if supports_conflict:
         # Use dialect-specific insert for conflict resolution
         if conn.engine.name == "postgresql":
@@ -249,7 +292,8 @@ def _process_batch(
             stacklevel=2,
         )

-    conn.execute(insert_stmt, rows_to_insert)
+    result = conn.execute(insert_stmt, rows_to_insert)
+    return result.rowcount


 def read_database(
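The new return path in `to_database` sums per-batch DB-API `rowcount` values and collapses to -1 as soon as any driver reports an unknown (negative) count. A self-contained sketch of just that accumulation rule:

```py
def accumulate_rowcount(total: int, batch_rowcount: int) -> int:
    # Mirrors the logic added to to_database(): once any batch reports a
    # negative rowcount (DB-API convention for "unknown"), the total is -1.
    if batch_rowcount < 0 or total < 0:
        return -1
    return total + batch_rowcount

assert accumulate_rowcount(10, 5) == 15
assert accumulate_rowcount(10, -1) == -1
assert accumulate_rowcount(-1, 5) == -1
```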
datachain/lib/dc/datachain.py
CHANGED
@@ -284,7 +284,11 @@ class DataChain:
         """Underlying dataset, if there is one."""
         if not self.name:
             return None
-        return self.session.catalog.get_dataset(
+        return self.session.catalog.get_dataset(
+            self.name,
+            namespace_name=self._query.project.namespace.name,
+            project_name=self._query.project.name,
+        )

     def __or__(self, other: "Self") -> "Self":
         """Return `self.union(other)`."""
@@ -2298,13 +2302,17 @@ class DataChain:
         on_conflict: Optional[str] = None,
         conflict_columns: Optional[list[str]] = None,
         column_mapping: Optional[dict[str, Optional[str]]] = None,
-    ) ->
+    ) -> int:
         """Save chain to a database table using a given database connection.

         This method exports all DataChain records to a database table, creating the
         table if it doesn't exist and appending data if it does. The table schema
         is automatically inferred from the DataChain's signal schema.

+        For PostgreSQL, tables are created in the schema specified by the connection's
+        search_path (defaults to 'public'). Use URL parameters to target specific
+        schemas.
+
         Parameters:
             table_name: Name of the database table to create/write to.
             connection: SQLAlchemy connectable, str, or a sqlite3 connection
@@ -2328,20 +2336,26 @@ class DataChain:
             - Set values to None to skip columns entirely, or use `defaultdict` to
               skip all columns except those specified.

+        Returns:
+            int: Number of rows affected (inserted/updated). -1 if DB driver doesn't
+                support telemetry.
+
         Examples:
             Basic usage with PostgreSQL:
             ```py
-            import sqlalchemy as sa
             import datachain as dc

-
-
-
+            rows_affected = (dc
+                .read_storage("s3://my-bucket/")
+                .to_database("files_table", "postgresql://user:pass@localhost/mydb")
+            )
+            print(f"Inserted/updated {rows_affected} rows")
             ```

             Using SQLite with connection string:
             ```py
-            chain.to_database("my_table", "sqlite:///data.db")
+            rows_affected = chain.to_database("my_table", "sqlite:///data.db")
+            print(f"Affected {rows_affected} rows")
             ```

             Column mapping and renaming:
@@ -2360,7 +2374,9 @@ class DataChain:
             chain.to_database("my_table", engine, on_conflict="ignore")

             # Update existing records
-            chain.to_database(
+            chain.to_database(
+                "my_table", engine, on_conflict="update", conflict_columns=["id"]
+            )
             ```

             Working with different databases:
@@ -2372,10 +2388,16 @@ class DataChain:
             # SQLite in-memory
             chain.to_database("temp_table", "sqlite:///:memory:")
             ```
+
+            PostgreSQL with schema support:
+            ```py
+            pg_url = "postgresql://user:pass@host/db?options=-c search_path=analytics"
+            chain.to_database("processed_data", pg_url)
+            ```
         """
         from .database import to_database

-        to_database(
+        return to_database(
             self,
             table_name,
             connection,
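Because `DataChain.to_database` now returns that count, the change is easy to verify end to end with SQLite. A hedged, runnable sketch (assumes `dc.read_values` is available, as in current datachain releases; the table name is a placeholder):

```py
import datachain as dc

chain = dc.read_values(num=[1, 2, 3])
rows = chain.to_database("nums", "sqlite:///:memory:")
print(rows)  # expected 3, or -1 if the driver cannot report rowcounts
```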
datachain/lib/dc/datasets.py
CHANGED
@@ -357,7 +357,14 @@ def delete_dataset(
     ) from None

     if not force:
-        version =
+        version = (
+            version
+            or catalog.get_dataset(
+                name,
+                namespace_name=ds_project.namespace.name,
+                project_name=ds_project.name,
+            ).latest_version
+        )
     else:
         version = None
     catalog.remove_dataset(name, ds_project, version=version, force=force)
@@ -403,9 +410,7 @@ def move_dataset(
     namespace, project, name = catalog.get_full_dataset_name(src)
     dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest)

-    dataset = catalog.get_dataset(
-        name, catalog.metastore.get_project(project, namespace)
-    )
+    dataset = catalog.get_dataset(name, namespace_name=namespace, project_name=project)

     catalog.update_dataset(
         dataset,
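For `delete_dataset`, the practical effect is that omitting `version` without `force` now deletes only the dataset's latest version, resolved in the dataset's own namespace and project. A hedged sketch of the public call (assumes `delete_dataset` is exported at the package top level; the dataset name is a placeholder):

```py
import datachain as dc

# With no version and force left False, only the latest version is removed.
dc.delete_dataset("my_dataset")
```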
datachain/listing.py
CHANGED
@@ -65,17 +65,13 @@ class Listing:

     @cached_property
     def dataset(self) -> "DatasetRecord":
-        from datachain.error import DatasetNotFoundError
-
         assert self.dataset_name
         project = self.metastore.listing_project
-
-
-
-
-
-            f" {project.namespace.name} and project {project.name}"
-        ) from None
+        return self.metastore.get_dataset(
+            self.dataset_name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

     @cached_property
     def dataset_rows(self):
datachain/model/ultralytics/bbox.py
CHANGED
@@ -31,11 +31,11 @@ class YoloBBox(DataModel):
         if not summary:
             return YoloBBox(box=BBox())
         name = summary[0].get("name", "")
-
-
-
-
-
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
         return YoloBBox(
             cls=summary[0]["class"],
             name=name,
@@ -70,7 +70,8 @@ class YoloBBoxes(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
-
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
         return YoloBBoxes(
             cls=cls,
             name=names,
@@ -101,11 +102,11 @@ class YoloOBBox(DataModel):
         if not summary:
             return YoloOBBox(box=OBBox())
         name = summary[0].get("name", "")
-
-
-
-
-
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = OBBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = OBBox()
         return YoloOBBox(
             cls=summary[0]["class"],
             name=name,
@@ -140,7 +141,8 @@ class YoloOBBoxes(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
-
+                assert isinstance(s["box"], dict)
+                box.append(OBBox.from_dict(s["box"], title=name))
         return YoloOBBoxes(
             cls=cls,
             name=names,
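All three Ultralytics wrappers (bbox, pose, segment below) gain the same defensive parse: check the key in the `result.summary()` entry, assert it is a dict, and fall back to an empty model otherwise. A minimal standalone version of the guard (`parse_box` is a hypothetical helper, not part of the package; the import path is assumed from the package layout):

```py
from datachain.model import BBox  # import path assumed


def parse_box(entry: dict) -> BBox:
    # Same guard the diff adds around the Ultralytics summary parsing.
    name = entry.get("name", "")
    if entry.get("box"):
        assert isinstance(entry["box"], dict)
        return BBox.from_dict(entry["box"], title=name)
    return BBox()
```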
datachain/model/ultralytics/pose.py
CHANGED
@@ -56,16 +56,16 @@ class YoloPose(DataModel):
         if not summary:
             return YoloPose(box=BBox(), pose=Pose3D())
         name = summary[0].get("name", "")
-
-
-
-
-
-
-
-
-
-
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
+        if summary[0].get("keypoints"):
+            assert isinstance(summary[0]["keypoints"], dict)
+            pose = Pose3D.from_dict(summary[0]["keypoints"])
+        else:
+            pose = Pose3D()
         return YoloPose(
             cls=summary[0]["class"],
             name=name,
@@ -103,9 +103,11 @@ class YoloPoses(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
-
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
             if s.get("keypoints"):
-
+                assert isinstance(s["keypoints"], dict)
+                pose.append(Pose3D.from_dict(s["keypoints"]))
         return YoloPoses(
             cls=cls,
             name=names,
datachain/model/ultralytics/segment.py
CHANGED
@@ -34,16 +34,16 @@ class YoloSegment(DataModel):
         if not summary:
             return YoloSegment(box=BBox(), segment=Segment())
         name = summary[0].get("name", "")
-
-
-
-
-
-
-
-
-
-
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
+        if summary[0].get("segments"):
+            assert isinstance(summary[0]["segments"], dict)
+            segment = Segment.from_dict(summary[0]["segments"], title=name)
+        else:
+            segment = Segment()
         return YoloSegment(
             cls=summary[0]["class"],
             name=summary[0]["name"],
@@ -81,9 +81,11 @@ class YoloSegments(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
-
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
             if s.get("segments"):
-
+                assert isinstance(s["segments"], dict)
+                segment.append(Segment.from_dict(s["segments"], title=name))
         return YoloSegments(
             cls=cls,
             name=names,
datachain/query/dataset.py
CHANGED
@@ -1703,16 +1703,18 @@ class DatasetQuery:
         for dep in self.catalog.get_dataset_dependencies(
             dep_dataset.name,
             dep_dataset_version,
-            dep_dataset.project,
+            namespace_name=dep_dataset.project.namespace.name,
+            project_name=dep_dataset.project.name,
             indirect=False,
         ):
             if dep:
-                dep_project = self.catalog.metastore.get_project(
-                    dep.project, dep.namespace
-                )
                 dependencies.add(
                     (
-                        self.catalog.get_dataset(
+                        self.catalog.get_dataset(
+                            dep.name,
+                            namespace_name=dep.namespace,
+                            project_name=dep.project,
+                        ),
                         dep.version,
                     )
                 )
@@ -1754,7 +1756,11 @@ class DatasetQuery:
         if (
             name
             and version
-            and self.catalog.get_dataset(
+            and self.catalog.get_dataset(
+                name,
+                namespace_name=project.namespace.name,
+                project_name=project.name,
+            ).has_version(version)
         ):
             raise RuntimeError(f"Dataset {name} already has version {version}")
     except DatasetNotFoundError:
@@ -1808,11 +1814,15 @@ class DatasetQuery:
         # overriding dependencies
         self.dependencies = set()
         for dep in dependencies:
-            dep_project = self.catalog.metastore.get_project(
-                dep.project, dep.namespace
-            )
             self.dependencies.add(
-                (
+                (
+                    self.catalog.get_dataset(
+                        dep.name,
+                        namespace_name=dep.namespace,
+                        project_name=dep.project,
+                    ),
+                    dep.version,
+                )
             )

         self._add_dependencies(dataset, version)  # type: ignore [arg-type]
{datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.30.2
+Version: 0.30.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
 Requires-Dist: Pillow<12,>=10.0.0
 Requires-Dist: msgpack<2,>=1.0.4
 Requires-Dist: psutil
-Requires-Dist: huggingface_hub
+Requires-Dist: huggingface_hub
 Requires-Dist: iterative-telemetry>=0.0.10
 Requires-Dist: platformdirs
 Requires-Dist: dvc-studio-client<1,>=0.21
{datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/RECORD
CHANGED
@@ -4,10 +4,10 @@ datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
 datachain/dataset.py,sha256=ATGa-CBTFoZeTN2V40-zHEzfMBcdYK0WuoJ6H2yEAvo,25268
-datachain/delta.py,sha256=
+datachain/delta.py,sha256=dghGvD44LcglvL5-kUOIKk75ywBO0U7eikA3twKZC28,10202
 datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
 datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
-datachain/listing.py,sha256=
+datachain/listing.py,sha256=aqayl5St3D9PwdwM6nR1STkpLSw-S3U8pudO9PWi3N8,7241
 datachain/namespace.py,sha256=MozcXYxedIbamzY56YKy9r9fgSpOm2VryhWfIf6stYk,1791
 datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
 datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
@@ -21,13 +21,13 @@ datachain/studio.py,sha256=27750qCSNxIChEzhV02damIFreLMfr7UdiWqMFyk8AA,15361
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=Gp5JVr_m7nVWQGDOjrGnZjRXF9-Ai-MBxiPJIcpPvWQ,15451
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=vy5k0ME9FxDnpV5D9Ry0YT8eRAKZglUDdyePslJSBpk,67174
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=B2cps5coFE4MBttM-j8cs7JgNVPjnHKF4Gx1s2fJrxw,6119
 datachain/cli/__init__.py,sha256=migILaB4-dSSlxEoi6MYOGhOg2jpZo3a2AhS3mSuN8o,8237
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
-datachain/cli/commands/datasets.py,sha256=
+datachain/cli/commands/datasets.py,sha256=m8aTlUMpYCZmfSwhp7Lb_UFaQRQm_kOWXh71bNBn7qQ,6461
 datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
 datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
 datachain/cli/commands/ls.py,sha256=CBmk838Q-EQp04lE2Qdnpsc1GXAkC4-I-b-a_828n1E,5272
@@ -49,7 +49,7 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
 datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
-datachain/data_storage/metastore.py,sha256=
+datachain/data_storage/metastore.py,sha256=0-myih2VtUm_gcwz8bNgnP2h0rBkBl6v1IEhduShb4w,54517
 datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=TTQjdDXUaZSr3MEaxZjDhsVIkIJqxFNA-sD25TO3m_4,30228
@@ -67,7 +67,7 @@ datachain/func/func.py,sha256=fpslnn4edr0dH3mD8BSTndRFJiiVZvbJoBJV6HkHMqw,17400
 datachain/func/numeric.py,sha256=J6FgzuIAcS6B02Cm1qPnJdB6ut21jyBDVXSBrkZNZaQ,6978
 datachain/func/path.py,sha256=9Jas35QhEtRai4l54hMqVvuJsqxHvOx88oo4vym1H_I,4077
 datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
-datachain/func/string.py,sha256=
+datachain/func/string.py,sha256=6-fZM7wHv0JZ2ZzpLFPLLYW15K_CT5VfYsmx56zBrpA,7419
 datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=geoLvyDd5uMqS3D9Ec1ODlShCUAdtwHUwl8FqbUX_hg,10776
@@ -103,9 +103,9 @@ datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sD
 datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUOzHUGPoyZXAB0,4360
 datachain/lib/dc/__init__.py,sha256=TFci5HTvYGjBesNUxDAnXaX36PnzPEUSn5a6JxB9o0U,872
 datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
-datachain/lib/dc/database.py,sha256=
-datachain/lib/dc/datachain.py,sha256=
-datachain/lib/dc/datasets.py,sha256=
+datachain/lib/dc/database.py,sha256=F6EOjPKwSdp26kJsOKGq49D9OxqyKEalINHEwLQav2s,14716
+datachain/lib/dc/datachain.py,sha256=sfLT_iMGkNsPh5JAPzRATyC4hsotp34bDS0rSu2NQ10,99244
+datachain/lib/dc/datasets.py,sha256=jRwfHUUESlNdK1heUNxb-V10ZobRJYPbvCqYYkz2FYI,15240
 datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
 datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
@@ -121,12 +121,12 @@ datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
 datachain/model/segment.py,sha256=NhcEYB_KVa0aLQYiZ4jEwkylH9QBLd8fZhmg6PVnx1Y,1967
 datachain/model/utils.py,sha256=5elwCKleOO6CZM0IuWjFykPekrhc5m7V4jSIOcgGMms,6733
 datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
-datachain/model/ultralytics/bbox.py,sha256=
-datachain/model/ultralytics/pose.py,sha256=
-datachain/model/ultralytics/segment.py,sha256=
+datachain/model/ultralytics/bbox.py,sha256=C-aDiBhVa_ML2oERWvksRkyMU1XuYSpb6eItHB5q0qc,4764
+datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigFYNZWUA,3392
+datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=qMVySOlyDm3b-zpF7z_DGeeCfltH7YSGwB7HYYmNhBU,63081
 datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
 datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -160,9 +160,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
+datachain-0.30.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.30.3.dist-info/METADATA,sha256=TataasDxkiKnymsMaLxda_Cp0DhYE_FhUMA8CKBXCgg,13903
+datachain-0.30.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.30.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.30.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.30.3.dist-info/RECORD,,
{datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/WHEEL
File without changes
{datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/entry_points.txt
File without changes
{datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/licenses/LICENSE
File without changes
{datachain-0.30.2.dist-info → datachain-0.30.3.dist-info}/top_level.txt
File without changes