datachain 0.30.1__py3-none-any.whl → 0.30.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/catalog/catalog.py +86 -29
- datachain/cli/commands/datasets.py +3 -2
- datachain/data_storage/metastore.py +34 -9
- datachain/delta.py +23 -12
- datachain/func/string.py +8 -0
- datachain/lib/dc/database.py +84 -19
- datachain/lib/dc/datachain.py +36 -9
- datachain/lib/dc/datasets.py +9 -4
- datachain/listing.py +5 -9
- datachain/model/ultralytics/bbox.py +14 -12
- datachain/model/ultralytics/pose.py +14 -12
- datachain/model/ultralytics/segment.py +14 -12
- datachain/query/dataset.py +20 -10
- datachain/sql/__init__.py +2 -0
- datachain/sql/postgresql_dialect.py +9 -0
- datachain/sql/postgresql_types.py +21 -0
- datachain/sql/sqlite/__init__.py +5 -1
- datachain/sql/sqlite/base.py +6 -1
- datachain/sql/types.py +32 -8
- {datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/METADATA +6 -3
- {datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/RECORD +25 -23
- {datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/WHEEL +0 -0
- {datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/entry_points.txt +0 -0
- {datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
@@ -680,8 +680,9 @@ class Catalog:
         ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
         assert ds_namespace
         assert ds_project
+        dataset = self.get_dataset(
+            ds_name, namespace_name=ds_namespace, project_name=ds_project
+        )
         if not ds_version:
             ds_version = dataset.latest_version
         dataset_sources = self.warehouse.get_dataset_sources(
@@ -807,7 +808,11 @@ class Catalog:
         )
         default_version = DEFAULT_DATASET_VERSION
         try:
-            dataset = self.get_dataset(
+            dataset = self.get_dataset(
+                name,
+                namespace_name=project.namespace.name if project else None,
+                project_name=project.name if project else None,
+            )
             default_version = dataset.next_version_patch
             if update_version == "major":
                 default_version = dataset.next_version_major
@@ -1016,7 +1021,11 @@ class Catalog:
            dc.save(name)
        except Exception as e:  # noqa: BLE001
            try:
-                ds = self.get_dataset(
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=project.namespace.name,
+                    project_name=project.name,
+                )
                self.metastore.update_dataset_status(
                    ds,
                    DatasetStatus.FAILED,
@@ -1033,7 +1042,11 @@ class Catalog:
            except DatasetNotFoundError:
                raise e from None

-        ds = self.get_dataset(
+        ds = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

        self.update_dataset_version_with_warehouse_info(
            ds,
@@ -1041,7 +1054,11 @@ class Catalog:
            sources="\n".join(sources),
        )

-        return self.get_dataset(
+        return self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

    def get_full_dataset_name(
        self,
@@ -1077,22 +1094,23 @@ class Catalog:
        return namespace_name, project_name, name

    def get_dataset(
-        self,
+        self,
+        name: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
    ) -> DatasetRecord:
        from datachain.lib.listing import is_listing_dataset

+        namespace_name = namespace_name or self.metastore.default_namespace_name
+        project_name = project_name or self.metastore.default_project_name

        if is_listing_dataset(name):
+            namespace_name = self.metastore.system_namespace_name
+            project_name = self.metastore.listing_project_name

-            raise DatasetNotFoundError(
-                f"Dataset {name} not found in namespace {project.namespace.name}"
-                f" and project {project.name}"
-            ) from None
+        return self.metastore.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )

    def get_dataset_with_remote_fallback(
        self,
@@ -1113,8 +1131,11 @@ class Catalog:

        if self.metastore.is_local_dataset(namespace_name) or not update:
            try:
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=namespace_name,
+                    project_name=project_name,
+                )
                if not version or ds.has_version(version):
                    return ds
            except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
@@ -1139,7 +1160,9 @@ class Catalog:
                local_ds_version=version,
            )
            return self.get_dataset(
-                name,
+                name,
+                namespace_name=namespace_name,
+                project_name=project_name,
            )

        return self.get_remote_dataset(namespace_name, project_name, name)
@@ -1148,7 +1171,11 @@ class Catalog:
        """Returns dataset that contains version with specific uuid"""
        for dataset in self.ls_datasets():
            if dataset.has_version_with_uuid(uuid):
-                return self.get_dataset(
+                return self.get_dataset(
+                    dataset.name,
+                    namespace_name=dataset.project.namespace.name,
+                    project_name=dataset.project.name,
+                )
        raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")

    def get_remote_dataset(
@@ -1171,9 +1198,18 @@ class Catalog:
        return DatasetRecord.from_dict(dataset_info)

    def get_dataset_dependencies(
-        self,
+        self,
+        name: str,
+        version: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        indirect=False,
    ) -> list[Optional[DatasetDependency]]:
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=namespace_name,
+            project_name=project_name,
+        )

        direct_dependencies = self.metastore.get_direct_dataset_dependencies(
            dataset, version
@@ -1187,10 +1223,13 @@ class Catalog:
                # dependency has been removed
                continue
            if d.is_dataset:
-                project = self.metastore.get_project(d.project, d.namespace)
                # only datasets can have dependencies
                d.dependencies = self.get_dataset_dependencies(
-                    d.name,
+                    d.name,
+                    d.version,
+                    namespace_name=d.namespace,
+                    project_name=d.project,
+                    indirect=indirect,
                )

        return direct_dependencies
@@ -1340,7 +1379,11 @@ class Catalog:
        project: Optional[Project] = None,
        client_config=None,
    ) -> list[str]:
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )

        return self.warehouse.export_dataset_table(
            bucket_uri, dataset, version, client_config
@@ -1349,7 +1392,11 @@ class Catalog:
    def dataset_table_export_file_names(
        self, name: str, version: str, project: Optional[Project] = None
    ) -> list[str]:
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
        return self.warehouse.dataset_table_export_file_names(dataset, version)

    def remove_dataset(
@@ -1359,7 +1406,11 @@ class Catalog:
        version: Optional[str] = None,
        force: Optional[bool] = False,
    ):
-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
        if not version and not force:
            raise ValueError(f"Missing dataset version from input for dataset {name}")
        if version and not dataset.has_version(version):
@@ -1395,7 +1446,11 @@ class Catalog:
        if attrs is not None:
            update_data["attrs"] = attrs  # type: ignore[assignment]

-        dataset = self.get_dataset(
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
        return self.update_dataset(dataset, **update_data)

    def ls(
@@ -1549,7 +1604,9 @@ class Catalog:
        )

        try:
-            local_dataset = self.get_dataset(
+            local_dataset = self.get_dataset(
+                local_ds_name, namespace_name=namespace.name, project_name=project.name
+            )
            if local_dataset and local_dataset.has_version(local_ds_version):
                raise DataChainError(
                    f"Local dataset {local_ds_uri} already exists with different uuid,"
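The net effect of the catalog changes is that Catalog.get_dataset now resolves a plain dataset name against explicit (or default) namespace and project names rather than a Project object. A minimal sketch of the new call shape, assuming a catalog instance is already constructed (the namespace and project names here are illustrative, not taken from the diff):

# Hedged sketch: look up a dataset by name plus optional namespace/project keywords.
ds = catalog.get_dataset("my_dataset", namespace_name="team", project_name="analytics")

# Omitting the keywords falls back to the metastore defaults, mirroring the new
# default_namespace_name / default_project_name handling shown above.
ds_default = catalog.get_dataset("my_dataset")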
datachain/cli/commands/datasets.py
CHANGED

@@ -107,8 +107,9 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
 def list_datasets_local_versions(catalog: "Catalog", name: str):
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)

+    ds = catalog.get_dataset(
+        name, namespace_name=namespace_name, project_name=project_name
+    )
     for v in ds.versions:
         yield (name, v.version)
datachain/data_storage/metastore.py
CHANGED

@@ -301,7 +301,13 @@ class AbstractMetastore(ABC, Serializable):
        """

    @abstractmethod
-    def get_dataset(
+    def get_dataset(
+        self,
+        name: str,  # normal, not full dataset name
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        conn=None,
+    ) -> DatasetRecord:
        """Gets a single dataset by name."""

    @abstractmethod
@@ -912,11 +918,14 @@ class AbstractDBMetastore(AbstractMetastore):
        **kwargs,  # TODO registered = True / False
    ) -> DatasetRecord:
        """Creates new dataset."""
+        if not project_id:
+            project = self.default_project
+        else:
+            project = self.get_project_by_id(project_id)

        query = self._datasets_insert().values(
            name=name,
-            project_id=
+            project_id=project.id,
            status=status,
            feature_schema=json.dumps(feature_schema or {}),
            created_at=datetime.now(timezone.utc),
@@ -935,7 +944,9 @@ class AbstractDBMetastore(AbstractMetastore):
            query = query.on_conflict_do_nothing(index_elements=["project_id", "name"])
        self.db.execute(query)

-        return self.get_dataset(
+        return self.get_dataset(
+            name, namespace_name=project.namespace.name, project_name=project.name
+        )

    def create_dataset_version(  # noqa: PLR0913
        self,
@@ -992,7 +1003,12 @@ class AbstractDBMetastore(AbstractMetastore):
        )
        self.db.execute(query, conn=conn)

-        return self.get_dataset(
+        return self.get_dataset(
+            dataset.name,
+            namespace_name=dataset.project.namespace.name,
+            project_name=dataset.project.name,
+            conn=conn,
+        )

    def remove_dataset(self, dataset: DatasetRecord) -> None:
        """Removes dataset."""
@@ -1216,21 +1232,30 @@ class AbstractDBMetastore(AbstractMetastore):
    def get_dataset(
        self,
        name: str,  # normal, not full dataset name
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
        conn=None,
    ) -> DatasetRecord:
        """
        Gets a single dataset in project by dataset name.
        """
+        namespace_name = namespace_name or self.default_namespace_name
+        project_name = project_name or self.default_project_name

        d = self._datasets
+        n = self._namespaces
+        p = self._projects
        query = self._base_dataset_query()
-        query = query.where(
+        query = query.where(
+            d.c.name == name,
+            n.c.name == namespace_name,
+            p.c.name == project_name,
+        )  # type: ignore [attr-defined]
        ds = self._parse_dataset(self.db.execute(query, conn=conn))
        if not ds:
            raise DatasetNotFoundError(
-                f"Dataset {name} not found in
+                f"Dataset {name} not found in namespace {namespace_name}"
+                f" and project {project_name}"
            )

        return ds
datachain/delta.py
CHANGED
@@ -77,7 +77,8 @@ def _get_delta_chain(

 def _get_retry_chain(
     name: str,
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     source_ds_name: str,
     source_ds_project: Project,
@@ -96,8 +97,8 @@ def _get_retry_chain(
     # Read the latest version of the result dataset for retry logic
     result_dataset = datachain.read_dataset(
         name,
-        namespace=
-        project=
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     source_dc = datachain.read_dataset(
@@ -128,7 +129,8 @@ def _get_retry_chain(

 def _get_source_info(
     name: str,
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     catalog,
 ) -> tuple[
@@ -145,7 +147,11 @@ def _get_source_info(
     Returns (None, None, None, None) if source dataset was removed.
     """
     dependencies = catalog.get_dataset_dependencies(
-        name,
+        name,
+        latest_version,
+        namespace_name=namespace_name,
+        project_name=project_name,
+        indirect=False,
     )

     dep = dependencies[0]
@@ -157,7 +163,9 @@ def _get_source_info(
     source_ds_name = dep.name
     source_ds_version = dep.version
     source_ds_latest_version = catalog.get_dataset(
-        source_ds_name,
+        source_ds_name,
+        namespace_name=source_ds_project.namespace.name,
+        project_name=source_ds_project.name,
     ).latest_version

     return (
@@ -211,12 +219,14 @@ def delta_retry_update(
     """

     catalog = dc.session.catalog
-    project = catalog.metastore.get_project(project_name, namespace_name)
+    # project = catalog.metastore.get_project(project_name, namespace_name)
     dc._query.apply_listing_pre_step()

     # Check if dataset exists
     try:
-        dataset = catalog.get_dataset(
+        dataset = catalog.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )
         latest_version = dataset.latest_version
     except DatasetNotFoundError:
         # First creation of result dataset
@@ -234,7 +244,7 @@ def delta_retry_update(
         source_ds_version,
         source_ds_latest_version,
         dependencies,
-    ) = _get_source_info(name,
+    ) = _get_source_info(name, namespace_name, project_name, latest_version, catalog)

     # If source_ds_name is None, starting dataset was removed
     if source_ds_name is None:
@@ -264,7 +274,8 @@ def delta_retry_update(
     if delta_retry:
         retry_chain = _get_retry_chain(
             name,
+            namespace_name,
+            project_name,
             latest_version,
             source_ds_name,
             source_ds_project,
@@ -290,8 +301,8 @@ def delta_retry_update(

     latest_dataset = datachain.read_dataset(
         name,
-        namespace=
-        project=
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     compared_chain = latest_dataset.diff(
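The retry helpers now thread plain namespace_name/project_name strings straight into datachain.read_dataset. A short sketch of that call shape, using the same keywords the diff shows internally (the dataset name, namespace, project, and version values below are illustrative):

import datachain

# Hedged sketch: read a specific version of a dataset addressed by namespace and
# project strings, as the delta/retry chain now does.
result = datachain.read_dataset(
    "results",          # hypothetical dataset name
    namespace="local",  # hypothetical namespace
    project="default",  # hypothetical project
    version="1.0.1",    # hypothetical version string
)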
datachain/func/string.py
CHANGED
datachain/lib/dc/database.py
CHANGED
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union
 import sqlalchemy

 from datachain.query.schema import ColumnMeta
+from datachain.utils import batched

 DEFAULT_DATABASE_BATCH_SIZE = 10_000

@@ -74,16 +75,18 @@ def to_database(
     *,
     batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
     on_conflict: Optional[str] = None,
+    conflict_columns: Optional[list[str]] = None,
     column_mapping: Optional[dict[str, Optional[str]]] = None,
-) ->
+) -> int:
     """
     Implementation function for exporting DataChain to database tables.

     This is the core implementation that handles the actual database operations.
     For user-facing documentation, see DataChain.to_database() method.
-    """
-    from datachain.utils import batched

+    Returns:
+        int: Number of rows affected (inserted/updated).
+    """
     if on_conflict and on_conflict not in ("ignore", "update"):
         raise ValueError(
             f"on_conflict must be 'ignore' or 'update', got: {on_conflict}"
@@ -101,23 +104,39 @@ def to_database(
         all_columns, normalized_column_mapping
     )

+    normalized_conflict_columns = _normalize_conflict_columns(
+        conflict_columns, normalized_column_mapping
+    )
+
     with _connect(connection) as conn:
         metadata = sqlalchemy.MetaData()
         table = sqlalchemy.Table(table_name, metadata, *columns)

-        assert inspector  # to satisfy mypy
-        table_existed_before = table_name in inspector.get_table_names()
+        table_existed_before = False
+        total_rows_affected = 0
         try:
+            with conn.begin():
+                # Check if table exists to determine if we should clean up on error.
+                inspector = sqlalchemy.inspect(conn)
+                assert inspector  # to satisfy mypy
+                table_existed_before = table_name in inspector.get_table_names()
+
+                table.create(conn, checkfirst=True)
+
+                rows_iter = chain._leaf_values()
+                for batch in batched(rows_iter, batch_rows):
+                    rows_affected = _process_batch(
+                        conn,
+                        table,
+                        batch,
+                        on_conflict,
+                        normalized_conflict_columns,
+                        column_indices_and_names,
+                    )
+                    if rows_affected < 0 or total_rows_affected < 0:
+                        total_rows_affected = -1
+                    else:
+                        total_rows_affected += rows_affected
         except Exception:
             if not table_existed_before:
                 try:
@@ -127,6 +146,8 @@ def to_database(
                     pass
             raise

+    return total_rows_affected
+

 def _normalize_column_mapping(
     column_mapping: dict[str, Optional[str]],
@@ -167,6 +188,30 @@ def _normalize_column_mapping(
     return normalized_mapping


+def _normalize_conflict_columns(
+    conflict_columns: Optional[list[str]], column_mapping: dict[str, Optional[str]]
+) -> Optional[list[str]]:
+    """
+    Normalize conflict_columns by converting DataChain format to database format
+    and applying column mapping.
+    """
+    if not conflict_columns:
+        return None
+
+    normalized_columns = []
+    for col in conflict_columns:
+        db_col = ColumnMeta.to_db_name(col)
+
+        if db_col in column_mapping or hasattr(column_mapping, "default_factory"):
+            mapped_name = column_mapping[db_col]
+            if mapped_name:
+                normalized_columns.append(mapped_name)
+        else:
+            normalized_columns.append(db_col)
+
+    return normalized_columns
+
+
 def _prepare_columns(all_columns, column_mapping):
     """Prepare column mapping and column definitions."""
     column_indices_and_names = []  # List of (index, target_name) tuples
@@ -183,8 +228,14 @@ def _prepare_columns(all_columns, column_mapping):
     return column_indices_and_names, columns


-def _process_batch(
+def _process_batch(
+    conn, table, batch, on_conflict, conflict_columns, column_indices_and_names
+) -> int:
+    """Process a batch of rows with conflict resolution.
+
+    Returns:
+        int: Number of rows affected by the insert operation.
+    """

     def prepare_row(row_values):
         """Convert a row tuple to a dictionary with proper DB column names."""
@@ -197,6 +248,7 @@ def _process_batch(conn, table, batch, on_conflict, column_indices_and_names):

     supports_conflict = on_conflict and conn.engine.name in ("postgresql", "sqlite")

+    insert_stmt: Any  # Can be PostgreSQL, SQLite, or regular insert statement
     if supports_conflict:
         # Use dialect-specific insert for conflict resolution
         if conn.engine.name == "postgresql":
@@ -217,7 +269,19 @@
             update_values = {
                 col.name: insert_stmt.excluded[col.name] for col in table.columns
             }
+            if conn.engine.name == "postgresql":
+                if not conflict_columns:
+                    raise ValueError(
+                        "conflict_columns parameter is required when "
+                        "on_conflict='update' with PostgreSQL. Specify the column "
+                        "names that form a unique constraint."
+                    )
+
+                insert_stmt = insert_stmt.on_conflict_do_update(
+                    index_elements=conflict_columns, set_=update_values
+                )
+            else:
+                insert_stmt = insert_stmt.on_conflict_do_update(set_=update_values)
     elif on_conflict:
         import warnings

@@ -228,7 +292,8 @@
             stacklevel=2,
         )

-    conn.execute(insert_stmt, rows_to_insert)
+    result = conn.execute(insert_stmt, rows_to_insert)
+    return result.rowcount


 def read_database(
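The conflict-handling branch above builds dialect-specific upserts. A self-contained sketch of the same SQLAlchemy pattern outside DataChain, assuming a PostgreSQL connection and a table with a unique id column (table and column names are illustrative):

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import insert as pg_insert

metadata = sa.MetaData()
items = sa.Table(
    "items", metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("value", sa.String),
)

def upsert(conn, rows):
    # Build an INSERT ... ON CONFLICT (id) DO UPDATE, analogous to the
    # on_conflict="update" path with conflict_columns=["id"].
    stmt = pg_insert(items)
    update_values = {c.name: stmt.excluded[c.name] for c in items.columns}
    stmt = stmt.on_conflict_do_update(index_elements=["id"], set_=update_values)
    result = conn.execute(stmt, rows)
    return result.rowcount  # may be -1 if the driver does not report counts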
datachain/lib/dc/datachain.py
CHANGED
@@ -284,7 +284,11 @@ class DataChain:
         """Underlying dataset, if there is one."""
         if not self.name:
             return None
-        return self.session.catalog.get_dataset(
+        return self.session.catalog.get_dataset(
+            self.name,
+            namespace_name=self._query.project.namespace.name,
+            project_name=self._query.project.name,
+        )

     def __or__(self, other: "Self") -> "Self":
         """Return `self.union(other)`."""
@@ -2296,14 +2300,19 @@ class DataChain:
         *,
         batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
         on_conflict: Optional[str] = None,
+        conflict_columns: Optional[list[str]] = None,
         column_mapping: Optional[dict[str, Optional[str]]] = None,
-    ) ->
+    ) -> int:
         """Save chain to a database table using a given database connection.

         This method exports all DataChain records to a database table, creating the
         table if it doesn't exist and appending data if it does. The table schema
         is automatically inferred from the DataChain's signal schema.

+        For PostgreSQL, tables are created in the schema specified by the connection's
+        search_path (defaults to 'public'). Use URL parameters to target specific
+        schemas.
+
         Parameters:
             table_name: Name of the database table to create/write to.
             connection: SQLAlchemy connectable, str, or a sqlite3 connection
@@ -2319,25 +2328,34 @@
                 (default)
                 - "ignore": Skip duplicate rows silently
                 - "update": Update existing rows with new values
+            conflict_columns: List of column names that form a unique constraint
+                for conflict resolution. Required when on_conflict='update' and
+                using PostgreSQL.
             column_mapping: Optional mapping to rename or skip columns:
                 - Dict mapping DataChain column names to database column names
                 - Set values to None to skip columns entirely, or use `defaultdict` to
                   skip all columns except those specified.

+        Returns:
+            int: Number of rows affected (inserted/updated). -1 if DB driver doesn't
+                support telemetry.
+
         Examples:
             Basic usage with PostgreSQL:
             ```py
-            import sqlalchemy as sa
             import datachain as dc

+            rows_affected = (dc
+                .read_storage("s3://my-bucket/")
+                .to_database("files_table", "postgresql://user:pass@localhost/mydb")
+            )
+            print(f"Inserted/updated {rows_affected} rows")
             ```

             Using SQLite with connection string:
             ```py
-            chain.to_database("my_table", "sqlite:///data.db")
+            rows_affected = chain.to_database("my_table", "sqlite:///data.db")
+            print(f"Affected {rows_affected} rows")
             ```

             Column mapping and renaming:
@@ -2356,7 +2374,9 @@
             chain.to_database("my_table", engine, on_conflict="ignore")

             # Update existing records
-            chain.to_database(
+            chain.to_database(
+                "my_table", engine, on_conflict="update", conflict_columns=["id"]
+            )
             ```

             Working with different databases:
@@ -2368,15 +2388,22 @@
             # SQLite in-memory
             chain.to_database("temp_table", "sqlite:///:memory:")
             ```
+
+            PostgreSQL with schema support:
+            ```py
+            pg_url = "postgresql://user:pass@host/db?options=-c search_path=analytics"
+            chain.to_database("processed_data", pg_url)
+            ```
         """
         from .database import to_database

-        to_database(
+        return to_database(
             self,
             table_name,
             connection,
             batch_rows=batch_rows,
             on_conflict=on_conflict,
+            conflict_columns=conflict_columns,
             column_mapping=column_mapping,
         )
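Taken together, to_database now returns the affected row count and accepts conflict_columns. A short usage sketch based on the docstring above (connection strings and the "id" column are illustrative):

import datachain as dc

chain = dc.read_storage("s3://my-bucket/")

# Plain append; returns the number of inserted rows (-1 if the driver cannot report it).
n = chain.to_database("files_table", "sqlite:///data.db")

# Upsert on PostgreSQL: conflict_columns is required with on_conflict="update".
n = chain.to_database(
    "files_table",
    "postgresql://user:pass@localhost/mydb",
    on_conflict="update",
    conflict_columns=["id"],  # hypothetical unique-constraint column
)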
datachain/lib/dc/datasets.py
CHANGED
@@ -357,7 +357,14 @@ def delete_dataset(
         ) from None

     if not force:
-        version =
+        version = (
+            version
+            or catalog.get_dataset(
+                name,
+                namespace_name=ds_project.namespace.name,
+                project_name=ds_project.name,
+            ).latest_version
+        )
     else:
         version = None
     catalog.remove_dataset(name, ds_project, version=version, force=force)
@@ -403,9 +410,7 @@ def move_dataset(
     namespace, project, name = catalog.get_full_dataset_name(src)
     dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest)

-    dataset = catalog.get_dataset(
-        name, catalog.metastore.get_project(project, namespace)
-    )
+    dataset = catalog.get_dataset(name, namespace_name=namespace, project_name=project)

     catalog.update_dataset(
         dataset,
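With this change, deleting without an explicit version resolves the dataset's latest version through the namespace/project-aware lookup. A hedged sketch of the corresponding call, assuming delete_dataset is exposed as the public entry point that reaches this code:

import datachain as dc

# Hedged sketch: without version= and force=, only the latest version is removed.
dc.delete_dataset("my_dataset")               # hypothetical dataset name
dc.delete_dataset("my_dataset", force=True)   # remove all versions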
datachain/listing.py
CHANGED
@@ -65,17 +65,13 @@ class Listing:

     @cached_property
     def dataset(self) -> "DatasetRecord":
-        from datachain.error import DatasetNotFoundError
-
         assert self.dataset_name
         project = self.metastore.listing_project
-                f" {project.namespace.name} and project {project.name}"
-            ) from None
+        return self.metastore.get_dataset(
+            self.dataset_name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

     @cached_property
     def dataset_rows(self):
datachain/model/ultralytics/bbox.py
CHANGED

@@ -31,11 +31,11 @@ class YoloBBox(DataModel):
         if not summary:
             return YoloBBox(box=BBox())
         name = summary[0].get("name", "")
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
         return YoloBBox(
             cls=summary[0]["class"],
             name=name,
@@ -70,7 +70,8 @@ class YoloBBoxes(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
         return YoloBBoxes(
             cls=cls,
             name=names,
@@ -101,11 +102,11 @@ class YoloOBBox(DataModel):
         if not summary:
             return YoloOBBox(box=OBBox())
         name = summary[0].get("name", "")
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = OBBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = OBBox()
         return YoloOBBox(
             cls=summary[0]["class"],
             name=name,
@@ -140,7 +141,8 @@ class YoloOBBoxes(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
+                assert isinstance(s["box"], dict)
+                box.append(OBBox.from_dict(s["box"], title=name))
         return YoloOBBoxes(
             cls=cls,
             name=names,

datachain/model/ultralytics/pose.py
CHANGED

@@ -56,16 +56,16 @@ class YoloPose(DataModel):
         if not summary:
             return YoloPose(box=BBox(), pose=Pose3D())
         name = summary[0].get("name", "")
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
+        if summary[0].get("keypoints"):
+            assert isinstance(summary[0]["keypoints"], dict)
+            pose = Pose3D.from_dict(summary[0]["keypoints"])
+        else:
+            pose = Pose3D()
         return YoloPose(
             cls=summary[0]["class"],
             name=name,
@@ -103,9 +103,11 @@ class YoloPoses(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
             if s.get("keypoints"):
+                assert isinstance(s["keypoints"], dict)
+                pose.append(Pose3D.from_dict(s["keypoints"]))
         return YoloPoses(
             cls=cls,
             name=names,

datachain/model/ultralytics/segment.py
CHANGED

@@ -34,16 +34,16 @@ class YoloSegment(DataModel):
         if not summary:
             return YoloSegment(box=BBox(), segment=Segment())
         name = summary[0].get("name", "")
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
+        if summary[0].get("segments"):
+            assert isinstance(summary[0]["segments"], dict)
+            segment = Segment.from_dict(summary[0]["segments"], title=name)
+        else:
+            segment = Segment()
         return YoloSegment(
             cls=summary[0]["class"],
             name=summary[0]["name"],
@@ -81,9 +81,11 @@ class YoloSegments(DataModel):
             names.append(name)
             confidence.append(s["confidence"])
             if s.get("box"):
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
             if s.get("segments"):
+                assert isinstance(s["segments"], dict)
+                segment.append(Segment.from_dict(s["segments"], title=name))
         return YoloSegments(
             cls=cls,
             name=names,
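The three model files above all apply the same guard: only call from_dict when the Ultralytics summary actually contains the key, otherwise fall back to an empty model. A minimal standalone sketch of that pattern (the import path is an assumption and the summary dict is illustrative):

from datachain.model import BBox  # assumed import path for the BBox model

summary = [{"class": 0, "name": "person", "confidence": 0.9,
            "box": {"x1": 1, "y1": 2, "x2": 3, "y2": 4}}]

name = summary[0].get("name", "")
if summary[0].get("box"):
    box = BBox.from_dict(summary[0]["box"], title=name)
else:
    box = BBox()  # empty fallback when the detector returned no box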
datachain/query/dataset.py
CHANGED
@@ -1703,16 +1703,18 @@ class DatasetQuery:
            for dep in self.catalog.get_dataset_dependencies(
                dep_dataset.name,
                dep_dataset_version,
-                dep_dataset.project,
+                namespace_name=dep_dataset.project.namespace.name,
+                project_name=dep_dataset.project.name,
                indirect=False,
            ):
                if dep:
-                    dep_project = self.catalog.metastore.get_project(
-                        dep.project, dep.namespace
-                    )
                    dependencies.add(
                        (
-                            self.catalog.get_dataset(
+                            self.catalog.get_dataset(
+                                dep.name,
+                                namespace_name=dep.namespace,
+                                project_name=dep.project,
+                            ),
                            dep.version,
                        )
                    )
@@ -1754,7 +1756,11 @@
                if (
                    name
                    and version
-                    and self.catalog.get_dataset(
+                    and self.catalog.get_dataset(
+                        name,
+                        namespace_name=project.namespace.name,
+                        project_name=project.name,
+                    ).has_version(version)
                ):
                    raise RuntimeError(f"Dataset {name} already has version {version}")
            except DatasetNotFoundError:
@@ -1808,11 +1814,15 @@
            # overriding dependencies
            self.dependencies = set()
            for dep in dependencies:
-                dep_project = self.catalog.metastore.get_project(
-                    dep.project, dep.namespace
-                )
                self.dependencies.add(
-                    (
+                    (
+                        self.catalog.get_dataset(
+                            dep.name,
+                            namespace_name=dep.namespace,
+                            project_name=dep.project,
+                        ),
+                        dep.version,
+                    )
                )

        self._add_dependencies(dataset, version)  # type: ignore [arg-type]
datachain/sql/__init__.py
CHANGED
@@ -1,6 +1,8 @@
 from sqlalchemy.sql.elements import literal
 from sqlalchemy.sql.expression import column

+# Import PostgreSQL dialect registration (registers PostgreSQL type converter)
+from . import postgresql_dialect  # noqa: F401
 from .default import setup as default_setup
 from .selectable import select, values

datachain/sql/postgresql_dialect.py
ADDED

@@ -0,0 +1,9 @@
+"""
+PostgreSQL dialect registration for DataChain.
+"""
+
+from datachain.sql.postgresql_types import PostgreSQLTypeConverter
+from datachain.sql.types import register_backend_types
+
+# Register PostgreSQL type converter
+register_backend_types("postgresql", PostgreSQLTypeConverter())

datachain/sql/postgresql_types.py
ADDED

@@ -0,0 +1,21 @@
+"""
+PostgreSQL-specific type converter for DataChain.
+
+Handles PostgreSQL-specific type mappings that differ from the default dialect.
+"""
+
+from sqlalchemy.dialects import postgresql
+
+from datachain.sql.types import TypeConverter
+
+
+class PostgreSQLTypeConverter(TypeConverter):
+    """PostgreSQL-specific type converter."""
+
+    def datetime(self):
+        """PostgreSQL uses TIMESTAMP WITH TIME ZONE to preserve timezone information."""
+        return postgresql.TIMESTAMP(timezone=True)
+
+    def json(self):
+        """PostgreSQL uses JSONB for better performance and query capabilities."""
+        return postgresql.JSONB()
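A brief sketch of what this registration means in practice: once datachain/sql/__init__.py imports the dialect module, converter lookups for the postgresql dialect return JSONB and timezone-aware TIMESTAMP types instead of the defaults. Illustrative only:

from datachain.sql.postgresql_types import PostgreSQLTypeConverter

conv = PostgreSQLTypeConverter()
print(conv.json())      # JSONB()
print(conv.datetime())  # TIMESTAMP(timezone=True)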
datachain/sql/sqlite/__init__.py
CHANGED
datachain/sql/sqlite/base.py
CHANGED
@@ -304,7 +304,11 @@ def register_user_defined_sql_functions() -> None:


 def adapt_datetime(val: datetime) -> str:
+    is_utc_check = val.tzinfo is timezone.utc
+    tzname_check = val.tzname() == "UTC"
+    combined_check = is_utc_check or tzname_check
+
+    if not combined_check:
        try:
            val = val.astimezone(timezone.utc)
        except (OverflowError, ValueError, OSError):
@@ -314,6 +318,7 @@ def adapt_datetime(val: datetime) -> str:
                val = datetime.min.replace(tzinfo=timezone.utc)
            else:
                raise
+
    return val.replace(tzinfo=None).isoformat(" ")
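The reworked adapt_datetime normalizes any aware datetime to UTC before storing a naive ISO string. A small sketch of the same normalization outside the SQLite adapter registration (values illustrative):

from datetime import datetime, timezone, timedelta

val = datetime(2024, 1, 1, 12, 0, tzinfo=timezone(timedelta(hours=2)))
if not (val.tzinfo is timezone.utc or val.tzname() == "UTC"):
    val = val.astimezone(timezone.utc)
print(val.replace(tzinfo=None).isoformat(" "))  # "2024-01-01 10:00:00"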
datachain/sql/types.py
CHANGED
@@ -58,9 +58,14 @@ def converter(dialect) -> "TypeConverter":
    try:
        return registry[name]
    except KeyError:
+        # Fall back to default converter if specific dialect not found
+        try:
+            return registry["default"]
+        except KeyError:
+            raise ValueError(
+                f"No type converter registered for dialect: {dialect.name!r} "
+                f"and no default converter available"
+            ) from None


 def read_converter(dialect) -> "TypeReadConverter":
@@ -68,9 +73,14 @@ def read_converter(dialect) -> "TypeReadConverter":
    try:
        return read_converter_registry[name]
    except KeyError:
+        # Fall back to default converter if specific dialect not found
+        try:
+            return read_converter_registry["default"]
+        except KeyError:
+            raise ValueError(
+                f"No read type converter registered for dialect: {dialect.name!r} "
+                f"and no default converter available"
+            ) from None


 def type_defaults(dialect) -> "TypeDefaults":
@@ -78,7 +88,14 @@ def type_defaults(dialect) -> "TypeDefaults":
    try:
        return type_defaults_registry[name]
    except KeyError:
+        # Fall back to default converter if specific dialect not found
+        try:
+            return type_defaults_registry["default"]
+        except KeyError:
+            raise ValueError(
+                f"No type defaults registered for dialect: {dialect.name!r} "
+                f"and no default converter available"
+            ) from None


 def db_defaults(dialect) -> "DBDefaults":
@@ -86,7 +103,14 @@ def db_defaults(dialect) -> "DBDefaults":
    try:
        return db_defaults_registry[name]
    except KeyError:
+        # Fall back to default converter if specific dialect not found
+        try:
+            return db_defaults_registry["default"]
+        except KeyError:
+            raise ValueError(
+                f"No DB defaults registered for dialect: {dialect.name!r} "
+                f"and no default converter available"
+            ) from None


 class SQLType(TypeDecorator):
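All four registry lookups now degrade gracefully: an unknown dialect name falls back to the "default" entry before raising. A generic sketch of that lookup shape (the registry contents here are illustrative, not DataChain's actual converters):

registry = {"default": "DefaultConverter", "sqlite": "SQLiteConverter"}

def lookup(name):
    try:
        return registry[name]
    except KeyError:
        # Fall back to the default converter if the dialect is not registered.
        try:
            return registry["default"]
        except KeyError:
            raise ValueError(f"No type converter registered for dialect: {name!r}") from None

print(lookup("postgresql"))  # "DefaultConverter" unless postgresql registered its own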
{datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.30.1
+Version: 0.30.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
 Requires-Dist: Pillow<12,>=10.0.0
 Requires-Dist: msgpack<2,>=1.0.4
 Requires-Dist: psutil
-Requires-Dist: huggingface_hub
+Requires-Dist: huggingface_hub
 Requires-Dist: iterative-telemetry>=0.0.10
 Requires-Dist: platformdirs
 Requires-Dist: dvc-studio-client<1,>=0.21
@@ -81,8 +81,10 @@ Provides-Extra: video
 Requires-Dist: ffmpeg-python; extra == "video"
 Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
 Requires-Dist: opencv-python; extra == "video"
+Provides-Extra: postgres
+Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
 Provides-Extra: tests
-Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
+Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
 Requires-Dist: pytest<9,>=8; extra == "tests"
 Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
 Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
@@ -90,6 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
 Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
 Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
 Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
+Requires-Dist: pytest-env>=1.1.0; extra == "tests"
 Requires-Dist: virtualenv; extra == "tests"
 Requires-Dist: dulwich; extra == "tests"
 Requires-Dist: hypothesis; extra == "tests"
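The new postgres extra makes the PostgreSQL support above installable on demand; the command below reflects the metadata added in this release:

pip install "datachain[postgres]"   # pulls in psycopg2-binary>=2.9.0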
{datachain-0.30.1.dist-info → datachain-0.30.3.dist-info}/RECORD
CHANGED

@@ -4,10 +4,10 @@ datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
 datachain/dataset.py,sha256=ATGa-CBTFoZeTN2V40-zHEzfMBcdYK0WuoJ6H2yEAvo,25268
-datachain/delta.py,sha256=
+datachain/delta.py,sha256=dghGvD44LcglvL5-kUOIKk75ywBO0U7eikA3twKZC28,10202
 datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
 datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
-datachain/listing.py,sha256=
+datachain/listing.py,sha256=aqayl5St3D9PwdwM6nR1STkpLSw-S3U8pudO9PWi3N8,7241
 datachain/namespace.py,sha256=MozcXYxedIbamzY56YKy9r9fgSpOm2VryhWfIf6stYk,1791
 datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
 datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
@@ -21,13 +21,13 @@ datachain/studio.py,sha256=27750qCSNxIChEzhV02damIFreLMfr7UdiWqMFyk8AA,15361
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=Gp5JVr_m7nVWQGDOjrGnZjRXF9-Ai-MBxiPJIcpPvWQ,15451
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=vy5k0ME9FxDnpV5D9Ry0YT8eRAKZglUDdyePslJSBpk,67174
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=B2cps5coFE4MBttM-j8cs7JgNVPjnHKF4Gx1s2fJrxw,6119
 datachain/cli/__init__.py,sha256=migILaB4-dSSlxEoi6MYOGhOg2jpZo3a2AhS3mSuN8o,8237
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
-datachain/cli/commands/datasets.py,sha256=
+datachain/cli/commands/datasets.py,sha256=m8aTlUMpYCZmfSwhp7Lb_UFaQRQm_kOWXh71bNBn7qQ,6461
 datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
 datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
 datachain/cli/commands/ls.py,sha256=CBmk838Q-EQp04lE2Qdnpsc1GXAkC4-I-b-a_828n1E,5272
@@ -49,7 +49,7 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
 datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
-datachain/data_storage/metastore.py,sha256=
+datachain/data_storage/metastore.py,sha256=0-myih2VtUm_gcwz8bNgnP2h0rBkBl6v1IEhduShb4w,54517
 datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=TTQjdDXUaZSr3MEaxZjDhsVIkIJqxFNA-sD25TO3m_4,30228
@@ -67,7 +67,7 @@ datachain/func/func.py,sha256=fpslnn4edr0dH3mD8BSTndRFJiiVZvbJoBJV6HkHMqw,17400
 datachain/func/numeric.py,sha256=J6FgzuIAcS6B02Cm1qPnJdB6ut21jyBDVXSBrkZNZaQ,6978
 datachain/func/path.py,sha256=9Jas35QhEtRai4l54hMqVvuJsqxHvOx88oo4vym1H_I,4077
 datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
-datachain/func/string.py,sha256=
+datachain/func/string.py,sha256=6-fZM7wHv0JZ2ZzpLFPLLYW15K_CT5VfYsmx56zBrpA,7419
 datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=geoLvyDd5uMqS3D9Ec1ODlShCUAdtwHUwl8FqbUX_hg,10776
@@ -103,9 +103,9 @@ datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sD
 datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUOzHUGPoyZXAB0,4360
 datachain/lib/dc/__init__.py,sha256=TFci5HTvYGjBesNUxDAnXaX36PnzPEUSn5a6JxB9o0U,872
 datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
-datachain/lib/dc/database.py,sha256=
-datachain/lib/dc/datachain.py,sha256=
-datachain/lib/dc/datasets.py,sha256=
+datachain/lib/dc/database.py,sha256=F6EOjPKwSdp26kJsOKGq49D9OxqyKEalINHEwLQav2s,14716
+datachain/lib/dc/datachain.py,sha256=sfLT_iMGkNsPh5JAPzRATyC4hsotp34bDS0rSu2NQ10,99244
+datachain/lib/dc/datasets.py,sha256=jRwfHUUESlNdK1heUNxb-V10ZobRJYPbvCqYYkz2FYI,15240
 datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
 datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
@@ -121,12 +121,12 @@ datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
 datachain/model/segment.py,sha256=NhcEYB_KVa0aLQYiZ4jEwkylH9QBLd8fZhmg6PVnx1Y,1967
 datachain/model/utils.py,sha256=5elwCKleOO6CZM0IuWjFykPekrhc5m7V4jSIOcgGMms,6733
 datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
-datachain/model/ultralytics/bbox.py,sha256=
-datachain/model/ultralytics/pose.py,sha256=
-datachain/model/ultralytics/segment.py,sha256=
+datachain/model/ultralytics/bbox.py,sha256=C-aDiBhVa_ML2oERWvksRkyMU1XuYSpb6eItHB5q0qc,4764
+datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigFYNZWUA,3392
+datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=qMVySOlyDm3b-zpF7z_DGeeCfltH7YSGwB7HYYmNhBU,63081
 datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
 datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -137,9 +137,11 @@ datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
 datachain/query/utils.py,sha256=a2PTBZ3qsG6XlUcp9XsoGiQfKkca4Q3m-VzFgiGQPAc,1230
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/remote/studio.py,sha256=pDThxvEEpIKVGfa9rmtz_zeqHwrgzh0Lv-Pd4wzDx5k,15448
-datachain/sql/__init__.py,sha256=
+datachain/sql/__init__.py,sha256=8D2omsBiATt8bjLjGo6jBEtaKEkOlnlNFWhVryHMDv0,388
+datachain/sql/postgresql_dialect.py,sha256=pDTfH8xaXz5xZsq8O1aQUvWLRIv_ogYeAqtmKlPp3Rw,280
+datachain/sql/postgresql_types.py,sha256=ryb_0lzuA9UOJ_B6nW9Yb8nJjzeSmEItAL_Ceue65lc,627
 datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
-datachain/sql/types.py,sha256=
+datachain/sql/types.py,sha256=RWOghtYFx14K-e71QOGg5yfKb-A4-4JgFjaJ0wCZ17Y,15006
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
 datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
@@ -151,16 +153,16 @@ datachain/sql/functions/numeric.py,sha256=BK2KCiPSgM2IveCq-9M_PG3CtPBlztaS9TTn1L
 datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=E-T9OIzUR-GKaLgjZsEtg5CJrY_sLf1lt1awTvY7w2w,1426
-datachain/sql/sqlite/__init__.py,sha256=
-datachain/sql/sqlite/base.py,sha256=
+datachain/sql/sqlite/__init__.py,sha256=PsLaDSij9a03VxGSpagpNl7NQsGtgm72ArUeALZONoc,183
+datachain/sql/sqlite/base.py,sha256=6aoQHeggY3hs31_YZ-wlYKA1Lto4MFOpgfgRspH6IMc,21498
 datachain/sql/sqlite/types.py,sha256=cH6oge2E_YWFy22wY-txPJH8gxoQFSpCthtZR8PZjpo,1849
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
+datachain-0.30.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.30.3.dist-info/METADATA,sha256=TataasDxkiKnymsMaLxda_Cp0DhYE_FhUMA8CKBXCgg,13903
+datachain-0.30.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.30.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.30.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.30.3.dist-info/RECORD,,
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|