datachain 0.30.2__py3-none-any.whl → 0.30.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic.

@@ -680,8 +680,9 @@ class Catalog:
         ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
         assert ds_namespace
         assert ds_project
-        project = self.metastore.get_project(ds_project, ds_namespace)
-        dataset = self.get_dataset(ds_name, project)
+        dataset = self.get_dataset(
+            ds_name, namespace_name=ds_namespace, project_name=ds_project
+        )
         if not ds_version:
             ds_version = dataset.latest_version
         dataset_sources = self.warehouse.get_dataset_sources(
@@ -807,7 +808,11 @@ class Catalog:
         )
         default_version = DEFAULT_DATASET_VERSION
         try:
-            dataset = self.get_dataset(name, project)
+            dataset = self.get_dataset(
+                name,
+                namespace_name=project.namespace.name if project else None,
+                project_name=project.name if project else None,
+            )
             default_version = dataset.next_version_patch
             if update_version == "major":
                 default_version = dataset.next_version_major
@@ -1016,7 +1021,11 @@ class Catalog:
            dc.save(name)
        except Exception as e:  # noqa: BLE001
            try:
-                ds = self.get_dataset(name, project)
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=project.namespace.name,
+                    project_name=project.name,
+                )
                 self.metastore.update_dataset_status(
                     ds,
                     DatasetStatus.FAILED,
@@ -1033,7 +1042,11 @@ class Catalog:
            except DatasetNotFoundError:
                raise e from None

-        ds = self.get_dataset(name, project)
+        ds = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

         self.update_dataset_version_with_warehouse_info(
             ds,
@@ -1041,7 +1054,11 @@ class Catalog:
            sources="\n".join(sources),
        )

-        return self.get_dataset(name, project)
+        return self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

     def get_full_dataset_name(
         self,
@@ -1077,22 +1094,23 @@ class Catalog:
         return namespace_name, project_name, name

     def get_dataset(
-        self, name: str, project: Optional[Project] = None
+        self,
+        name: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
     ) -> DatasetRecord:
         from datachain.lib.listing import is_listing_dataset

-        project = project or self.metastore.default_project
+        namespace_name = namespace_name or self.metastore.default_namespace_name
+        project_name = project_name or self.metastore.default_project_name

         if is_listing_dataset(name):
-            project = self.metastore.listing_project
+            namespace_name = self.metastore.system_namespace_name
+            project_name = self.metastore.listing_project_name

-        try:
-            return self.metastore.get_dataset(name, project.id if project else None)
-        except DatasetNotFoundError:
-            raise DatasetNotFoundError(
-                f"Dataset {name} not found in namespace {project.namespace.name}"
-                f" and project {project.name}"
-            ) from None
+        return self.metastore.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )

     def get_dataset_with_remote_fallback(
         self,
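For orientation, a minimal sketch of the new calling convention introduced by this hunk. The dataset, namespace, and project names below are made up; `catalog` is assumed to be an existing `Catalog` instance:

```py
# Old style (0.30.2): resolve a Project object first, then pass it in.
# project = catalog.metastore.get_project("analytics", "dev")
# dataset = catalog.get_dataset("clean_images", project)

# New style (0.30.3): pass the names directly; omitted names fall back to
# the metastore's default namespace and project.
dataset = catalog.get_dataset(
    "clean_images",
    namespace_name="dev",
    project_name="analytics",
)
print(dataset.latest_version)
```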
@@ -1113,8 +1131,11 @@ class Catalog:

         if self.metastore.is_local_dataset(namespace_name) or not update:
             try:
-                project = self.metastore.get_project(project_name, namespace_name)
-                ds = self.get_dataset(name, project)
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=namespace_name,
+                    project_name=project_name,
+                )
                 if not version or ds.has_version(version):
                     return ds
             except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
@@ -1139,7 +1160,9 @@ class Catalog:
                local_ds_version=version,
            )
            return self.get_dataset(
-                name, self.metastore.get_project(project_name, namespace_name)
+                name,
+                namespace_name=namespace_name,
+                project_name=project_name,
            )

         return self.get_remote_dataset(namespace_name, project_name, name)
@@ -1148,7 +1171,11 @@ class Catalog:
        """Returns dataset that contains version with specific uuid"""
        for dataset in self.ls_datasets():
            if dataset.has_version_with_uuid(uuid):
-                return self.get_dataset(dataset.name, dataset.project)
+                return self.get_dataset(
+                    dataset.name,
+                    namespace_name=dataset.project.namespace.name,
+                    project_name=dataset.project.name,
+                )
         raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")

     def get_remote_dataset(
@@ -1171,9 +1198,18 @@ class Catalog:
         return DatasetRecord.from_dict(dataset_info)

     def get_dataset_dependencies(
-        self, name: str, version: str, project: Optional[Project] = None, indirect=False
+        self,
+        name: str,
+        version: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        indirect=False,
     ) -> list[Optional[DatasetDependency]]:
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=namespace_name,
+            project_name=project_name,
+        )

         direct_dependencies = self.metastore.get_direct_dataset_dependencies(
             dataset, version
@@ -1187,10 +1223,13 @@ class Catalog:
                # dependency has been removed
                continue
            if d.is_dataset:
-                project = self.metastore.get_project(d.project, d.namespace)
                # only datasets can have dependencies
                d.dependencies = self.get_dataset_dependencies(
-                    d.name, d.version, project, indirect=indirect
+                    d.name,
+                    d.version,
+                    namespace_name=d.namespace,
+                    project_name=d.project,
+                    indirect=indirect,
                )

         return direct_dependencies
@@ -1340,7 +1379,11 @@ class Catalog:
        project: Optional[Project] = None,
        client_config=None,
    ) -> list[str]:
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )

         return self.warehouse.export_dataset_table(
             bucket_uri, dataset, version, client_config
@@ -1349,7 +1392,11 @@ class Catalog:
     def dataset_table_export_file_names(
         self, name: str, version: str, project: Optional[Project] = None
     ) -> list[str]:
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         return self.warehouse.dataset_table_export_file_names(dataset, version)

     def remove_dataset(
@@ -1359,7 +1406,11 @@ class Catalog:
        version: Optional[str] = None,
        force: Optional[bool] = False,
    ):
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         if not version and not force:
             raise ValueError(f"Missing dataset version from input for dataset {name}")
         if version and not dataset.has_version(version):
@@ -1395,7 +1446,11 @@ class Catalog:
        if attrs is not None:
            update_data["attrs"] = attrs  # type: ignore[assignment]

-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         return self.update_dataset(dataset, **update_data)

     def ls(
@@ -1549,7 +1604,9 @@ class Catalog:
        )

        try:
-            local_dataset = self.get_dataset(local_ds_name, project=project)
+            local_dataset = self.get_dataset(
+                local_ds_name, namespace_name=namespace.name, project_name=project.name
+            )
            if local_dataset and local_dataset.has_version(local_ds_version):
                raise DataChainError(
                    f"Local dataset {local_ds_uri} already exists with different uuid,"
@@ -107,8 +107,9 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
 def list_datasets_local_versions(catalog: "Catalog", name: str):
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)

-    project = catalog.metastore.get_project(project_name, namespace_name)
-    ds = catalog.get_dataset(name, project)
+    ds = catalog.get_dataset(
+        name, namespace_name=namespace_name, project_name=project_name
+    )
     for v in ds.versions:
         yield (name, v.version)

@@ -301,7 +301,13 @@ class AbstractMetastore(ABC, Serializable):
         """

     @abstractmethod
-    def get_dataset(self, name: str, project_id: Optional[int] = None) -> DatasetRecord:
+    def get_dataset(
+        self,
+        name: str,  # normal, not full dataset name
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        conn=None,
+    ) -> DatasetRecord:
         """Gets a single dataset by name."""

     @abstractmethod
@@ -912,11 +918,14 @@ class AbstractDBMetastore(AbstractMetastore):
         **kwargs,  # TODO registered = True / False
     ) -> DatasetRecord:
         """Creates new dataset."""
-        project_id = project_id or self.default_project.id
+        if not project_id:
+            project = self.default_project
+        else:
+            project = self.get_project_by_id(project_id)

         query = self._datasets_insert().values(
             name=name,
-            project_id=project_id,
+            project_id=project.id,
             status=status,
             feature_schema=json.dumps(feature_schema or {}),
             created_at=datetime.now(timezone.utc),
@@ -935,7 +944,9 @@ class AbstractDBMetastore(AbstractMetastore):
            query = query.on_conflict_do_nothing(index_elements=["project_id", "name"])
        self.db.execute(query)

-        return self.get_dataset(name, project_id)
+        return self.get_dataset(
+            name, namespace_name=project.namespace.name, project_name=project.name
+        )

     def create_dataset_version(  # noqa: PLR0913
         self,
@@ -992,7 +1003,12 @@ class AbstractDBMetastore(AbstractMetastore):
        )
        self.db.execute(query, conn=conn)

-        return self.get_dataset(dataset.name, dataset.project.id, conn=conn)
+        return self.get_dataset(
+            dataset.name,
+            namespace_name=dataset.project.namespace.name,
+            project_name=dataset.project.name,
+            conn=conn,
+        )

     def remove_dataset(self, dataset: DatasetRecord) -> None:
         """Removes dataset."""
@@ -1216,21 +1232,30 @@ class AbstractDBMetastore(AbstractMetastore):
     def get_dataset(
         self,
         name: str,  # normal, not full dataset name
-        project_id: Optional[int] = None,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
         conn=None,
     ) -> DatasetRecord:
         """
         Gets a single dataset in project by dataset name.
         """
-        project_id = project_id or self.default_project.id
+        namespace_name = namespace_name or self.default_namespace_name
+        project_name = project_name or self.default_project_name

         d = self._datasets
+        n = self._namespaces
+        p = self._projects
         query = self._base_dataset_query()
-        query = query.where(d.c.name == name, d.c.project_id == project_id)  # type: ignore [attr-defined]
+        query = query.where(
+            d.c.name == name,
+            n.c.name == namespace_name,
+            p.c.name == project_name,
+        )  # type: ignore [attr-defined]
         ds = self._parse_dataset(self.db.execute(query, conn=conn))
         if not ds:
             raise DatasetNotFoundError(
-                f"Dataset {name} not found in project with id {project_id}"
+                f"Dataset {name} not found in namespace {namespace_name}"
+                f" and project {project_name}"
             )

         return ds
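A hedged sketch of how the reworked metastore lookup behaves from a caller's point of view; the dataset, namespace, and project names are illustrative, and `metastore` is assumed to be an existing metastore instance:

```py
from datachain.error import DatasetNotFoundError

try:
    ds = metastore.get_dataset(
        "cats", namespace_name="team", project_name="vision"
    )
except DatasetNotFoundError as exc:
    # The error message now names both the namespace and the project searched,
    # instead of an opaque project id.
    print(exc)
```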
datachain/delta.py CHANGED
@@ -77,7 +77,8 @@ def _get_delta_chain(


 def _get_retry_chain(
     name: str,
-    project: Project,
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     source_ds_name: str,
     source_ds_project: Project,
@@ -96,8 +97,8 @@ def _get_retry_chain(
     # Read the latest version of the result dataset for retry logic
     result_dataset = datachain.read_dataset(
         name,
-        namespace=project.namespace.name,
-        project=project.name,
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     source_dc = datachain.read_dataset(
@@ -128,7 +129,8 @@ def _get_retry_chain(


 def _get_source_info(
     name: str,
-    project: Project,
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     catalog,
 ) -> tuple[
@@ -145,7 +147,11 @@ def _get_source_info(
     Returns (None, None, None, None) if source dataset was removed.
     """
     dependencies = catalog.get_dataset_dependencies(
-        name, latest_version, project=project, indirect=False
+        name,
+        latest_version,
+        namespace_name=namespace_name,
+        project_name=project_name,
+        indirect=False,
     )

     dep = dependencies[0]
@@ -157,7 +163,9 @@ def _get_source_info(
     source_ds_name = dep.name
     source_ds_version = dep.version
     source_ds_latest_version = catalog.get_dataset(
-        source_ds_name, project=source_ds_project
+        source_ds_name,
+        namespace_name=source_ds_project.namespace.name,
+        project_name=source_ds_project.name,
     ).latest_version

     return (
@@ -211,12 +219,14 @@ def delta_retry_update(
     """

     catalog = dc.session.catalog
-    project = catalog.metastore.get_project(project_name, namespace_name)
+    # project = catalog.metastore.get_project(project_name, namespace_name)
     dc._query.apply_listing_pre_step()

     # Check if dataset exists
     try:
-        dataset = catalog.get_dataset(name, project=project)
+        dataset = catalog.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )
         latest_version = dataset.latest_version
     except DatasetNotFoundError:
         # First creation of result dataset
@@ -234,7 +244,7 @@ def delta_retry_update(
         source_ds_version,
         source_ds_latest_version,
         dependencies,
-    ) = _get_source_info(name, project, latest_version, catalog)
+    ) = _get_source_info(name, namespace_name, project_name, latest_version, catalog)

     # If source_ds_name is None, starting dataset was removed
     if source_ds_name is None:
@@ -264,7 +274,8 @@ def delta_retry_update(
     if delta_retry:
         retry_chain = _get_retry_chain(
             name,
-            project,
+            namespace_name,
+            project_name,
             latest_version,
             source_ds_name,
             source_ds_project,
@@ -290,8 +301,8 @@ def delta_retry_update(

     latest_dataset = datachain.read_dataset(
         name,
-        namespace=project.namespace.name,
-        project=project.name,
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     compared_chain = latest_dataset.diff(
datachain/func/string.py CHANGED
@@ -6,6 +6,14 @@ from datachain.sql.functions import string

 from .func import ColT, Func

+__all__ = [
+    "byte_hamming_distance",
+    "length",
+    "regexp_replace",
+    "replace",
+    "split",
+]
+

 def length(col: ColT) -> Func:
     """
@@ -77,12 +77,15 @@ def to_database(
     on_conflict: Optional[str] = None,
     conflict_columns: Optional[list[str]] = None,
     column_mapping: Optional[dict[str, Optional[str]]] = None,
-) -> None:
+) -> int:
     """
     Implementation function for exporting DataChain to database tables.

     This is the core implementation that handles the actual database operations.
     For user-facing documentation, see DataChain.to_database() method.
+
+    Returns:
+        int: Number of rows affected (inserted/updated).
     """
     if on_conflict and on_conflict not in ("ignore", "update"):
         raise ValueError(
@@ -101,11 +104,16 @@
         all_columns, normalized_column_mapping
     )

+    normalized_conflict_columns = _normalize_conflict_columns(
+        conflict_columns, normalized_column_mapping
+    )
+
     with _connect(connection) as conn:
         metadata = sqlalchemy.MetaData()
         table = sqlalchemy.Table(table_name, metadata, *columns)

         table_existed_before = False
+        total_rows_affected = 0
         try:
             with conn.begin():
                 # Check if table exists to determine if we should clean up on error.
@@ -117,14 +125,18 @@

                 rows_iter = chain._leaf_values()
                 for batch in batched(rows_iter, batch_rows):
-                    _process_batch(
+                    rows_affected = _process_batch(
                         conn,
                         table,
                         batch,
                         on_conflict,
-                        conflict_columns,
+                        normalized_conflict_columns,
                         column_indices_and_names,
                     )
+                    if rows_affected < 0 or total_rows_affected < 0:
+                        total_rows_affected = -1
+                    else:
+                        total_rows_affected += rows_affected
         except Exception:
             if not table_existed_before:
                 try:
@@ -134,6 +146,8 @@
                     pass
             raise

+    return total_rows_affected
+

 def _normalize_column_mapping(
     column_mapping: dict[str, Optional[str]],
@@ -174,6 +188,30 @@ def _normalize_column_mapping(
     return normalized_mapping


+def _normalize_conflict_columns(
+    conflict_columns: Optional[list[str]], column_mapping: dict[str, Optional[str]]
+) -> Optional[list[str]]:
+    """
+    Normalize conflict_columns by converting DataChain format to database format
+    and applying column mapping.
+    """
+    if not conflict_columns:
+        return None
+
+    normalized_columns = []
+    for col in conflict_columns:
+        db_col = ColumnMeta.to_db_name(col)
+
+        if db_col in column_mapping or hasattr(column_mapping, "default_factory"):
+            mapped_name = column_mapping[db_col]
+            if mapped_name:
+                normalized_columns.append(mapped_name)
+        else:
+            normalized_columns.append(db_col)
+
+    return normalized_columns
+
+
 def _prepare_columns(all_columns, column_mapping):
     """Prepare column mapping and column definitions."""
     column_indices_and_names = []  # List of (index, target_name) tuples
@@ -192,8 +230,12 @@ def _prepare_columns(all_columns, column_mapping):

 def _process_batch(
     conn, table, batch, on_conflict, conflict_columns, column_indices_and_names
-):
-    """Process a batch of rows with conflict resolution."""
+) -> int:
+    """Process a batch of rows with conflict resolution.
+
+    Returns:
+        int: Number of rows affected by the insert operation.
+    """

     def prepare_row(row_values):
         """Convert a row tuple to a dictionary with proper DB column names."""
@@ -206,6 +248,7 @@ def _process_batch(

     supports_conflict = on_conflict and conn.engine.name in ("postgresql", "sqlite")

+    insert_stmt: Any  # Can be PostgreSQL, SQLite, or regular insert statement
     if supports_conflict:
         # Use dialect-specific insert for conflict resolution
         if conn.engine.name == "postgresql":
@@ -249,7 +292,8 @@
            stacklevel=2,
        )

-    conn.execute(insert_stmt, rows_to_insert)
+    result = conn.execute(insert_stmt, rows_to_insert)
+    return result.rowcount


 def read_database(
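Some context on the `-1` handling in the earlier `to_database` hunk: SQLAlchemy's `CursorResult.rowcount` mirrors the DBAPI cursor attribute, which is defined to be `-1` when the driver cannot determine how many rows were affected, so the accumulation loop collapses the running total to `-1` as soon as any batch reports that. A minimal sketch of that guard, with hypothetical variable names:

```py
total_rows_affected = 0
for rows_affected in per_batch_rowcounts:  # hypothetical iterable of rowcounts
    if rows_affected < 0 or total_rows_affected < 0:
        total_rows_affected = -1  # driver can't report counts; stop summing
    else:
        total_rows_affected += rows_affected
```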
@@ -284,7 +284,11 @@ class DataChain:
        """Underlying dataset, if there is one."""
        if not self.name:
            return None
-        return self.session.catalog.get_dataset(self.name, self._query.project)
+        return self.session.catalog.get_dataset(
+            self.name,
+            namespace_name=self._query.project.namespace.name,
+            project_name=self._query.project.name,
+        )

     def __or__(self, other: "Self") -> "Self":
         """Return `self.union(other)`."""
@@ -2298,13 +2302,17 @@ class DataChain:
        on_conflict: Optional[str] = None,
        conflict_columns: Optional[list[str]] = None,
        column_mapping: Optional[dict[str, Optional[str]]] = None,
-    ) -> None:
+    ) -> int:
        """Save chain to a database table using a given database connection.

        This method exports all DataChain records to a database table, creating the
        table if it doesn't exist and appending data if it does. The table schema
        is automatically inferred from the DataChain's signal schema.

+        For PostgreSQL, tables are created in the schema specified by the connection's
+        search_path (defaults to 'public'). Use URL parameters to target specific
+        schemas.
+
        Parameters:
            table_name: Name of the database table to create/write to.
@@ -2328,20 +2336,26 @@ class DataChain:
                - Set values to None to skip columns entirely, or use `defaultdict` to
                  skip all columns except those specified.

+        Returns:
+            int: Number of rows affected (inserted/updated). -1 if DB driver doesn't
+                support telemetry.
+
        Examples:
            Basic usage with PostgreSQL:
            ```py
-            import sqlalchemy as sa
            import datachain as dc

-            chain = dc.read_storage("s3://my-bucket/")
-            engine = sa.create_engine("postgresql://user:pass@localhost/mydb")
-            chain.to_database("files_table", engine)
+            rows_affected = (dc
+                .read_storage("s3://my-bucket/")
+                .to_database("files_table", "postgresql://user:pass@localhost/mydb")
+            )
+            print(f"Inserted/updated {rows_affected} rows")
            ```

            Using SQLite with connection string:
            ```py
-            chain.to_database("my_table", "sqlite:///data.db")
+            rows_affected = chain.to_database("my_table", "sqlite:///data.db")
+            print(f"Affected {rows_affected} rows")
            ```

            Column mapping and renaming:
@@ -2360,7 +2374,9 @@ class DataChain:
            chain.to_database("my_table", engine, on_conflict="ignore")

            # Update existing records
-            chain.to_database("my_table", engine, on_conflict="update")
+            chain.to_database(
+                "my_table", engine, on_conflict="update", conflict_columns=["id"]
+            )
            ```

            Working with different databases:
@@ -2372,10 +2388,16 @@ class DataChain:
            # SQLite in-memory
            chain.to_database("temp_table", "sqlite:///:memory:")
            ```
+
+            PostgreSQL with schema support:
+            ```py
+            pg_url = "postgresql://user:pass@host/db?options=-c search_path=analytics"
+            chain.to_database("processed_data", pg_url)
+            ```
        """
        from .database import to_database

-        to_database(
+        return to_database(
            self,
            table_name,
            connection,
@@ -357,7 +357,14 @@ def delete_dataset(
        ) from None

    if not force:
-        version = version or catalog.get_dataset(name, ds_project).latest_version
+        version = (
+            version
+            or catalog.get_dataset(
+                name,
+                namespace_name=ds_project.namespace.name,
+                project_name=ds_project.name,
+            ).latest_version
+        )
    else:
        version = None
    catalog.remove_dataset(name, ds_project, version=version, force=force)
@@ -403,9 +410,7 @@ def move_dataset(
     namespace, project, name = catalog.get_full_dataset_name(src)
     dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest)

-    dataset = catalog.get_dataset(
-        name, catalog.metastore.get_project(project, namespace)
-    )
+    dataset = catalog.get_dataset(name, namespace_name=namespace, project_name=project)

     catalog.update_dataset(
         dataset,
datachain/listing.py CHANGED
@@ -65,17 +65,13 @@ class Listing:

     @cached_property
     def dataset(self) -> "DatasetRecord":
-        from datachain.error import DatasetNotFoundError
-
         assert self.dataset_name
         project = self.metastore.listing_project
-        try:
-            return self.metastore.get_dataset(self.dataset_name, project.id)
-        except DatasetNotFoundError:
-            raise DatasetNotFoundError(
-                f"Dataset {self.dataset_name} not found in namespace"
-                f" {project.namespace.name} and project {project.name}"
-            ) from None
+        return self.metastore.get_dataset(
+            self.dataset_name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )

     @cached_property
     def dataset_rows(self):
@@ -31,11 +31,11 @@ class YoloBBox(DataModel):
        if not summary:
            return YoloBBox(box=BBox())
        name = summary[0].get("name", "")
-        box = (
-            BBox.from_dict(summary[0]["box"], title=name)
-            if summary[0].get("box")
-            else BBox()
-        )
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
         return YoloBBox(
             cls=summary[0]["class"],
             name=name,
@@ -70,7 +70,8 @@ class YoloBBoxes(DataModel):
            names.append(name)
            confidence.append(s["confidence"])
            if s.get("box"):
-                box.append(BBox.from_dict(s.get("box"), title=name))
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
         return YoloBBoxes(
             cls=cls,
             name=names,
@@ -101,11 +102,11 @@ class YoloOBBox(DataModel):
        if not summary:
            return YoloOBBox(box=OBBox())
        name = summary[0].get("name", "")
-        box = (
-            OBBox.from_dict(summary[0]["box"], title=name)
-            if summary[0].get("box")
-            else OBBox()
-        )
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = OBBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = OBBox()
         return YoloOBBox(
             cls=summary[0]["class"],
             name=name,
@@ -140,7 +141,8 @@ class YoloOBBoxes(DataModel):
            names.append(name)
            confidence.append(s["confidence"])
            if s.get("box"):
-                box.append(OBBox.from_dict(s.get("box"), title=name))
+                assert isinstance(s["box"], dict)
+                box.append(OBBox.from_dict(s["box"], title=name))
         return YoloOBBoxes(
             cls=cls,
             name=names,
@@ -56,16 +56,16 @@ class YoloPose(DataModel):
        if not summary:
            return YoloPose(box=BBox(), pose=Pose3D())
        name = summary[0].get("name", "")
-        box = (
-            BBox.from_dict(summary[0]["box"], title=name)
-            if summary[0].get("box")
-            else BBox()
-        )
-        pose = (
-            Pose3D.from_dict(summary[0]["keypoints"])
-            if summary[0].get("keypoints")
-            else Pose3D()
-        )
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
+        if summary[0].get("keypoints"):
+            assert isinstance(summary[0]["keypoints"], dict)
+            pose = Pose3D.from_dict(summary[0]["keypoints"])
+        else:
+            pose = Pose3D()
         return YoloPose(
             cls=summary[0]["class"],
             name=name,
@@ -103,9 +103,11 @@ class YoloPoses(DataModel):
            names.append(name)
            confidence.append(s["confidence"])
            if s.get("box"):
-                box.append(BBox.from_dict(s.get("box"), title=name))
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
            if s.get("keypoints"):
-                pose.append(Pose3D.from_dict(s.get("keypoints")))
+                assert isinstance(s["keypoints"], dict)
+                pose.append(Pose3D.from_dict(s["keypoints"]))
         return YoloPoses(
             cls=cls,
             name=names,
@@ -34,16 +34,16 @@ class YoloSegment(DataModel):
        if not summary:
            return YoloSegment(box=BBox(), segment=Segment())
        name = summary[0].get("name", "")
-        box = (
-            BBox.from_dict(summary[0]["box"], title=name)
-            if summary[0].get("box")
-            else BBox()
-        )
-        segment = (
-            Segment.from_dict(summary[0]["segments"], title=name)
-            if summary[0].get("segments")
-            else Segment()
-        )
+        if summary[0].get("box"):
+            assert isinstance(summary[0]["box"], dict)
+            box = BBox.from_dict(summary[0]["box"], title=name)
+        else:
+            box = BBox()
+        if summary[0].get("segments"):
+            assert isinstance(summary[0]["segments"], dict)
+            segment = Segment.from_dict(summary[0]["segments"], title=name)
+        else:
+            segment = Segment()
         return YoloSegment(
             cls=summary[0]["class"],
             name=summary[0]["name"],
@@ -81,9 +81,11 @@ class YoloSegments(DataModel):
            names.append(name)
            confidence.append(s["confidence"])
            if s.get("box"):
-                box.append(BBox.from_dict(s.get("box"), title=name))
+                assert isinstance(s["box"], dict)
+                box.append(BBox.from_dict(s["box"], title=name))
            if s.get("segments"):
-                segment.append(Segment.from_dict(s.get("segments"), title=name))
+                assert isinstance(s["segments"], dict)
+                segment.append(Segment.from_dict(s["segments"], title=name))
         return YoloSegments(
             cls=cls,
             name=names,
@@ -1703,16 +1703,18 @@ class DatasetQuery:
        for dep in self.catalog.get_dataset_dependencies(
            dep_dataset.name,
            dep_dataset_version,
-            dep_dataset.project,
+            namespace_name=dep_dataset.project.namespace.name,
+            project_name=dep_dataset.project.name,
            indirect=False,
        ):
            if dep:
-                dep_project = self.catalog.metastore.get_project(
-                    dep.project, dep.namespace
-                )
                dependencies.add(
                    (
-                        self.catalog.get_dataset(dep.name, dep_project),
+                        self.catalog.get_dataset(
+                            dep.name,
+                            namespace_name=dep.namespace,
+                            project_name=dep.project,
+                        ),
                        dep.version,
                    )
                )
@@ -1754,7 +1756,11 @@ class DatasetQuery:
            if (
                name
                and version
-                and self.catalog.get_dataset(name, project).has_version(version)
+                and self.catalog.get_dataset(
+                    name,
+                    namespace_name=project.namespace.name,
+                    project_name=project.name,
+                ).has_version(version)
            ):
                raise RuntimeError(f"Dataset {name} already has version {version}")
        except DatasetNotFoundError:
@@ -1808,11 +1814,15 @@ class DatasetQuery:
            # overriding dependencies
            self.dependencies = set()
            for dep in dependencies:
-                dep_project = self.catalog.metastore.get_project(
-                    dep.project, dep.namespace
-                )
                self.dependencies.add(
-                    (self.catalog.get_dataset(dep.name, dep_project), dep.version)
+                    (
+                        self.catalog.get_dataset(
+                            dep.name,
+                            namespace_name=dep.namespace,
+                            project_name=dep.project,
+                        ),
+                        dep.version,
+                    )
                )

        self._add_dependencies(dataset, version)  # type: ignore [arg-type]
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.30.2
+Version: 0.30.3
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
 Requires-Dist: Pillow<12,>=10.0.0
 Requires-Dist: msgpack<2,>=1.0.4
 Requires-Dist: psutil
-Requires-Dist: huggingface_hub<0.34.0
+Requires-Dist: huggingface_hub
 Requires-Dist: iterative-telemetry>=0.0.10
 Requires-Dist: platformdirs
 Requires-Dist: dvc-studio-client<1,>=0.21
@@ -4,10 +4,10 @@ datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
 datachain/dataset.py,sha256=ATGa-CBTFoZeTN2V40-zHEzfMBcdYK0WuoJ6H2yEAvo,25268
-datachain/delta.py,sha256=fTEhCedseUsHuH_Ek52NXFhFPyFD_6MioEH5sCilNgo,9897
+datachain/delta.py,sha256=dghGvD44LcglvL5-kUOIKk75ywBO0U7eikA3twKZC28,10202
 datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
 datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
-datachain/listing.py,sha256=GuUlj3oYJQOIbLYr3IqNCSeuiU5pfOtRAQQl9uBEFQU,7458
+datachain/listing.py,sha256=aqayl5St3D9PwdwM6nR1STkpLSw-S3U8pudO9PWi3N8,7241
 datachain/namespace.py,sha256=MozcXYxedIbamzY56YKy9r9fgSpOm2VryhWfIf6stYk,1791
 datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
 datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
@@ -21,13 +21,13 @@ datachain/studio.py,sha256=27750qCSNxIChEzhV02damIFreLMfr7UdiWqMFyk8AA,15361
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=Gp5JVr_m7nVWQGDOjrGnZjRXF9-Ai-MBxiPJIcpPvWQ,15451
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
-datachain/catalog/catalog.py,sha256=QTWCXy75iWo-0MCXyfV_WbsKeZ1fpLpvL8d60rxn1ws,65528
+datachain/catalog/catalog.py,sha256=vy5k0ME9FxDnpV5D9Ry0YT8eRAKZglUDdyePslJSBpk,67174
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=B2cps5coFE4MBttM-j8cs7JgNVPjnHKF4Gx1s2fJrxw,6119
 datachain/cli/__init__.py,sha256=migILaB4-dSSlxEoi6MYOGhOg2jpZo3a2AhS3mSuN8o,8237
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
-datachain/cli/commands/datasets.py,sha256=LfOe22O9OCEDI8K2cy05Gp4_Q-GFHOHRv4bXQ-USM4s,6472
+datachain/cli/commands/datasets.py,sha256=m8aTlUMpYCZmfSwhp7Lb_UFaQRQm_kOWXh71bNBn7qQ,6461
 datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
 datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
 datachain/cli/commands/ls.py,sha256=CBmk838Q-EQp04lE2Qdnpsc1GXAkC4-I-b-a_828n1E,5272
@@ -49,7 +49,7 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
 datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
-datachain/data_storage/metastore.py,sha256=Qw332arvhgXB4UY0yX-Hu8Vgl3smU12l6bvxrL9Q-vo,53810
+datachain/data_storage/metastore.py,sha256=0-myih2VtUm_gcwz8bNgnP2h0rBkBl6v1IEhduShb4w,54517
 datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=TTQjdDXUaZSr3MEaxZjDhsVIkIJqxFNA-sD25TO3m_4,30228
67
67
  datachain/func/numeric.py,sha256=J6FgzuIAcS6B02Cm1qPnJdB6ut21jyBDVXSBrkZNZaQ,6978
68
68
  datachain/func/path.py,sha256=9Jas35QhEtRai4l54hMqVvuJsqxHvOx88oo4vym1H_I,4077
69
69
  datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
70
- datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
70
+ datachain/func/string.py,sha256=6-fZM7wHv0JZ2ZzpLFPLLYW15K_CT5VfYsmx56zBrpA,7419
71
71
  datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
72
72
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
73
  datachain/lib/arrow.py,sha256=geoLvyDd5uMqS3D9Ec1ODlShCUAdtwHUwl8FqbUX_hg,10776
@@ -103,9 +103,9 @@ datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sD
 datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUOzHUGPoyZXAB0,4360
 datachain/lib/dc/__init__.py,sha256=TFci5HTvYGjBesNUxDAnXaX36PnzPEUSn5a6JxB9o0U,872
 datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
-datachain/lib/dc/database.py,sha256=4Fhen6KZRMYzSfONydwTFwSUECbdff0t-9GSj6ADyYM,13288
-datachain/lib/dc/datachain.py,sha256=UHICzncqG6GmDXxrX0DEYVXJK19c-8H-eoRuA7097zc,98439
-datachain/lib/dc/datasets.py,sha256=P6CIJizD2IYFwOQG5D3VbQRjDmUiRH0ysdtb551Xdm8,15098
+datachain/lib/dc/database.py,sha256=F6EOjPKwSdp26kJsOKGq49D9OxqyKEalINHEwLQav2s,14716
+datachain/lib/dc/datachain.py,sha256=sfLT_iMGkNsPh5JAPzRATyC4hsotp34bDS0rSu2NQ10,99244
+datachain/lib/dc/datasets.py,sha256=jRwfHUUESlNdK1heUNxb-V10ZobRJYPbvCqYYkz2FYI,15240
 datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
 datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
@@ -121,12 +121,12 @@ datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
 datachain/model/segment.py,sha256=NhcEYB_KVa0aLQYiZ4jEwkylH9QBLd8fZhmg6PVnx1Y,1967
 datachain/model/utils.py,sha256=5elwCKleOO6CZM0IuWjFykPekrhc5m7V4jSIOcgGMms,6733
 datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
-datachain/model/ultralytics/bbox.py,sha256=vxrqu7poIgPD0V2iuy26F74YCKr3s-CFKibPOAbGS40,4570
-datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzXZ9edh8,3185
-datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
+datachain/model/ultralytics/bbox.py,sha256=C-aDiBhVa_ML2oERWvksRkyMU1XuYSpb6eItHB5q0qc,4764
+datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigFYNZWUA,3392
+datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
-datachain/query/dataset.py,sha256=hUKKHuqkfNaojzOt6rMIksU-PG72i_lfbKSHZ5rt--M,62715
+datachain/query/dataset.py,sha256=qMVySOlyDm3b-zpF7z_DGeeCfltH7YSGwB7HYYmNhBU,63081
 datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
 datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -160,9 +160,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.30.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.30.2.dist-info/METADATA,sha256=nLnOc_mmzRuLWdFk3hiUi_P71TLY975X_ZWk4iyojeg,13910
-datachain-0.30.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datachain-0.30.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.30.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.30.2.dist-info/RECORD,,
+datachain-0.30.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.30.3.dist-info/METADATA,sha256=TataasDxkiKnymsMaLxda_Cp0DhYE_FhUMA8CKBXCgg,13903
+datachain-0.30.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.30.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.30.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.30.3.dist-info/RECORD,,