datachain 0.11.0__py3-none-any.whl → 0.11.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


datachain/cli/__init__.py CHANGED
@@ -215,6 +215,7 @@ def handle_show_command(args, catalog):
         columns=args.columns,
         no_collapse=args.no_collapse,
         schema=args.schema,
+        include_hidden=args.hidden,
     )
 
 
datachain/cli/commands/show.py CHANGED
@@ -1,6 +1,8 @@
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Optional
 
+from datachain.lib.signal_schema import SignalSchema
+
 if TYPE_CHECKING:
     from datachain.catalog import Catalog
 
@@ -14,6 +16,7 @@ def show(
     columns: Sequence[str] = (),
     no_collapse: bool = False,
     schema: bool = False,
+    include_hidden: bool = False,
 ) -> None:
     from datachain import Session
     from datachain.lib.dc import DataChain
@@ -23,6 +26,13 @@ def show(
     dataset = catalog.get_dataset(name)
     dataset_version = dataset.get_version(version or dataset.latest_version)
 
+    if include_hidden:
+        hidden_fields = []
+    else:
+        hidden_fields = SignalSchema.get_flatten_hidden_fields(
+            dataset_version.feature_schema
+        )
+
     query = (
         DatasetQuery(name=name, version=version, catalog=catalog)
         .select(*columns)
@@ -30,7 +40,8 @@ def show(
         .offset(offset)
     )
     records = query.to_db_records()
-    show_records(records, collapse_columns=not no_collapse)
+    show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
+
     if schema and dataset_version.feature_schema:
         print("\nSchema:")
         session = Session.get(catalog=catalog)
datachain/cli/parser/utils.py CHANGED
@@ -98,3 +98,9 @@ def add_show_args(parser: ArgumentParser) -> None:
         default=False,
         help="Do not collapse the columns",
     )
+    parser.add_argument(
+        "--hidden",
+        action="store_true",
+        default=False,
+        help="Show hidden fields",
+    )
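Together, the two CLI hunks above thread a new `--hidden` flag from the argument parser into the `show` command, so hidden fields stay out of dataset previews unless explicitly requested. A hedged usage sketch (the dataset name is a placeholder):

```
datachain show <dataset-name> --hidden
```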
datachain/lib/data_model.py CHANGED
@@ -26,6 +26,7 @@ class DataModel(BaseModel):
     """Pydantic model wrapper that registers model with `DataChain`."""
 
     _version: ClassVar[int] = 1
+    _hidden_fields: ClassVar[list[str]] = []
 
     @classmethod
     def __pydantic_init_subclass__(cls):
@@ -41,6 +42,11 @@ class DataModel(BaseModel):
         for val in models:
             ModelStore.register(val)
 
+    @classmethod
+    def hidden_fields(cls) -> list[str]:
+        """Returns a list of fields that should be hidden from the user."""
+        return cls._hidden_fields
+
 
 def is_chain_type(t: type) -> bool:
     """Return true if type is supported by `DataChain`."""
datachain/lib/dc.py CHANGED
@@ -23,6 +23,7 @@ import sqlalchemy
 from pydantic import BaseModel
 from sqlalchemy.sql.functions import GenericFunction
 from sqlalchemy.sql.sqltypes import NullType
+from tqdm import tqdm
 
 from datachain.dataset import DatasetRecord
 from datachain.func import literal
@@ -32,7 +33,14 @@ from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import DataModel, DataType, DataValue, dict_to_data_model
 from datachain.lib.dataset_info import DatasetInfo
-from datachain.lib.file import ArrowRow, File, FileType, get_file_type
+from datachain.lib.file import (
+    EXPORT_FILES_MAX_THREADS,
+    ArrowRow,
+    File,
+    FileExporter,
+    FileType,
+    get_file_type,
+)
 from datachain.lib.file import ExportPlacement as FileExportPlacement
 from datachain.lib.listing import get_file_info, get_listing, list_bucket, ls
 from datachain.lib.listing_info import ListingInfo
@@ -65,7 +73,6 @@ _T = TypeVar("_T")
 D = TypeVar("D", bound="DataChain")
 UDFObjT = TypeVar("UDFObjT", bound=UDFBase)
 
-
 DEFAULT_PARQUET_CHUNK_SIZE = 100_000
 
 
@@ -1050,7 +1057,7 @@ class DataChain:
     def select(self, *args: str, _sys: bool = True) -> "Self":
         """Select only a specified set of signals."""
         new_schema = self.signals_schema.resolve(*args)
-        if _sys:
+        if self._sys and _sys:
             new_schema = SignalSchema({"sys": Sys}) | new_schema
         columns = new_schema.db_signals()
         return self._evolve(
@@ -1093,6 +1100,7 @@ class DataChain:
         partition_by_columns: list[Column] = []
         signal_columns: list[Column] = []
         schema_fields: dict[str, DataType] = {}
+        keep_columns: list[str] = []
 
         # validate partition_by columns and add them to the schema
         for col in partition_by:
@@ -1100,10 +1108,13 @@ class DataChain:
                 col_db_name = ColumnMeta.to_db_name(col)
                 col_type = self.signals_schema.get_column_type(col_db_name)
                 column = Column(col_db_name, python_to_sql(col_type))
+                if col not in keep_columns:
+                    keep_columns.append(col)
             elif isinstance(col, Function):
                 column = col.get_column(self.signals_schema)
                 col_db_name = column.name
                 col_type = column.type.python_type
+                schema_fields[col_db_name] = col_type
             else:
                 raise DataChainColumnError(
                     col,
@@ -1113,7 +1124,6 @@ class DataChain:
                     ),
                 )
             partition_by_columns.append(column)
-            schema_fields[col_db_name] = col_type
 
         # validate signal columns and add them to the schema
         if not kwargs:
@@ -1128,9 +1138,13 @@ class DataChain:
             signal_columns.append(column)
             schema_fields[col_name] = func.get_result_type(self.signals_schema)
 
+        signal_schema = SignalSchema(schema_fields)
+        if keep_columns:
+            signal_schema |= self.signals_schema.to_partial(*keep_columns)
+
         return self._evolve(
             query=self._query.group_by(signal_columns, partition_by_columns),
-            signal_schema=SignalSchema(schema_fields),
+            signal_schema=signal_schema,
        )
 
     def mutate(self, **kwargs) -> "Self":
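The net effect of the `keep_columns` bookkeeping above is that string `partition_by` columns keep their original types in the output schema via `to_partial` (see `datachain/lib/signal_schema.py` below), instead of being re-declared as plain schema fields. A hedged sketch; the dataset and signal names are hypothetical, and `func.count` is assumed to be available as in other datachain releases:

```python
from datachain.func import count
from datachain.lib.dc import DataChain

chain = DataChain.from_dataset("docs").group_by(  # hypothetical dataset
    cnt=count(),
    partition_by="file.path",
)
# "file.path" keeps its File-derived type in chain.signals_schema,
# courtesy of signals_schema.to_partial("file.path")
```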
@@ -1225,23 +1239,37 @@ class DataChain:
     @overload
     def collect_flatten(self) -> Iterator[tuple[Any, ...]]: ...
 
+    @overload
+    def collect_flatten(self, *, include_hidden: bool) -> Iterator[tuple[Any, ...]]: ...
+
     @overload
     def collect_flatten(
         self, *, row_factory: Callable[[list[str], tuple[Any, ...]], _T]
     ) -> Iterator[_T]: ...
 
-    def collect_flatten(self, *, row_factory=None):
+    @overload
+    def collect_flatten(
+        self,
+        *,
+        row_factory: Callable[[list[str], tuple[Any, ...]], _T],
+        include_hidden: bool,
+    ) -> Iterator[_T]: ...
+
+    def collect_flatten(self, *, row_factory=None, include_hidden: bool = True):
         """Yields flattened rows of values as a tuple.
 
         Args:
             row_factory : A callable to convert row to a custom format.
                 It should accept two arguments: a list of column names and
                 a tuple of row values.
+            include_hidden: Whether to include hidden signals from the schema.
         """
-        db_signals = self._effective_signals_schema.db_signals()
+        db_signals = self._effective_signals_schema.db_signals(
+            include_hidden=include_hidden
+        )
         with self._query.ordered_select(*db_signals).as_iterable() as rows:
             if row_factory:
-                rows = (row_factory(db_signals, r) for r in rows)
+                rows = (row_factory(db_signals, r) for r in rows)  # type: ignore[assignment]
             yield from rows
 
     def to_columnar_data_with_names(
@@ -1275,10 +1303,23 @@ class DataChain:
         self, *, row_factory: Callable[[list[str], tuple[Any, ...]], _T]
     ) -> list[_T]: ...
 
-    def results(self, *, row_factory=None):  # noqa: D102
+    @overload
+    def results(
+        self,
+        *,
+        row_factory: Callable[[list[str], tuple[Any, ...]], _T],
+        include_hidden: bool,
+    ) -> list[_T]: ...
+
+    @overload
+    def results(self, *, include_hidden: bool) -> list[tuple[Any, ...]]: ...
+
+    def results(self, *, row_factory=None, include_hidden=True):  # noqa: D102
         if row_factory is None:
-            return list(self.collect_flatten())
-        return list(self.collect_flatten(row_factory=row_factory))
+            return list(self.collect_flatten(include_hidden=include_hidden))
+        return list(
+            self.collect_flatten(row_factory=row_factory, include_hidden=include_hidden)
+        )
 
     def to_records(self) -> list[dict[str, Any]]:
         """Convert every row to a dictionary."""
@@ -1788,21 +1829,25 @@ class DataChain:
             **fr_map,
         )
 
-    def to_pandas(self, flatten=False) -> "pd.DataFrame":
+    def to_pandas(self, flatten=False, include_hidden=True) -> "pd.DataFrame":
         """Return a pandas DataFrame from the chain.
 
         Parameters:
             flatten : Whether to use a multiindex or flatten column names.
+            include_hidden : Whether to include hidden columns.
         """
         import pandas as pd
 
-        headers, max_length = self._effective_signals_schema.get_headers_with_length()
+        headers, max_length = self._effective_signals_schema.get_headers_with_length(
+            include_hidden=include_hidden
+        )
         if flatten or max_length < 2:
             columns = [".".join(filter(None, header)) for header in headers]
         else:
             columns = pd.MultiIndex.from_tuples(map(tuple, headers))
 
-        return pd.DataFrame.from_records(self.results(), columns=columns)
+        results = self.results(include_hidden=include_hidden)
+        return pd.DataFrame.from_records(results, columns=columns)
 
     def show(
         self,
@@ -1810,6 +1855,7 @@ class DataChain:
         flatten=False,
         transpose=False,
         truncate=True,
+        include_hidden=False,
     ) -> None:
         """Show a preview of the chain results.
 
@@ -1818,11 +1864,12 @@ class DataChain:
             flatten : Whether to use a multiindex or flatten column names.
             transpose : Whether to transpose rows and columns.
             truncate : Whether or not to truncate the contents of columns.
+            include_hidden : Whether to include hidden columns.
         """
         import pandas as pd
 
         dc = self.limit(limit) if limit > 0 else self  # type: ignore[misc]
-        df = dc.to_pandas(flatten)
+        df = dc.to_pandas(flatten, include_hidden=include_hidden)
 
         if df.empty:
             print("Empty result")
@@ -2498,19 +2545,25 @@ class DataChain:
         output: str,
         signal: str = "file",
         placement: FileExportPlacement = "fullpath",
-        use_cache: bool = True,
         link_type: Literal["copy", "symlink"] = "copy",
+        num_threads: Optional[int] = EXPORT_FILES_MAX_THREADS,
+        anon: bool = False,
+        client_config: Optional[dict] = None,
     ) -> None:
-        """Export files from a specified signal to a directory.
+        """Export files from a specified signal to a directory. Files can be
+        exported to a local or cloud directory.
 
         Args:
             output: Path to the target directory for exporting files.
             signal: Name of the signal to export files from.
             placement: The method to use for naming exported files.
                 The possible values are: "filename", "etag", "fullpath", and "checksum".
-            use_cache: If `True`, cache the files before exporting.
             link_type: Method to use for exporting files.
                 Falls back to `'copy'` if symlinking fails.
+            num_threads : number of threads to use for exporting files.
+                By default it uses 5 threads.
+            anon: If true, we will treat cloud bucket as public one
+            client_config: Optional configuration for the destination storage client
 
         Example:
             Cross cloud transfer
@@ -2525,8 +2578,26 @@ class DataChain:
         ):
             raise ValueError("Files with the same name found")
 
-        for file in self.collect(signal):
-            file.export(output, placement, use_cache, link_type=link_type)  # type: ignore[union-attr]
+        if anon:
+            client_config = (client_config or {}) | {"anon": True}
+
+        progress_bar = tqdm(
+            desc=f"Exporting files to {output}: ",
+            unit=" files",
+            unit_scale=True,
+            unit_divisor=10,
+            total=self.count(),
+            leave=False,
+        )
+        file_exporter = FileExporter(
+            output,
+            placement,
+            self._settings.cache if self._settings else False,
+            link_type,
+            max_threads=num_threads or 1,
+            client_config=client_config,
+        )
+        file_exporter.run(self.collect(signal), progress_bar)
 
     def shuffle(self) -> "Self":
         """Shuffle the rows of the chain deterministically."""
datachain/lib/file.py CHANGED
@@ -24,6 +24,7 @@ from pydantic import Field, field_validator
 from datachain.client.fileslice import FileSlice
 from datachain.lib.data_model import DataModel
 from datachain.lib.utils import DataChainError
+from datachain.nodes_thread_pool import NodesThreadPool
 from datachain.sql.types import JSON, Boolean, DateTime, Int, String
 from datachain.utils import TIME_ZERO
 
@@ -43,6 +44,41 @@ logger = logging.getLogger("datachain")
 ExportPlacement = Literal["filename", "etag", "fullpath", "checksum"]
 
 FileType = Literal["binary", "text", "image", "video"]
+EXPORT_FILES_MAX_THREADS = 5
+
+
+class FileExporter(NodesThreadPool):
+    """Class that does file exporting concurrently with thread pool"""
+
+    def __init__(
+        self,
+        output: str,
+        placement: ExportPlacement,
+        use_cache: bool,
+        link_type: Literal["copy", "symlink"],
+        max_threads: int = EXPORT_FILES_MAX_THREADS,
+        client_config: Optional[dict] = None,
+    ):
+        super().__init__(max_threads)
+        self.output = output
+        self.placement = placement
+        self.use_cache = use_cache
+        self.link_type = link_type
+        self.client_config = client_config
+
+    def done_task(self, done):
+        for task in done:
+            task.result()
+
+    def do_task(self, file):
+        file.export(
+            self.output,
+            self.placement,
+            self.use_cache,
+            link_type=self.link_type,
+            client_config=self.client_config,
+        )
+        self.increase_counter(1)
 
 
 class VFileError(DataChainError):
@@ -158,6 +194,7 @@ class File(DataModel):
         "last_modified": DateTime,
         "location": JSON,
     }
+    _hidden_fields: ClassVar[list[str]] = ["version", "source"]
 
     _unique_id_keys: ClassVar[list[str]] = [
         "source",
@@ -269,10 +306,10 @@ class File(DataModel):
         with self.open(mode="r") as stream:
             return stream.read()
 
-    def save(self, destination: str):
+    def save(self, destination: str, client_config: Optional[dict] = None):
         """Writes it's content to destination"""
         destination = stringify_path(destination)
-        client: Client = self._catalog.get_client(destination)
+        client: Client = self._catalog.get_client(destination, **(client_config or {}))
 
         if client.PREFIX == "file://" and not destination.startswith(client.PREFIX):
             destination = Path(destination).absolute().as_uri()
@@ -300,13 +337,13 @@ class File(DataModel):
         placement: ExportPlacement = "fullpath",
         use_cache: bool = True,
         link_type: Literal["copy", "symlink"] = "copy",
+        client_config: Optional[dict] = None,
     ) -> None:
         """Export file to new location."""
-        if use_cache:
-            self._caching_enabled = use_cache
+        self._caching_enabled = use_cache
         dst = self.get_destination_path(output, placement)
         dst_dir = os.path.dirname(dst)
-        client: Client = self._catalog.get_client(dst_dir)
+        client: Client = self._catalog.get_client(dst_dir, **(client_config or {}))
         client.fs.makedirs(dst_dir, exist_ok=True)
 
@@ -316,7 +353,7 @@ class File(DataModel):
             if exc.errno not in (errno.ENOTSUP, errno.EXDEV, errno.ENOSYS):
                 raise
 
-        self.save(dst)
+        self.save(dst, client_config=client_config)
 
     def _set_stream(
         self,
@@ -502,11 +539,11 @@ class TextFile(File):
         with self.open() as stream:
             return stream.read()
 
-    def save(self, destination: str):
+    def save(self, destination: str, client_config: Optional[dict] = None):
         """Writes it's content to destination"""
         destination = stringify_path(destination)
 
-        client: Client = self._catalog.get_client(destination)
+        client: Client = self._catalog.get_client(destination, **(client_config or {}))
         with client.fs.open(destination, mode="w") as f:
             f.write(self.read_text())
 
@@ -519,11 +556,11 @@ class ImageFile(File):
         fobj = super().read()
         return PilImage.open(BytesIO(fobj))
 
-    def save(self, destination: str):
+    def save(self, destination: str, client_config: Optional[dict] = None):
         """Writes it's content to destination"""
         destination = stringify_path(destination)
 
-        client: Client = self._catalog.get_client(destination)
+        client: Client = self._catalog.get_client(destination, **(client_config or {}))
         with client.fs.open(destination, mode="wb") as f:
             self.read().save(f)
 
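Because export may now target a different cloud than the source, `save`/`export` accept a `client_config` that is forwarded to `Catalog.get_client` for the destination. A hedged fragment; the output prefix is hypothetical:

```python
# assumption: `chain` yields File objects under the "file" signal
for file in chain.collect("file"):
    file.export(
        "s3://public-mirror/data",     # hypothetical output prefix
        placement="filename",
        client_config={"anon": True},  # forwarded to Catalog.get_client
    )
```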
datachain/lib/signal_schema.py CHANGED
@@ -91,6 +91,7 @@ class CustomType(BaseModel):
     name: str
     fields: dict[str, str]
     bases: list[tuple[str, str, Optional[str]]]
+    hidden_fields: Optional[list[str]] = None
 
     @classmethod
     def deserialize(cls, data: dict[str, Any], type_name: str) -> "CustomType":
@@ -102,6 +103,7 @@ class CustomType(BaseModel):
                 "name": type_name,
                 "fields": data,
                 "bases": [],
+                "hidden_fields": [],
             }
 
         return cls(**data)
@@ -179,6 +181,16 @@ class SignalSchema:
         )
         return SignalSchema(signals)
 
+    @staticmethod
+    def _get_bases(fr: type) -> list[tuple[str, str, Optional[str]]]:
+        bases: list[tuple[str, str, Optional[str]]] = []
+        for base in fr.__mro__:
+            model_store_name = (
+                ModelStore.get_name(base) if issubclass(base, DataModel) else None
+            )
+            bases.append((base.__name__, base.__module__, model_store_name))
+        return bases
+
     @staticmethod
     def _serialize_custom_model(
         version_name: str, fr: type[BaseModel], custom_types: dict[str, Any]
@@ -196,14 +208,15 @@ class SignalSchema:
             assert field_type
             fields[field_name] = SignalSchema._serialize_type(field_type, custom_types)
 
-        bases: list[tuple[str, str, Optional[str]]] = []
-        for type_ in fr.__mro__:
-            model_store_name = (
-                ModelStore.get_name(type_) if issubclass(type_, DataModel) else None
-            )
-            bases.append((type_.__name__, type_.__module__, model_store_name))
+        bases = SignalSchema._get_bases(fr)
 
-        ct = CustomType(schema_version=2, name=version_name, fields=fields, bases=bases)
+        ct = CustomType(
+            schema_version=2,
+            name=version_name,
+            fields=fields,
+            bases=bases,
+            hidden_fields=getattr(fr, "_hidden_fields", []),
+        )
         custom_types[version_name] = ct.model_dump()
 
         return version_name
@@ -384,6 +397,37 @@ class SignalSchema:
 
         return SignalSchema(signals)
 
+    @staticmethod
+    def get_flatten_hidden_fields(schema):
+        custom_types = schema.get("_custom_types", {})
+        if not custom_types:
+            return []
+
+        hidden_by_types = {
+            name: schema.get("hidden_fields", [])
+            for name, schema in custom_types.items()
+        }
+
+        hidden_fields = []
+
+        def traverse(prefix, schema_info):
+            for field, field_type in schema_info.items():
+                if field == "_custom_types":
+                    continue
+
+                if field_type in custom_types:
+                    hidden_fields.extend(
+                        f"{prefix}{field}__{f}" for f in hidden_by_types[field_type]
+                    )
+                    traverse(
+                        prefix + field + "__",
+                        custom_types[field_type].get("fields", {}),
+                    )
+
+        traverse("", schema)
+
+        return hidden_fields
+
     def to_udf_spec(self) -> dict[str, type]:
         res = {}
         for path, type_, has_subtree, _ in self.get_flat_tree():
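`get_flatten_hidden_fields` walks a serialized feature schema and returns flattened DB column names (joined with `__`) for every hidden field, recursing through nested custom types. A hedged, self-contained sketch with an illustrative schema dict:

```python
from datachain.lib.signal_schema import SignalSchema

feature_schema = {
    "file": "File@v1",
    "_custom_types": {
        "File@v1": {
            "name": "File",
            "fields": {"path": "str", "source": "str", "version": "str"},
            "bases": [],
            "hidden_fields": ["version", "source"],
        }
    },
}

print(SignalSchema.get_flatten_hidden_fields(feature_schema))
# ['file__version', 'file__source']
```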
@@ -479,7 +523,7 @@ class SignalSchema:
         raise SignalResolvingError([col_name], "is not found")
 
     def db_signals(
-        self, name: Optional[str] = None, as_columns=False
+        self, name: Optional[str] = None, as_columns=False, include_hidden: bool = True
     ) -> Union[list[str], list[Column]]:
         """
         Returns DB columns as strings or Column objects with proper types
@@ -489,7 +533,9 @@ class SignalSchema:
             DEFAULT_DELIMITER.join(path)
             if not as_columns
             else Column(DEFAULT_DELIMITER.join(path), python_to_sql(_type))
-            for path, _type, has_subtree, _ in self.get_flat_tree()
+            for path, _type, has_subtree, _ in self.get_flat_tree(
+                include_hidden=include_hidden
+            )
             if not has_subtree
         ]
 
@@ -624,19 +670,31 @@ class SignalSchema:
             for name, val in values.items()
         }
 
-    def get_flat_tree(self) -> Iterator[tuple[list[str], DataType, bool, int]]:
-        yield from self._get_flat_tree(self.tree, [], 0)
+    def get_flat_tree(
+        self, include_hidden: bool = True
+    ) -> Iterator[tuple[list[str], DataType, bool, int]]:
+        yield from self._get_flat_tree(self.tree, [], 0, include_hidden)
 
     def _get_flat_tree(
-        self, tree: dict, prefix: list[str], depth: int
+        self, tree: dict, prefix: list[str], depth: int, include_hidden: bool
     ) -> Iterator[tuple[list[str], DataType, bool, int]]:
         for name, (type_, substree) in tree.items():
             suffix = name.split(".")
             new_prefix = prefix + suffix
+            hidden_fields = getattr(type_, "_hidden_fields", None)
+            if hidden_fields and substree and not include_hidden:
+                substree = {
+                    field: info
+                    for field, info in substree.items()
+                    if field not in hidden_fields
+                }
+
             has_subtree = substree is not None
             yield new_prefix, type_, has_subtree, depth
             if substree is not None:
-                yield from self._get_flat_tree(substree, new_prefix, depth + 1)
+                yield from self._get_flat_tree(
+                    substree, new_prefix, depth + 1, include_hidden
+                )
 
     def print_tree(self, indent: int = 4, start_at: int = 0):
         for path, type_, _, depth in self.get_flat_tree():
@@ -649,9 +707,13 @@ class SignalSchema:
             sub_schema = SignalSchema({"* list of": args[0]})
             sub_schema.print_tree(indent=indent, start_at=total_indent + indent)
 
-    def get_headers_with_length(self):
+    def get_headers_with_length(self, include_hidden: bool = True):
         paths = [
-            path for path, _, has_subtree, _ in self.get_flat_tree() if not has_subtree
+            path
+            for path, _, has_subtree, _ in self.get_flat_tree(
+                include_hidden=include_hidden
+            )
+            if not has_subtree
         ]
         max_length = max([len(path) for path in paths], default=0)
         return [
@@ -749,3 +811,120 @@ class SignalSchema:
             res[name] = (anno, subtree)  # type: ignore[assignment]
 
         return res
+
+    def to_partial(self, *columns: str) -> "SignalSchema":
+        """
+        Convert the schema to a partial schema with only the specified columns.
+
+        E.g. if original schema is:
+
+        ```
+        signal: Foo@v1
+            name: str
+            value: float
+        count: int
+        ```
+
+        Then `to_partial("signal.name", "count")` will return a partial schema:
+
+        ```
+        signal: FooPartial@v1
+            name: str
+        count: int
+        ```
+
+        Note that partial schema will have a different name for the custom types
+        (e.g. `FooPartial@v1` instead of `Foo@v1`) to avoid conflicts
+        with the original schema.
+
+        Args:
+            *columns (str): The columns to include in the partial schema.
+
+        Returns:
+            SignalSchema: The new partial schema.
+        """
+        serialized = self.serialize()
+        custom_types = serialized.get("_custom_types", {})
+
+        schema: dict[str, Any] = {}
+        schema_custom_types: dict[str, CustomType] = {}
+
+        data_model_bases: Optional[list[tuple[str, str, Optional[str]]]] = None
+
+        signal_partials: dict[str, str] = {}
+        partial_versions: dict[str, int] = {}
+
+        def _type_name_to_partial(signal_name: str, type_name: str) -> str:
+            if "@" not in type_name:
+                return type_name
+            model_name, _ = ModelStore.parse_name_version(type_name)
+
+            if signal_name not in signal_partials:
+                partial_versions.setdefault(model_name, 0)
+                partial_versions[model_name] += 1
+                version = partial_versions[model_name]
+                signal_partials[signal_name] = f"{model_name}Partial{version}"
+
+            return signal_partials[signal_name]
+
+        for column in columns:
+            parent_type, parent_type_partial = "", ""
+            column_parts = column.split(".")
+            for i, signal in enumerate(column_parts):
+                if i == 0:
+                    if signal not in serialized:
+                        raise SignalSchemaError(
+                            f"Column {column} not found in the schema"
+                        )
+
+                    parent_type = serialized[signal]
+                    parent_type_partial = _type_name_to_partial(signal, parent_type)
+
+                    schema[signal] = parent_type_partial
+                    continue
+
+                if parent_type not in custom_types:
+                    raise SignalSchemaError(
+                        f"Custom type {parent_type} not found in the schema"
+                    )
+
+                custom_type = custom_types[parent_type]
+                signal_type = custom_type["fields"].get(signal)
+                if not signal_type:
+                    raise SignalSchemaError(
+                        f"Field {signal} not found in custom type {parent_type}"
+                    )
+
+                partial_type = _type_name_to_partial(
+                    ".".join(column_parts[: i + 1]),
+                    signal_type,
+                )
+
+                if parent_type_partial in schema_custom_types:
+                    schema_custom_types[parent_type_partial].fields[signal] = (
+                        partial_type
+                    )
+                else:
+                    if data_model_bases is None:
+                        data_model_bases = SignalSchema._get_bases(DataModel)
+
+                    partial_type_name, _ = ModelStore.parse_name_version(partial_type)
+                    schema_custom_types[parent_type_partial] = CustomType(
+                        schema_version=2,
+                        name=partial_type_name,
+                        fields={signal: partial_type},
+                        bases=[
+                            (partial_type_name, "__main__", partial_type),
+                            *data_model_bases,
+                        ],
+                    )
+
+                parent_type, parent_type_partial = signal_type, partial_type
+
+        if schema_custom_types:
+            schema["_custom_types"] = {
+                type_name: ct.model_dump()
+                for type_name, ct in schema_custom_types.items()
+            }
+
+        return SignalSchema.deserialize(schema)
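Usage follows the docstring directly; this is what `group_by` in `datachain/lib/dc.py` relies on to carry partition columns into its result schema. A hedged one-liner, reusing the docstring's `Foo` example:

```python
# assumption: `schema` is a SignalSchema with a custom Foo type, as in the
# docstring above
partial = schema.to_partial("signal.name", "count")
# partial keeps "count" as-is and narrows "signal" to a FooPartial1 type
# containing only the "name" field
```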
datachain/nodes_thread_pool.py CHANGED
@@ -57,6 +57,9 @@ class NodesThreadPool(ABC):
         self._max_threads = max_threads
         self._thread_counter = 0
         self._thread_lock = threading.Lock()
+        self.tasks = set()
+        self.canceled = False
+        self.th_pool = None
 
     def run(
         self,
@@ -64,37 +67,55 @@ class NodesThreadPool(ABC):
         progress_bar=None,
     ):
         results = []
-        with concurrent.futures.ThreadPoolExecutor(self._max_threads) as th_pool:
-            tasks = set()
+        self.th_pool = concurrent.futures.ThreadPoolExecutor(self._max_threads)
+        try:
             self._thread_counter = 0
             for chunk in chunk_gen:
-                while len(tasks) >= self._max_threads:
+                if self.canceled:
+                    break
+                while len(self.tasks) >= self._max_threads:
                     done, _ = concurrent.futures.wait(
-                        tasks, timeout=1, return_when="FIRST_COMPLETED"
+                        self.tasks, timeout=1, return_when="FIRST_COMPLETED"
                     )
                     self.done_task(done)
 
-                    tasks = tasks - done
+                    self.tasks = self.tasks - done
                     self.update_progress_bar(progress_bar)
 
-                tasks.add(th_pool.submit(self.do_task, chunk))
+                self.tasks.add(self.th_pool.submit(self.do_task, chunk))
                 self.update_progress_bar(progress_bar)
 
-            while tasks:
+            while self.tasks:
+                if self.canceled:
+                    break
                 done, _ = concurrent.futures.wait(
-                    tasks, timeout=1, return_when="FIRST_COMPLETED"
+                    self.tasks, timeout=1, return_when="FIRST_COMPLETED"
                 )
                 task_results = self.done_task(done)
                 if task_results:
                     results.extend(task_results)
 
-                tasks = tasks - done
+                self.tasks = self.tasks - done
                 self.update_progress_bar(progress_bar)
-
-            th_pool.shutdown()
+        except:
+            self.cancel_all()
+            raise
+        else:
+            self.th_pool.shutdown()
 
         return results
 
+    def cancel_all(self):
+        self.cancel = True
+        # Canceling tasks just in case any of them is scheduled to run.
+        # Note that running tasks cannot be canceled, instead we will wait for
+        # them to finish when shutting down thread loop executor by calling
+        # shutdown() method.
+        for task in self.tasks:
+            task.cancel()
+        if self.th_pool:
+            self.th_pool.shutdown()  # this will wait for running tasks to finish
+
     def update_progress_bar(self, progress_bar):
         if progress_bar is not None:
             with self._thread_lock:
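`FileExporter` in `datachain/lib/file.py` is the only subclass this diff adds, but the contract is small: implement `do_task` (runs on worker threads) and `done_task` (inspects finished futures). A minimal hedged sketch of a hypothetical subclass:

```python
from datachain.nodes_thread_pool import NodesThreadPool


class Printer(NodesThreadPool):  # hypothetical subclass
    def do_task(self, chunk):
        print(chunk)              # work for one chunk, on a worker thread
        self.increase_counter(1)  # drives update_progress_bar()

    def done_task(self, done):
        for task in done:
            task.result()         # re-raise any worker exception


Printer(max_threads=2).run(iter(["a", "b", "c"]))
```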
datachain/utils.py CHANGED
@@ -362,6 +362,7 @@ def show_records(
     records: Optional[list[dict]],
     collapse_columns: bool = False,
     system_columns: bool = False,
+    hidden_fields: Optional[list[str]] = None,
 ) -> None:
     import pandas as pd
 
@@ -369,6 +370,8 @@ def show_records(
         return
 
     df = pd.DataFrame.from_records(records)
+    if hidden_fields:
+        df = df.drop(columns=hidden_fields, errors="ignore")
     return show_df(df, collapse_columns=collapse_columns, system_columns=system_columns)
 
 
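On the rendering side, hidden columns are simply dropped from the records DataFrame before display; `errors="ignore"` makes the filter a no-op for columns that are not present. A hedged sketch:

```python
from datachain.utils import show_records

records = [{"file__path": "a.txt", "file__source": "s3://bkt"}]
show_records(records, hidden_fields=["file__source"])  # shows only file__path
```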
{datachain-0.11.0.dist-info → datachain-0.11.11.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: datachain
-Version: 0.11.0
+Version: 0.11.11
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Development Status :: 2 - Pre-Alpha
 Requires-Python: >=3.9
 Description-Content-Type: text/x-rst
@@ -71,9 +72,8 @@ Provides-Extra: hf
 Requires-Dist: numba>=0.60.0; extra == "hf"
 Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
 Provides-Extra: video
-Requires-Dist: av<14; extra == "video"
 Requires-Dist: ffmpeg-python; extra == "video"
-Requires-Dist: imageio[ffmpeg]; extra == "video"
+Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
 Requires-Dist: opencv-python; extra == "video"
 Provides-Extra: tests
 Requires-Dist: datachain[hf,remote,torch,vector,video]; extra == "tests"
@@ -103,7 +103,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
 Requires-Dist: defusedxml; extra == "examples"
 Requires-Dist: accelerate; extra == "examples"
 Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
-Requires-Dist: ultralytics==8.3.78; extra == "examples"
+Requires-Dist: ultralytics==8.3.82; extra == "examples"
 Requires-Dist: open_clip_torch; extra == "examples"
 
 ================
{datachain-0.11.0.dist-info → datachain-0.11.11.dist-info}/RECORD RENAMED
@@ -9,18 +9,18 @@ datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
 datachain/listing.py,sha256=HNB-xeKA6aUA-HTWr--H22S6jVOxP2OVQ-3d07ISqAk,7109
 datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
 datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
-datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
+datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
 datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
 datachain/studio.py,sha256=Coo_6murSjh-RypiHDWNsVXGmfsopyMPCpPS1sA6uUc,9844
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
-datachain/utils.py,sha256=n8fcyOM8P_2CEFK4h8BZxCAwCkOpt8NAeJK5tm1gIOg,14433
+datachain/utils.py,sha256=-vhV9LMUcUxDSBmyeJH4WJcfLTO416usD6igXS8c49k,14563
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
 datachain/catalog/catalog.py,sha256=xZC6drw4opoYcxTTiAFv6nbhNOzBb-UZZ_VqY9dqdIs,59458
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=HA_mBC7q_My8j2WnSvIjUGuJpl6SIdg5vvy_lagxJlA,5733
-datachain/cli/__init__.py,sha256=Uu_ARR5-VS1srC_o2EADRjYKX1c86GK7LZCDL4ufE_w,8290
+datachain/cli/__init__.py,sha256=OLoDOYm7M23bLdMJhw3_GsJDGPl8pWYzcjpwgxEdFDs,8326
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
 datachain/cli/commands/datasets.py,sha256=865ui6q4UVPbL_-jk18C-lYi_bGMlh7XhfRaHbbNyhk,5796
@@ -29,11 +29,11 @@ datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR
 datachain/cli/commands/ls.py,sha256=Wb8hXyBwyhb62Zk6ZhNFPFrj2lJhdbRcnBQQkgL_qyw,5174
 datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
 datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
-datachain/cli/commands/show.py,sha256=RVb_7Kjd1kzqTxRKYFvmD04LaJHOtrCc4FYMyc-ZEYw,1149
+datachain/cli/commands/show.py,sha256=d-DDw4hA3TWA2vqIS-FkEXrzqvttcTdh2QPaahtLdy0,1445
 datachain/cli/parser/__init__.py,sha256=rtjlqSsDd4LZH9WdgvluO27M4sID1wD7YkQ4cKhNXzw,15721
 datachain/cli/parser/job.py,sha256=kvQkSfieyUmvJpOK8p78UgS8sygHhQXztRlOtVcgtaU,3449
 datachain/cli/parser/studio.py,sha256=4HEE1K93WDJxMLfgqAA4mHdigpSzC7SLUx-qPF0NgYQ,3254
-datachain/cli/parser/utils.py,sha256=GEzxfPJ4i6nt6JhjvZ3PQesXl9islEV3E-N1NZGrLaA,2750
+datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
 datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
 datachain/client/azure.py,sha256=ma6fJcnveG8wpNy1PSrN5hgvmRdCj8Sf3RKjfd3qCyM,3221
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
@@ -67,10 +67,10 @@ datachain/func/window.py,sha256=0MB1yjpVbwOrl_WNLZ8V3jkJz3o0XlYinpAcZQJuxiA,1688
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=9UBCF-lftQaz0yxdsjbLKbyzVSmrF_QSWdhp2oBDPqs,9486
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
-datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
+datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
 datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
-datachain/lib/dc.py,sha256=QQPnrS_OB1d3CfjLnYtRByGc7wNX_YT24WOjaoFPJgw,95372
-datachain/lib/file.py,sha256=Bbnb7JBiAFRD1RsZwPdvoiWFKHkl7V3haDLh672xTZg,27658
+datachain/lib/dc.py,sha256=XU4VmRjm7CR37YuEKMhtU_DGxb1a7agXoNVU5WsaLRc,97772
+datachain/lib/file.py,sha256=LwpRWsDvO3ZvUBAtS29mFotp_arfEy-HhPQ0jaL_2Rc,29006
 datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
 datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=auodM0HitYZsL0DybdgQUYhne_LgkVW-LKGYYOACP90,7272
@@ -79,7 +79,7 @@ datachain/lib/meta_formats.py,sha256=hDPfEkcmiLZOjhBBXuareMdnq65Wj8vZvxjmum6cROM
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
 datachain/lib/pytorch.py,sha256=QxXBhrn2-D0RiFA2rdxZ7wKMxyuQ0WWHKfiFEWAA760,7710
 datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
-datachain/lib/signal_schema.py,sha256=ps5od6zhWtdX3Khx2fwArl2xlGkK8SKi6vCQ6QmbaR0,27404
+datachain/lib/signal_schema.py,sha256=WyVTXUsa4DVTIZRAX2-MdjOe4deat_Fufsd9n8ycrXQ,33629
 datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
 datachain/lib/udf.py,sha256=TlvikKTFvkIKaqqSkSriOyXhQ0rwRHV2ZRs1LHZOCmo,16107
@@ -136,9 +136,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.11.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.11.0.dist-info/METADATA,sha256=ijLSRDc7IAZe6YxdX0ZRRNY2LOUlsFFib660U_upu20,11241
-datachain-0.11.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-datachain-0.11.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.11.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.11.0.dist-info/RECORD,,
+datachain-0.11.11.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.11.11.dist-info/METADATA,sha256=iF194pmsP-vh7ITTJG62w-VbTQbWGDckY-GJfempDBg,11267
+datachain-0.11.11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+datachain-0.11.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.11.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.11.11.dist-info/RECORD,,