datachain-0.12.0-py3-none-any.whl → datachain-0.13.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/catalog/catalog.py CHANGED
@@ -777,6 +777,8 @@ class Catalog:
777
777
  validate_version: Optional[bool] = True,
778
778
  listing: Optional[bool] = False,
779
779
  uuid: Optional[str] = None,
780
+ description: Optional[str] = None,
781
+ labels: Optional[list[str]] = None,
780
782
  ) -> "DatasetRecord":
781
783
  """
782
784
  Creates new dataset of a specific version.
@@ -793,6 +795,19 @@ class Catalog:
793
795
  try:
794
796
  dataset = self.get_dataset(name)
795
797
  default_version = dataset.next_version
798
+
799
+ if (description or labels) and (
800
+ dataset.description != description or dataset.labels != labels
801
+ ):
802
+ description = description or dataset.description
803
+ labels = labels or dataset.labels
804
+
805
+ self.update_dataset(
806
+ dataset,
807
+ description=description,
808
+ labels=labels,
809
+ )
810
+
796
811
  except DatasetNotFoundError:
797
812
  schema = {
798
813
  c.name: c.type.to_dict() for c in columns if isinstance(c.type, SQLType)
@@ -803,6 +818,8 @@ class Catalog:
803
818
  query_script=query_script,
804
819
  schema=schema,
805
820
  ignore_if_exists=True,
821
+ description=description,
822
+ labels=labels,
806
823
  )
807
824
 
808
825
  version = version or default_version
@@ -1608,7 +1625,7 @@ class Catalog:
1608
1625
  except TerminationSignal as exc:
1609
1626
  signal.signal(signal.SIGTERM, orig_sigterm_handler)
1610
1627
  signal.signal(signal.SIGINT, orig_sigint_handler)
1611
- logging.info("Shutting down process %s, received %r", proc.pid, exc)
1628
+ logger.info("Shutting down process %s, received %r", proc.pid, exc)
1612
1629
  # Rather than forwarding the signal to the child, we try to shut it down
1613
1630
  # gracefully. This is because we consider the script to be interactive
1614
1631
  # and special, so we give it time to cleanup before exiting.
@@ -1623,7 +1640,7 @@ class Catalog:
1623
1640
  if thread:
1624
1641
  thread.join() # wait for the reader thread
1625
1642
 
1626
- logging.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1643
+ logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1627
1644
  if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
1628
1645
  raise QueryScriptCancelError(
1629
1646
  "Query script was canceled by user",
datachain/cli/commands/ls.py CHANGED
@@ -38,11 +38,12 @@ def ls_local(
38
38
  ):
39
39
  from datachain import DataChain
40
40
 
41
- if catalog is None:
42
- from datachain.catalog import get_catalog
43
-
44
- catalog = get_catalog(client_config=client_config)
45
41
  if sources:
42
+ if catalog is None:
43
+ from datachain.catalog import get_catalog
44
+
45
+ catalog = get_catalog(client_config=client_config)
46
+
46
47
  actual_sources = list(ls_urls(sources, catalog=catalog, long=long, **kwargs))
47
48
  if len(actual_sources) == 1:
48
49
  for _, entries in actual_sources:
@@ -61,8 +62,9 @@ def ls_local(
61
62
  for entry in entries:
62
63
  print(format_ls_entry(entry))
63
64
  else:
64
- chain = DataChain.listings()
65
- for ls in chain.collect("listing"):
65
+ # Collect results in a list here to prevent interference from `tqdm` and `print`
66
+ listing = list(DataChain.listings().collect("listing"))
67
+ for ls in listing:
66
68
  print(format_ls_entry(f"{ls.uri}@v{ls.version}")) # type: ignore[union-attr]
67
69
 
68
70
 
datachain/cli/commands/show.py CHANGED
@@ -40,6 +40,13 @@ def show(
40
40
  .offset(offset)
41
41
  )
42
42
  records = query.to_db_records()
43
+ print("Name: ", name)
44
+ if dataset.description:
45
+ print("Description: ", dataset.description)
46
+ if dataset.labels:
47
+ print("Labels: ", ",".join(dataset.labels))
48
+ print("\n")
49
+
43
50
  show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
44
51
 
45
52
  if schema and dataset_version.feature_schema:
datachain/client/gcs.py CHANGED
@@ -30,7 +30,7 @@ class GCSClient(Client):
30
30
  if kwargs.pop("anon", False):
31
31
  kwargs["token"] = "anon" # noqa: S105
32
32
 
33
- return cast(GCSFileSystem, super().create_fs(**kwargs))
33
+ return cast("GCSFileSystem", super().create_fs(**kwargs))
34
34
 
35
35
  def url(self, path: str, expires: int = 3600, **kwargs) -> str:
36
36
  """
datachain/client/s3.py CHANGED
@@ -55,7 +55,7 @@ class ClientS3(Client):
55
55
  except NotImplementedError:
56
56
  pass
57
57
 
58
- return cast(S3FileSystem, super().create_fs(**kwargs))
58
+ return cast("S3FileSystem", super().create_fs(**kwargs))
59
59
 
60
60
  def url(self, path: str, expires: int = 3600, **kwargs) -> str:
61
61
  """
datachain/data_storage/metastore.py CHANGED
@@ -119,6 +119,8 @@ class AbstractMetastore(ABC, Serializable):
119
119
  query_script: str = "",
120
120
  schema: Optional[dict[str, Any]] = None,
121
121
  ignore_if_exists: bool = False,
122
+ description: Optional[str] = None,
123
+ labels: Optional[list[str]] = None,
122
124
  ) -> DatasetRecord:
123
125
  """Creates new dataset."""
124
126
 
@@ -518,6 +520,8 @@ class AbstractDBMetastore(AbstractMetastore):
518
520
  query_script: str = "",
519
521
  schema: Optional[dict[str, Any]] = None,
520
522
  ignore_if_exists: bool = False,
523
+ description: Optional[str] = None,
524
+ labels: Optional[list[str]] = None,
521
525
  **kwargs, # TODO registered = True / False
522
526
  ) -> DatasetRecord:
523
527
  """Creates new dataset."""
@@ -533,6 +537,8 @@ class AbstractDBMetastore(AbstractMetastore):
533
537
  sources="\n".join(sources) if sources else "",
534
538
  query_script=query_script,
535
539
  schema=json.dumps(schema or {}),
540
+ description=description,
541
+ labels=json.dumps(labels or []),
536
542
  )
537
543
  if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
538
544
  # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
datachain/diff/__init__.py CHANGED
@@ -74,6 +74,7 @@ def _compare( # noqa: C901
74
74
  # all left and right columns
75
75
  cols = left.signals_schema.clone_without_sys_signals().db_signals()
76
76
  right_cols = right.signals_schema.clone_without_sys_signals().db_signals()
77
+ cols_select = list(left.signals_schema.clone_without_sys_signals().values.keys())
77
78
 
78
79
  # getting correct on and right_on column names
79
80
  on = left.signals_schema.resolve(*on).db_signals() # type: ignore[assignment]
@@ -131,10 +132,12 @@ def _compare( # noqa: C901
131
132
  # when the row is deleted, we need to take column values from the right chain
132
133
  .mutate(
133
134
  **{
134
- f"{c}": ifelse(
135
- C(diff_col) == CompareStatus.DELETED, C(f"{rname}{c}"), C(c)
135
+ f"{l_on}": ifelse(
136
+ C(diff_col) == CompareStatus.DELETED,
137
+ C(f"{rname + l_on if on == right_on else r_on}"),
138
+ C(l_on),
136
139
  )
137
- for c in [c for c in cols if c in right_cols]
140
+ for l_on, r_on in zip(on, right_on) # type: ignore[arg-type]
138
141
  }
139
142
  )
140
143
  .select_except(ldiff_col, rdiff_col)
@@ -150,9 +153,9 @@ def _compare( # noqa: C901
150
153
  dc_diff = dc_diff.filter(C(diff_col) != CompareStatus.DELETED)
151
154
 
152
155
  if status_col:
153
- cols.append(diff_col) # type: ignore[arg-type]
156
+ cols_select.append(diff_col)
154
157
 
155
- dc_diff = dc_diff.select(*cols)
158
+ dc_diff = dc_diff.select(*cols_select)
156
159
 
157
160
  # final schema is schema from the left chain with status column added if needed
158
161
  dc_diff.signals_schema = (
datachain/lib/dc.py CHANGED
@@ -6,6 +6,7 @@ import sys
6
6
  from collections.abc import Iterator, Sequence
7
7
  from functools import wraps
8
8
  from typing import (
9
+ IO,
9
10
  TYPE_CHECKING,
10
11
  Any,
11
12
  BinaryIO,
@@ -270,6 +271,18 @@ class DataChain:
270
271
  self._setup: dict = setup or {}
271
272
  self._sys = _sys
272
273
 
274
+ def __repr__(self) -> str:
275
+ """Return a string representation of the chain."""
276
+ classname = self.__class__.__name__
277
+ if not self._effective_signals_schema.values:
278
+ return f"Empty {classname}"
279
+
280
+ import io
281
+
282
+ file = io.StringIO()
283
+ self.print_schema(file=file)
284
+ return file.getvalue()
285
+
273
286
  @property
274
287
  def schema(self) -> dict[str, DataType]:
275
288
  """Get schema of the chain."""
@@ -323,9 +336,9 @@ class DataChain:
323
336
  """Return `self.union(other)`."""
324
337
  return self.union(other)
325
338
 
326
- def print_schema(self) -> None:
339
+ def print_schema(self, file: Optional[IO] = None) -> None:
327
340
  """Print schema of the chain."""
328
- self._effective_signals_schema.print_tree()
341
+ self._effective_signals_schema.print_tree(file=file)
329
342
 
330
343
  def clone(self) -> "Self":
331
344
  """Make a copy of the chain in a new table."""
@@ -629,7 +642,8 @@ class DataChain:
629
642
  model_name=model_name,
630
643
  jmespath=jmespath,
631
644
  nrows=nrows,
632
- )
645
+ ),
646
+ "params": {"file": File},
633
647
  }
634
648
  # disable prefetch if nrows is set
635
649
  settings = {"prefetch": 0} if nrows else {}
@@ -773,7 +787,12 @@ class DataChain:
773
787
  )
774
788
 
775
789
  def save( # type: ignore[override]
776
- self, name: Optional[str] = None, version: Optional[int] = None, **kwargs
790
+ self,
791
+ name: Optional[str] = None,
792
+ version: Optional[int] = None,
793
+ description: Optional[str] = None,
794
+ labels: Optional[list[str]] = None,
795
+ **kwargs,
777
796
  ) -> "Self":
778
797
  """Save to a Dataset. It returns the chain itself.
779
798
 
@@ -781,11 +800,18 @@ class DataChain:
781
800
  name : dataset name. Empty name saves to a temporary dataset that will be
782
801
  removed after process ends. Temp dataset are useful for optimization.
783
802
  version : version of a dataset. Default - the last version that exist.
803
+ description : description of a dataset.
804
+ labels : labels of a dataset.
784
805
  """
785
806
  schema = self.signals_schema.clone_without_sys_signals().serialize()
786
807
  return self._evolve(
787
808
  query=self._query.save(
788
- name=name, version=version, feature_schema=schema, **kwargs
809
+ name=name,
810
+ version=version,
811
+ description=description,
812
+ labels=labels,
813
+ feature_schema=schema,
814
+ **kwargs,
789
815
  )
790
816
  )
791
817
 
@@ -1003,8 +1029,9 @@ class DataChain:
1003
1029
  func: Optional[Union[Callable, UDFObjT]],
1004
1030
  params: Union[None, str, Sequence[str]],
1005
1031
  output: OutputType,
1006
- signal_map,
1032
+ signal_map: dict[str, Callable],
1007
1033
  ) -> UDFObjT:
1034
+ is_batch = target_class.is_input_batched
1008
1035
  is_generator = target_class.is_output_batched
1009
1036
  name = self.name or ""
1010
1037
 
@@ -1015,7 +1042,9 @@ class DataChain:
1015
1042
  if self._sys:
1016
1043
  signals_schema = SignalSchema({"sys": Sys}) | signals_schema
1017
1044
 
1018
- params_schema = signals_schema.slice(sign.params, self._setup)
1045
+ params_schema = signals_schema.slice(
1046
+ sign.params, self._setup, is_batch=is_batch
1047
+ )
1019
1048
 
1020
1049
  return target_class._create(sign, params_schema)
1021
1050
 
datachain/lib/file.py CHANGED
@@ -193,7 +193,14 @@ class File(DataModel):
193
193
  "last_modified": DateTime,
194
194
  "location": JSON,
195
195
  }
196
- _hidden_fields: ClassVar[list[str]] = ["version", "source"]
196
+ _hidden_fields: ClassVar[list[str]] = [
197
+ "source",
198
+ "version",
199
+ "etag",
200
+ "is_latest",
201
+ "last_modified",
202
+ "location",
203
+ ]
197
204
 
198
205
  _unique_id_keys: ClassVar[list[str]] = [
199
206
  "source",
datachain/lib/meta_formats.py CHANGED
@@ -10,7 +10,7 @@ import jmespath as jsp
10
10
  from pydantic import BaseModel, ConfigDict, Field, ValidationError # noqa: F401
11
11
 
12
12
  from datachain.lib.data_model import DataModel # noqa: F401
13
- from datachain.lib.file import File
13
+ from datachain.lib.file import TextFile
14
14
 
15
15
 
16
16
  class UserModel(BaseModel):
@@ -130,7 +130,7 @@ def read_meta( # noqa: C901
130
130
  #
131
131
 
132
132
  def parse_data(
133
- file: File,
133
+ file: TextFile,
134
134
  data_model=spec,
135
135
  format=format,
136
136
  jmespath=jmespath,
datachain/lib/signal_schema.py CHANGED
@@ -5,6 +5,7 @@ from dataclasses import dataclass
5
5
  from datetime import datetime
6
6
  from inspect import isclass
7
7
  from typing import ( # noqa: UP035
8
+ IO,
8
9
  TYPE_CHECKING,
9
10
  Annotated,
10
11
  Any,
@@ -154,9 +155,9 @@ class SignalSchema:
154
155
  if not callable(func):
155
156
  raise SetupError(key, "value must be function or callable class")
156
157
 
157
- def _init_setup_values(self):
158
+ def _init_setup_values(self) -> None:
158
159
  if self.setup_values is not None:
159
- return self.setup_values
160
+ return
160
161
 
161
162
  res = {}
162
163
  for key, func in self.setup_func.items():
@@ -398,7 +399,7 @@ class SignalSchema:
398
399
  return SignalSchema(signals)
399
400
 
400
401
  @staticmethod
401
- def get_flatten_hidden_fields(schema):
402
+ def get_flatten_hidden_fields(schema: dict):
402
403
  custom_types = schema.get("_custom_types", {})
403
404
  if not custom_types:
404
405
  return []
@@ -464,19 +465,61 @@ class SignalSchema:
464
465
  return False
465
466
 
466
467
  def slice(
467
- self, keys: Sequence[str], setup: Optional[dict[str, Callable]] = None
468
+ self,
469
+ params: dict[str, Union[DataType, Any]],
470
+ setup: Optional[dict[str, Callable]] = None,
471
+ is_batch: bool = False,
468
472
  ) -> "SignalSchema":
469
- # Make new schema that combines current schema and setup signals
470
- setup = setup or {}
471
- setup_no_types = dict.fromkeys(setup.keys(), str)
472
- union = SignalSchema(self.values | setup_no_types)
473
- # Slice combined schema by keys
474
- schema = {}
475
- for k in keys:
476
- try:
477
- schema[k] = union._find_in_tree(k.split("."))
478
- except SignalResolvingError:
479
- pass
473
+ """
474
+ Returns new schema that combines current schema and setup signals.
475
+ """
476
+ setup_params = setup.keys() if setup else []
477
+ schema: dict[str, DataType] = {}
478
+
479
+ for param, param_type in params.items():
480
+ # This is special case for setup params, they are always treated as strings
481
+ if param in setup_params:
482
+ schema[param] = str
483
+ continue
484
+
485
+ schema_type = self._find_in_tree(param.split("."))
486
+
487
+ if param_type is Any:
488
+ schema[param] = schema_type
489
+ continue
490
+
491
+ schema_origin = get_origin(schema_type)
492
+ param_origin = get_origin(param_type)
493
+
494
+ if schema_origin is Union and type(None) in get_args(schema_type):
495
+ schema_type = get_args(schema_type)[0]
496
+ if param_origin is Union and type(None) in get_args(param_type):
497
+ param_type = get_args(param_type)[0]
498
+
499
+ if is_batch:
500
+ if param_type is list:
501
+ schema[param] = schema_type
502
+ continue
503
+
504
+ if param_origin is not list:
505
+ raise SignalResolvingError(param.split("."), "is not a list")
506
+
507
+ param_type = get_args(param_type)[0]
508
+
509
+ if param_type == schema_type or (
510
+ isclass(param_type)
511
+ and isclass(schema_type)
512
+ and issubclass(param_type, File)
513
+ and issubclass(schema_type, File)
514
+ ):
515
+ schema[param] = schema_type
516
+ continue
517
+
518
+ raise SignalResolvingError(
519
+ param.split("."),
520
+ f"types mismatch: {param_type} != {schema_type}",
521
+ )
522
+
480
523
  return SignalSchema(schema, setup)
481
524
 
482
525
  def row_to_features(
@@ -696,16 +739,20 @@ class SignalSchema:
696
739
  substree, new_prefix, depth + 1, include_hidden
697
740
  )
698
741
 
699
- def print_tree(self, indent: int = 4, start_at: int = 0):
742
+ def print_tree(self, indent: int = 2, start_at: int = 0, file: Optional[IO] = None):
700
743
  for path, type_, _, depth in self.get_flat_tree():
701
744
  total_indent = start_at + depth * indent
702
- print(" " * total_indent, f"{path[-1]}:", SignalSchema._type_to_str(type_))
745
+ col_name = " " * total_indent + path[-1]
746
+ col_type = SignalSchema._type_to_str(type_)
747
+ print(col_name, col_type, sep=": ", file=file)
703
748
 
704
749
  if get_origin(type_) is list:
705
750
  args = get_args(type_)
706
751
  if len(args) > 0 and ModelStore.is_pydantic(args[0]):
707
752
  sub_schema = SignalSchema({"* list of": args[0]})
708
- sub_schema.print_tree(indent=indent, start_at=total_indent + indent)
753
+ sub_schema.print_tree(
754
+ indent=indent, start_at=total_indent + indent, file=file
755
+ )
709
756
 
710
757
  def get_headers_with_length(self, include_hidden: bool = True):
711
758
  paths = [
datachain/lib/udf.py CHANGED
@@ -159,6 +159,7 @@ class UDFBase(AbstractUDF):
159
159
  ```
160
160
  """
161
161
 
162
+ is_input_batched = False
162
163
  is_output_batched = False
163
164
  prefetch: int = 0
164
165
 
@@ -395,6 +396,7 @@ class Mapper(UDFBase):
395
396
  class BatchMapper(UDFBase):
396
397
  """Inherit from this class to pass to `DataChain.batch_map()`."""
397
398
 
399
+ is_input_batched = True
398
400
  is_output_batched = True
399
401
 
400
402
  def run(
@@ -481,6 +483,7 @@ class Generator(UDFBase):
481
483
  class Aggregator(UDFBase):
482
484
  """Inherit from this class to pass to `DataChain.agg()`."""
483
485
 
486
+ is_input_batched = True
484
487
  is_output_batched = True
485
488
 
486
489
  def run(
datachain/lib/udf_signature.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import inspect
2
2
  from collections.abc import Generator, Iterator, Sequence
3
3
  from dataclasses import dataclass
4
- from typing import Callable, Union, get_args, get_origin
4
+ from typing import Any, Callable, Union, get_args, get_origin
5
5
 
6
6
  from datachain.lib.data_model import DataType, DataTypeNames, is_chain_type
7
7
  from datachain.lib.signal_schema import SignalSchema
@@ -18,7 +18,7 @@ class UdfSignatureError(DataChainParamsError):
18
18
  @dataclass
19
19
  class UdfSignature:
20
20
  func: Union[Callable, UDFBase]
21
- params: Sequence[str]
21
+ params: dict[str, Union[DataType, Any]]
22
22
  output_schema: SignalSchema
23
23
 
24
24
  DEFAULT_RETURN_TYPE = str
@@ -58,15 +58,23 @@ class UdfSignature:
58
58
  if not isinstance(udf_func, UDFBase) and not callable(udf_func):
59
59
  raise UdfSignatureError(chain, f"UDF '{udf_func}' is not callable")
60
60
 
61
- func_params_map_sign, func_outs_sign, is_iterator = (
62
- UdfSignature._func_signature(chain, udf_func)
61
+ func_params_map_sign, func_outs_sign, is_iterator = cls._func_signature(
62
+ chain, udf_func
63
63
  )
64
+
65
+ udf_params: dict[str, Union[DataType, Any]] = {}
64
66
  if params:
65
- udf_params = [params] if isinstance(params, str) else params
66
- elif not func_params_map_sign:
67
- udf_params = []
68
- else:
69
- udf_params = list(func_params_map_sign.keys())
67
+ udf_params = (
68
+ {params: Any} if isinstance(params, str) else dict.fromkeys(params, Any)
69
+ )
70
+ elif func_params_map_sign:
71
+ udf_params = {
72
+ param: (
73
+ param_type if param_type is not inspect.Parameter.empty else Any
74
+ )
75
+ for param, param_type in func_params_map_sign.items()
76
+ }
77
+
70
78
  if output:
71
79
  udf_output_map = UdfSignature._validate_output(
72
80
  chain, signal_name, func, func_outs_sign, output
datachain/query/dataset.py CHANGED
@@ -1646,6 +1646,8 @@ class DatasetQuery:
1646
1646
  name: Optional[str] = None,
1647
1647
  version: Optional[int] = None,
1648
1648
  feature_schema: Optional[dict] = None,
1649
+ description: Optional[str] = None,
1650
+ labels: Optional[list[str]] = None,
1649
1651
  **kwargs,
1650
1652
  ) -> "Self":
1651
1653
  """Save the query as a dataset."""
@@ -1678,6 +1680,8 @@ class DatasetQuery:
1678
1680
  version=version,
1679
1681
  feature_schema=feature_schema,
1680
1682
  columns=columns,
1683
+ description=description,
1684
+ labels=labels,
1681
1685
  **kwargs,
1682
1686
  )
1683
1687
  version = version or dataset.latest_version
datachain/sql/sqlite/base.py CHANGED
@@ -290,9 +290,9 @@ def adapt_datetime(val: datetime) -> str:
290
290
  val = val.astimezone(timezone.utc)
291
291
  except (OverflowError, ValueError, OSError):
292
292
  if val.year == MAXYEAR:
293
- val = datetime.max
293
+ val = datetime.max.replace(tzinfo=timezone.utc)
294
294
  elif val.year == MINYEAR:
295
- val = datetime.min
295
+ val = datetime.min.replace(tzinfo=timezone.utc)
296
296
  else:
297
297
  raise
298
298
  return val.replace(tzinfo=None).isoformat(" ")
datachain-0.13.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.12.0
3
+ Version: 0.13.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -107,6 +107,7 @@ Requires-Dist: accelerate; extra == "examples"
107
107
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
108
108
  Requires-Dist: ultralytics; extra == "examples"
109
109
  Requires-Dist: open_clip_torch; extra == "examples"
110
+ Dynamic: license-file
110
111
 
111
112
  ================
112
113
  |logo| DataChain
datachain-0.13.1.dist-info/RECORD CHANGED
@@ -17,7 +17,7 @@ datachain/studio.py,sha256=9MEpFPLKI3gG4isKklcfD5BMLeNsSXhtOUboOjW4Fdc,10017
17
17
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
18
18
  datachain/utils.py,sha256=CLAYkI7iPbLYw3Pjh5EkWuc2UOs8wEbuXQnqIs4UyV8,14173
19
19
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
20
- datachain/catalog/catalog.py,sha256=3CGnGuQpFRi-BmPoq-0HeXntZcFG3sh7UhuRNClwYX4,60244
20
+ datachain/catalog/catalog.py,sha256=C9FmhIDlTePiFZFJ4Yq7pfchNs1nciAVlkijtFLPZy4,60822
21
21
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
22
22
  datachain/catalog/loader.py,sha256=AhSQR_-S-9lY3DcXn3PVZv9UtarHOMlDy2x75iDwUjo,6035
23
23
  datachain/cli/__init__.py,sha256=YPVkuQ7IezNhtzo5xrfca1hEIiZtFxOlJCOzAOEuxmA,8335
@@ -26,10 +26,10 @@ datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQa
26
26
  datachain/cli/commands/datasets.py,sha256=865ui6q4UVPbL_-jk18C-lYi_bGMlh7XhfRaHbbNyhk,5796
27
27
  datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
28
28
  datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
29
- datachain/cli/commands/ls.py,sha256=Wb8hXyBwyhb62Zk6ZhNFPFrj2lJhdbRcnBQQkgL_qyw,5174
29
+ datachain/cli/commands/ls.py,sha256=7yVSRzhmocnnaAXgim4NzrzEymwpLTJjhXq5EATFwsU,5286
30
30
  datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
31
31
  datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
32
- datachain/cli/commands/show.py,sha256=d-DDw4hA3TWA2vqIS-FkEXrzqvttcTdh2QPaahtLdy0,1445
32
+ datachain/cli/commands/show.py,sha256=0ITkA7wvBPfEKM1K6uE0aage38WVsy1QXi6NS8VeSJw,1643
33
33
  datachain/cli/parser/__init__.py,sha256=rtjlqSsDd4LZH9WdgvluO27M4sID1wD7YkQ4cKhNXzw,15721
34
34
  datachain/cli/parser/job.py,sha256=kvQkSfieyUmvJpOK8p78UgS8sygHhQXztRlOtVcgtaU,3449
35
35
  datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
@@ -38,19 +38,19 @@ datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,
38
38
  datachain/client/azure.py,sha256=ma6fJcnveG8wpNy1PSrN5hgvmRdCj8Sf3RKjfd3qCyM,3221
39
39
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
40
40
  datachain/client/fsspec.py,sha256=VutCpF8MDisDwdnJvJpiTuDU9BRRAa0Km3ZkD0sKaI0,13834
41
- datachain/client/gcs.py,sha256=TY5K5INORKknTnoWDYv0EUztVLmuY1hHmdf2wUB_9uE,5114
41
+ datachain/client/gcs.py,sha256=tepsstv-6WkkJ16SVXIPKPlWdNyFlTqrUlDwulWlWGQ,5116
42
42
  datachain/client/hf.py,sha256=posnI5WOKOMG1yY_ZiV9Orcd24QsUPKZlOXgJVLxxrM,1558
43
43
  datachain/client/local.py,sha256=cGoCYflribzexiOe-Y1qbaE2fJRh-_EgQrfCSa0yK_E,4568
44
- datachain/client/s3.py,sha256=l2A4J086ZROKKHNVXnoBky0OgYYKB0EAr8Y3lObo8GY,7284
44
+ datachain/client/s3.py,sha256=YCtDhKVO_jGsMPeyqe3xk5QsF5lqMabqkt0tPFWUHOM,7286
45
45
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
46
46
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
47
47
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
48
- datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
48
+ datachain/data_storage/metastore.py,sha256=19LP15xT2Fmz0aIZ1sIajq8i1-KnFgCBEZeU2Ka9-mc,37780
49
49
  datachain/data_storage/schema.py,sha256=qSukry2kINhVw8aj5lQrpe7N90DFeatKIKmDh6jAzR8,9515
50
50
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
51
51
  datachain/data_storage/sqlite.py,sha256=KJ8hI0Hrwv9eAA-nLUlw2AYCQxiAAZ12a-ftUBtroNQ,24545
52
52
  datachain/data_storage/warehouse.py,sha256=GGtgHcOKjnvHN6CFkGGB8m4CFgPPJBo3f-KHEFEJmDc,30730
53
- datachain/diff/__init__.py,sha256=xSbJtmj-oawXQ2qfdGtfnVsfXV7KhdkQKC9bG_5lA2k,9256
53
+ datachain/diff/__init__.py,sha256=YkGdiDbZIMhAZ2SJ4eSe00HU67VP1P6SL2L_t0ODYMs,9425
54
54
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
55
  datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
56
56
  datachain/fs/utils.py,sha256=s-FkTOCGBk-b6TT3toQH51s9608pofoFjUSTc1yy7oE,825
@@ -70,21 +70,21 @@ datachain/lib/arrow.py,sha256=9UBCF-lftQaz0yxdsjbLKbyzVSmrF_QSWdhp2oBDPqs,9486
70
70
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
71
71
  datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
72
72
  datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
73
- datachain/lib/dc.py,sha256=qk6R8D1Snf3yuJ-y2PVbSjkpEllKVJ7TDkC7I2GpBHY,98590
74
- datachain/lib/file.py,sha256=o4napoPLP_BZYc05ktE9GcF1VlTj3s4lrGvbhxpDeX8,30345
73
+ datachain/lib/dc.py,sha256=fNIVsAU5_uPbjQhIjoXfEDEF7eImh0cqtIl39CI5sKs,99457
74
+ datachain/lib/file.py,sha256=HLQXS_WULm7Y-fkHMy0WpibVAcrkLPRS6CrZy6rwFe0,30450
75
75
  datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
76
76
  datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
77
77
  datachain/lib/listing.py,sha256=xrgsd1_YLLiA69LnwK56oZwe0RXTBCDicGzhavF_2AQ,6665
78
78
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
79
- datachain/lib/meta_formats.py,sha256=hDPfEkcmiLZOjhBBXuareMdnq65Wj8vZvxjmum6cROM,6377
79
+ datachain/lib/meta_formats.py,sha256=xEYlfN6XgiOgrqhY2kVlvK3xlo5nT3nuVrH111XMOrg,6385
80
80
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
81
81
  datachain/lib/pytorch.py,sha256=QxXBhrn2-D0RiFA2rdxZ7wKMxyuQ0WWHKfiFEWAA760,7710
82
82
  datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
83
- datachain/lib/signal_schema.py,sha256=WyVTXUsa4DVTIZRAX2-MdjOe4deat_Fufsd9n8ycrXQ,33629
83
+ datachain/lib/signal_schema.py,sha256=DRatqSG7OVtCUCWyZvMXe4m7r7XFO6NCfzsJRDErMtg,35185
84
84
  datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
85
85
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
86
- datachain/lib/udf.py,sha256=TlvikKTFvkIKaqqSkSriOyXhQ0rwRHV2ZRs1LHZOCmo,16107
87
- datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
86
+ datachain/lib/udf.py,sha256=4_mdcWNkyukbDjqBNszlIrZLQyl_dPVQxhKwb_iDtQQ,16192
87
+ datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
88
88
  datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
89
89
  datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
90
90
  datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
@@ -106,7 +106,7 @@ datachain/model/ultralytics/pose.py,sha256=gXAWfAk4OWZl93hKcQPKZvqJa3nIrECB4RM8K
106
106
  datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F1ajQQVfh4,2911
107
107
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
108
108
  datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
109
- datachain/query/dataset.py,sha256=jRMclCOKUblMb-OGGUHq59Zk0d3M2eHkqIh14F7jyY4,57097
109
+ datachain/query/dataset.py,sha256=J3NgcrzSP2dFg8JVqDodyBh1QEia_B-alcyfI3xKlZE,57256
110
110
  datachain/query/dispatch.py,sha256=_1vjeQ1wjUoxlik55k0JkWqQCUfMjgVWmEOyWRkx0dU,12437
111
111
  datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
112
112
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -132,15 +132,15 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
132
132
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
133
133
  datachain/sql/functions/string.py,sha256=E-T9OIzUR-GKaLgjZsEtg5CJrY_sLf1lt1awTvY7w2w,1426
134
134
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
135
- datachain/sql/sqlite/base.py,sha256=Rfemu8pj7V0aWhWwryDghhnbiMFfQS5X9FCihGuplb8,19593
135
+ datachain/sql/sqlite/base.py,sha256=N-cQT0Hpu9ROWe4OiKlkkn_YP1NKCRZZ3xSfTzpyaDA,19651
136
136
  datachain/sql/sqlite/types.py,sha256=cH6oge2E_YWFy22wY-txPJH8gxoQFSpCthtZR8PZjpo,1849
137
137
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
138
138
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
139
139
  datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
140
140
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
141
- datachain-0.12.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
142
- datachain-0.12.0.dist-info/METADATA,sha256=yHNtv5QzGI6O2TJcGTgFszXBANJHjQibgj_sq_00vy0,11351
143
- datachain-0.12.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
144
- datachain-0.12.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
145
- datachain-0.12.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
146
- datachain-0.12.0.dist-info/RECORD,,
141
+ datachain-0.13.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
142
+ datachain-0.13.1.dist-info/METADATA,sha256=FUW59zu58aHCBMYTtC_K6DkXYQZXaRQRc1L4wJFcGtc,11373
143
+ datachain-0.13.1.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
144
+ datachain-0.13.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
145
+ datachain-0.13.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
146
+ datachain-0.13.1.dist-info/RECORD,,
datachain-0.13.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (77.0.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5