datachain 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.


datachain/catalog/catalog.py CHANGED
@@ -12,7 +12,6 @@ import sys
  import time
  import traceback
  from collections.abc import Iterable, Iterator, Mapping, Sequence
- from contextlib import contextmanager, nullcontext
  from copy import copy
  from dataclasses import dataclass
  from functools import cached_property, reduce
@@ -23,7 +22,6 @@ from typing import (
      TYPE_CHECKING,
      Any,
      Callable,
-     NamedTuple,
      NoReturn,
      Optional,
      Union,
@@ -58,7 +56,6 @@ from datachain.error import (
      PendingIndexingError,
      QueryScriptCancelError,
      QueryScriptCompileError,
-     QueryScriptDatasetNotFound,
      QueryScriptRunError,
  )
  from datachain.listing import Listing
@@ -115,44 +112,19 @@ def noop(_: str):
      pass


- @contextmanager
- def print_and_capture(
-     stream: "IO[bytes]|IO[str]", callback: Callable[[str], None] = noop
- ) -> "Iterator[list[str]]":
-     lines: list[str] = []
-     append = lines.append
+ def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
+     buffer = b""
+     while byt := stream.read(1):  # Read one byte at a time
+         buffer += byt

-     def loop() -> None:
-         buffer = b""
-         while byt := stream.read(1):  # Read one byte at a time
-             buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
-
-             if byt in (b"\n", b"\r"):  # Check for newline or carriage return
-                 line = buffer.decode("utf-8")
-                 print(line, end="")
-                 callback(line)
-                 append(line)
-                 buffer = b""  # Clear buffer for next line
-
-         if buffer:  # Handle any remaining data in the buffer
+         if byt in (b"\n", b"\r"):  # Check for newline or carriage return
              line = buffer.decode("utf-8")
-             print(line, end="")
              callback(line)
-             append(line)
-
-     thread = Thread(target=loop, daemon=True)
-     thread.start()
-
-     try:
-         yield lines
-     finally:
-         thread.join()
-
+             buffer = b""  # Clear buffer for next line

- class QueryResult(NamedTuple):
-     dataset: Optional[DatasetRecord]
-     version: Optional[int]
-     output: str
+     if buffer:  # Handle any remaining data in the buffer
+         line = buffer.decode("utf-8")
+         callback(line)


  class DatasetRowsFetcher(NodesThreadPool):
@@ -651,11 +623,6 @@ class Catalog:
          code_ast.body[-1:] = new_expressions
          return code_ast

-     def compile_query_script(self, script: str) -> str:
-         code_ast = ast.parse(script)
-         code_ast = self.attach_query_wrapper(code_ast)
-         return ast.unparse(code_ast)
-
      def parse_url(self, uri: str, **config: Any) -> tuple[Client, str]:
          config = config or self.client_config
          return Client.parse_url(uri, self.cache, **config)
@@ -1805,14 +1772,15 @@ class Catalog:
      def query(
          self,
          query_script: str,
-         envs: Optional[Mapping[str, str]] = None,
-         python_executable: Optional[str] = None,
+         env: Optional[Mapping[str, str]] = None,
+         python_executable: str = sys.executable,
          save: bool = False,
          capture_output: bool = True,
          output_hook: Callable[[str], None] = noop,
          params: Optional[dict[str, str]] = None,
          job_id: Optional[str] = None,
-     ) -> QueryResult:
+         _execute_last_expression: bool = False,
+     ) -> None:
          """
          Method to run custom user Python script to run a query and, as result,
          creates new dataset from the results of a query.
@@ -1835,92 +1803,21 @@ class Catalog:
                  C.size > 1000
              )
          """
-         if not job_id:
-             python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
-             job_id = self.metastore.create_job(
-                 name="",
-                 query=query_script,
-                 params=params,
-                 python_version=python_version,
-             )
-
-         lines, proc = self.run_query(
-             python_executable or sys.executable,
-             query_script,
-             envs,
-             capture_output,
-             output_hook,
-             params,
-             save,
-             job_id,
-         )
-         output = "".join(lines)
-
-         if proc.returncode:
-             if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
-                 raise QueryScriptCancelError(
-                     "Query script was canceled by user",
-                     return_code=proc.returncode,
-                     output=output,
-                 )
-             raise QueryScriptRunError(
-                 f"Query script exited with error code {proc.returncode}",
-                 return_code=proc.returncode,
-                 output=output,
-             )
-
-         def _get_dataset_versions_by_job_id():
-             for dr, dv, job in self.list_datasets_versions():
-                 if job and str(job.id) == job_id:
-                     yield dr, dv
-
-         try:
-             dr, dv = max(
-                 _get_dataset_versions_by_job_id(), key=lambda x: x[1].created_at
-             )
-         except ValueError as e:
-             if not save:
-                 return QueryResult(dataset=None, version=None, output=output)
-
-             raise QueryScriptDatasetNotFound(
-                 "No dataset found after running Query script",
-                 output=output,
-             ) from e
-
-         dr = self.update_dataset(
-             dr,
-             script_output=output,
-             query_script=query_script,
-         )
-         self.update_dataset_version_with_warehouse_info(
-             dr,
-             dv.version,
-             script_output=output,
-             query_script=query_script,
-             job_id=job_id,
-             is_job_result=True,
-         )
-         return QueryResult(dataset=dr, version=dv.version, output=output)
+         if _execute_last_expression:
+             try:
+                 code_ast = ast.parse(query_script)
+                 code_ast = self.attach_query_wrapper(code_ast)
+                 query_script_compiled = ast.unparse(code_ast)
+             except Exception as exc:
+                 raise QueryScriptCompileError(
+                     f"Query script failed to compile, reason: {exc}"
+                 ) from exc
+         else:
+             query_script_compiled = query_script
+             assert not save

-     def run_query(
-         self,
-         python_executable: str,
-         query_script: str,
-         envs: Optional[Mapping[str, str]],
-         capture_output: bool,
-         output_hook: Callable[[str], None],
-         params: Optional[dict[str, str]],
-         save: bool,
-         job_id: Optional[str],
-     ) -> tuple[list[str], subprocess.Popen]:
-         try:
-             query_script_compiled = self.compile_query_script(query_script)
-         except Exception as exc:
-             raise QueryScriptCompileError(
-                 f"Query script failed to compile, reason: {exc}"
-             ) from exc
-         envs = dict(envs or os.environ)
-         envs.update(
+         env = dict(env or os.environ)
+         env.update(
              {
                  "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
                  "PYTHONPATH": os.getcwd(),  # For local imports
@@ -1929,19 +1826,28 @@ class Catalog:
                  "DATACHAIN_JOB_ID": job_id or "",
              },
          )
-         with subprocess.Popen(  # noqa: S603
-             [python_executable, "-c", query_script_compiled],
-             env=envs,
-             stdout=subprocess.PIPE if capture_output else None,
-             stderr=subprocess.STDOUT if capture_output else None,
-             bufsize=1,
-             text=False,
-         ) as proc:
-             out = proc.stdout
-             _lines: list[str] = []
-             ctx = print_and_capture(out, output_hook) if out else nullcontext(_lines)
-             with ctx as lines:
-                 return lines, proc
+         popen_kwargs = {}
+         if capture_output:
+             popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
+
+         cmd = [python_executable, "-c", query_script_compiled]
+         with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc:  # type: ignore[call-overload] # noqa: S603
+             if capture_output:
+                 args = (proc.stdout, output_hook)
+                 thread = Thread(target=_process_stream, args=args, daemon=True)
+                 thread.start()
+                 thread.join()  # wait for the reader thread
+
+             if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
+                 raise QueryScriptCancelError(
+                     "Query script was canceled by user",
+                     return_code=proc.returncode,
+                 )
+             if proc.returncode:
+                 raise QueryScriptRunError(
+                     f"Query script exited with error code {proc.returncode}",
+                     return_code=proc.returncode,
+                 )

      def cp(
          self,
datachain/error.py CHANGED
@@ -42,10 +42,6 @@ class QueryScriptRunError(Exception):
          super().__init__(self.message)


- class QueryScriptDatasetNotFound(QueryScriptRunError):  # noqa: N818
-     pass
-
-
  class QueryScriptCancelError(QueryScriptRunError):
      pass

datachain/lib/clip.py CHANGED
@@ -18,7 +18,7 @@ def _get_encoder(model: Any, type: Literal["image", "text"]) -> Callable:
          hasattr(model, method_name) and inspect.ismethod(getattr(model, method_name))
      ):
          method = getattr(model, method_name)
-         return lambda x: method(torch.tensor(x))
+         return lambda x: method(torch.as_tensor(x).clone().detach())

      # Check for model from clip or open_clip library
      method_name = f"encode_{type}"
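The `torch.tensor(x)` to `torch.as_tensor(x).clone().detach()` change here (repeated in image.py and text.py below) matches the replacement PyTorch itself recommends when copy-constructing from an existing tensor: it avoids the copy-construct UserWarning while still producing an independent, autograd-detached copy. A small illustration, assuming only that torch is installed:

import torch

x = torch.ones(3, requires_grad=True)

# torch.tensor(x) would warn: "To copy construct from a tensor, it is
# recommended to use sourceTensor.clone().detach() ...".
t = torch.as_tensor(x).clone().detach()

print(t.requires_grad)                # False: detached from the autograd graph
print(t.data_ptr() == x.data_ptr())   # False: clone() made a separate copy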
datachain/lib/dc.py CHANGED
@@ -415,7 +415,7 @@ class DataChain(DatasetQuery):
              .save(list_dataset_name, listing=True)
          )

-         dc = cls.from_dataset(list_dataset_name, session=session)
+         dc = cls.from_dataset(list_dataset_name, session=session, settings=settings)
          dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})

          return ls(dc, list_path, recursive=recursive, object_name=object_name)
@@ -426,6 +426,7 @@
          name: str,
          version: Optional[int] = None,
          session: Optional[Session] = None,
+         settings: Optional[dict] = None,
      ) -> "DataChain":
          """Get data from a saved Dataset. It returns the chain itself.

@@ -438,7 +439,7 @@
              chain = DataChain.from_dataset("my_cats")
              ```
          """
-         return DataChain(name=name, version=version, session=session)
+         return DataChain(name=name, version=version, session=session, settings=settings)

      @classmethod
      def from_json(
@@ -1622,6 +1623,8 @@
          model_name: str = "",
          source: bool = True,
          nrows=None,
+         session: Optional[Session] = None,
+         settings: Optional[dict] = None,
          **kwargs,
      ) -> "DataChain":
          """Generate chain from csv files.
@@ -1638,6 +1641,8 @@
              model_name : Generated model name.
              source : Whether to include info about the source file.
              nrows : Optional row limit.
+             session : Session to use for the chain.
+             settings : Settings to use for the chain.

          Example:
              Reading a csv file:
@@ -1654,7 +1659,9 @@
          from pyarrow.csv import ConvertOptions, ParseOptions, ReadOptions
          from pyarrow.dataset import CsvFileFormat

-         chain = DataChain.from_storage(path, **kwargs)
+         chain = DataChain.from_storage(
+             path, session=session, settings=settings, **kwargs
+         )

          column_names = None
          if not header:
@@ -1701,6 +1708,8 @@
          object_name: str = "",
          model_name: str = "",
          source: bool = True,
+         session: Optional[Session] = None,
+         settings: Optional[dict] = None,
          **kwargs,
      ) -> "DataChain":
          """Generate chain from parquet files.
@@ -1713,6 +1722,8 @@
              object_name : Created object column name.
              model_name : Generated model name.
              source : Whether to include info about the source file.
+             session : Session to use for the chain.
+             settings : Settings to use for the chain.

          Example:
              Reading a single file:
@@ -1725,7 +1736,9 @@
              dc = DataChain.from_parquet("s3://mybucket/dir")
              ```
          """
-         chain = DataChain.from_storage(path, **kwargs)
+         chain = DataChain.from_storage(
+             path, session=session, settings=settings, **kwargs
+         )
          return chain.parse_tabular(
              output=output,
              object_name=object_name,
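With these changes `from_dataset`, `from_csv`, and `from_parquet` accept and forward `session` and `settings`, so the underlying `from_storage` listing runs under the caller's session and settings rather than the defaults. A hedged usage sketch; the `"cache"` settings key is illustrative and not taken from this diff:

from datachain.lib.dc import DataChain

# settings/session are passed through to from_storage and the resulting chain
chain = DataChain.from_parquet(
    "s3://mybucket/dir",
    settings={"cache": True},  # illustrative key; see datachain.lib.settings
)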
datachain/lib/file.py CHANGED
@@ -195,14 +195,15 @@ class File(DataModel):
              with VFileRegistry.resolve(self, self.location) as f:  # type: ignore[arg-type]
                  yield f

-         uid = self.get_uid()
-         client = self._catalog.get_client(self.source)
-         if self._caching_enabled:
-             client.download(uid, callback=self._download_cb)
-         with client.open_object(
-             uid, use_cache=self._caching_enabled, cb=self._download_cb
-         ) as f:
-             yield io.TextIOWrapper(f) if mode == "r" else f
+         else:
+             uid = self.get_uid()
+             client = self._catalog.get_client(self.source)
+             if self._caching_enabled:
+                 client.download(uid, callback=self._download_cb)
+             with client.open_object(
+                 uid, use_cache=self._caching_enabled, cb=self._download_cb
+             ) as f:
+                 yield io.TextIOWrapper(f) if mode == "r" else f

      def read(self, length: int = -1):
          """Returns file contents."""
datachain/lib/image.py CHANGED
@@ -34,7 +34,7 @@ def convert_image(
          from transformers.image_processing_utils import BaseImageProcessor

          if isinstance(transform, BaseImageProcessor):
-             img = torch.tensor(img.pixel_values[0])  # type: ignore[assignment,attr-defined]
+             img = torch.as_tensor(img.pixel_values[0]).clone().detach()  # type: ignore[assignment,attr-defined]
      except ImportError:
          pass
      if device:
datachain/lib/meta_formats.py CHANGED
@@ -1,13 +1,10 @@
- # pip install datamodel-code-generator
- # pip install jmespath
- #
  import csv
  import json
  import tempfile
  import uuid
  from collections.abc import Iterator
  from pathlib import Path
- from typing import Any, Callable
+ from typing import Callable

  import datamodel_code_generator
  import jmespath as jsp
@@ -85,7 +82,6 @@ def read_schema(source_file, data_type="csv", expr=None, model_name=None):
          use_standard_collections=True,
      )
      epilogue = f"""
- {model_name}.model_rebuild()
  DataModel.register({model_name})
  spec = {model_name}
  """
@@ -122,9 +118,9 @@ def read_meta(  # noqa: C901
      print(f"{model_output}")
      # Below 'spec' should be a dynamically converted DataModel from Pydantic
      if not spec:
-         local_vars: dict[str, Any] = {}
-         exec(model_output, globals(), local_vars)  # type: ignore[arg-type] # noqa: S102
-         spec = local_vars["spec"]
+         gl = globals()
+         exec(model_output, gl)  # type: ignore[arg-type] # noqa: S102
+         spec = gl["spec"]

      if not (spec) and not (schema_from):
          raise ValueError(
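A likely motivation for switching from `exec(model_output, globals(), local_vars)` to executing into a single globals mapping is the classic `exec` pitfall: with separate globals and locals dicts, top-level names from the executed code land in the locals dict, while functions and classes defined there still resolve names through the globals dict, so the generated code can fail to see its own definitions. The sketch below demonstrates that pitfall in isolation; it is not taken from the datachain code:

import textwrap

generated = textwrap.dedent("""
    BASE = 10

    def scaled(x):
        return x * BASE   # resolved through the function's globals at call time

    spec = scaled
""")

# Separate globals/locals: BASE ends up in local_vars, but scaled() looks it
# up in the (empty) globals dict, so calling it raises NameError.
local_vars = {}
exec(generated, {}, local_vars)
try:
    local_vars["spec"](2)
except NameError as exc:
    print("separate locals:", exc)

# A single mapping behaves like a module namespace: everything resolves.
gl = {}
exec(generated, gl)
print("single globals:", gl["spec"](2))  # 20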
datachain/lib/model_store.py CHANGED
@@ -1,3 +1,4 @@
+ import inspect
  import logging
  from typing import ClassVar, Optional

@@ -69,7 +70,11 @@ class ModelStore:

      @staticmethod
      def is_pydantic(val):
-         return not hasattr(val, "__origin__") and issubclass(val, BaseModel)
+         return (
+             not hasattr(val, "__origin__")
+             and inspect.isclass(val)
+             and issubclass(val, BaseModel)
+         )

      @staticmethod
      def to_pydantic(val) -> Optional[type[BaseModel]]:
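The extra `inspect.isclass(val)` guard protects `issubclass`, which raises `TypeError` when given something that is not a class (an instance, a number, and so on). A quick check of the new predicate; the `Point` model is just an example:

import inspect
from pydantic import BaseModel

def is_pydantic(val):
    return (
        not hasattr(val, "__origin__")
        and inspect.isclass(val)
        and issubclass(val, BaseModel)
    )

class Point(BaseModel):
    x: int = 0

print(is_pydantic(Point))      # True
print(is_pydantic(Point()))    # False: an instance, not a class
print(is_pydantic(42))         # False: previously issubclass(42, ...) raised TypeError
print(is_pydantic(list[int]))  # False: generic alias, has __origin__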
datachain/lib/text.py CHANGED
@@ -33,7 +33,7 @@ def convert_text(
      res = tokenizer(text)

      tokens = res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res
-     tokens = torch.tensor(tokens)
+     tokens = torch.as_tensor(tokens).clone().detach()
      if device:
          tokens = tokens.to(device)

datachain/lib/webdataset.py CHANGED
@@ -1,6 +1,7 @@
  import hashlib
  import json
  import tarfile
+ import warnings
  from collections.abc import Iterator, Sequence
  from pathlib import Path
  from typing import (
@@ -19,6 +20,18 @@ from datachain.lib.data_model import DataModel
  from datachain.lib.file import File, TarVFile
  from datachain.lib.utils import DataChainError

+ # The `json` method of the Pydantic `BaseModel` class has been deprecated
+ # and will be removed in Pydantic v3. For more details, see:
+ # https://github.com/pydantic/pydantic/issues/10033
+ # Until then, we can ignore the warning.
+ warnings.filterwarnings(
+     "ignore",
+     category=UserWarning,
+     message=(
+         'Field name "json" in "WDSAllFile" shadows an attribute in parent "WDSBasic"'
+     ),
+ )
+

  class WDSError(DataChainError):
      def __init__(self, tar_stream, message: str):
datachain/lib/webdataset_laion.py CHANGED
@@ -1,3 +1,4 @@
+ import warnings
  from collections.abc import Iterator
  from typing import Optional

@@ -7,6 +8,18 @@ from pydantic import BaseModel, Field
  from datachain.lib.file import File
  from datachain.lib.webdataset import WDSBasic, WDSReadableSubclass

+ # The `json` method of the Pydantic `BaseModel` class has been deprecated
+ # and will be removed in Pydantic v3. For more details, see:
+ # https://github.com/pydantic/pydantic/issues/10033
+ # Until then, we can ignore the warning.
+ warnings.filterwarnings(
+     "ignore",
+     category=UserWarning,
+     message=(
+         'Field name "json" in "WDSLaion" shadows an attribute in parent "WDSBasic"'
+     ),
+ )
+

  class Laion(WDSReadableSubclass):
      uid: str = Field(default="")
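Both webdataset modules register the same kind of filter: it silences one specific, expected pydantic UserWarning (a field literally named json shadowing an attribute of the parent model) by matching the message text, while every other warning still surfaces. The mechanism in isolation, with a made-up message:

import warnings

warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="expected: field shadows a parent attribute",
)

warnings.warn("expected: field shadows a parent attribute", UserWarning)  # silenced
warnings.warn("something else went wrong", UserWarning)                   # still shown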
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datachain
- Version: 0.3.11
+ Version: 0.3.12
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License: Apache-2.0
@@ -80,7 +80,6 @@ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
  Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
  Requires-Dist: pytest-servers[all] >=0.5.5 ; extra == 'tests'
  Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
- Requires-Dist: pytest-asyncio >=0.23.2 ; extra == 'tests'
  Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
  Requires-Dist: virtualenv ; extra == 'tests'
  Requires-Dist: dulwich ; extra == 'tests'
@@ -96,12 +95,14 @@ Requires-Dist: transformers >=4.36.0 ; extra == 'torch'
  Provides-Extra: vector
  Requires-Dist: usearch ; extra == 'vector'

- .. image:: docs/assets/datachain_logotype.svg
-    :height: 48
-    :alt: DataChain logo
+ ================
+ |logo| DataChain
+ ================

  |PyPI| |Python Version| |Codecov| |Tests|

+ .. |logo| image:: docs/assets/datachain.svg
+    :height: 24
  .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
     :target: https://pypi.org/project/datachain/
     :alt: PyPI
@@ -115,8 +116,6 @@ Requires-Dist: usearch ; extra == 'vector'
     :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
     :alt: Tests

- ----------------
-
  DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
  It is made to organize your unstructured data into datasets and wrangle it at scale on
  your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
@@ -6,7 +6,7 @@ datachain/cli.py,sha256=ECf_z5X8ILDJdUn2Cpb_z-ZjSRIzn7skiuMGfM-y9i0,30999
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
  datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
  datachain/dataset.py,sha256=EcYjhHg1dxxPbDwSuIxc-mDRDo3v_pYf79fMy4re1oA,14740
- datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
+ datachain/error.py,sha256=OnZ8OaBtDdTZPy8XQiy29SAjqdQArQeorYbP5ju7ldc,1199
  datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
  datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
  datachain/node.py,sha256=dcm_7dVurFHpI0EHV2K6SjpJyh-gN4PVWAB-20quk04,6382
@@ -17,7 +17,7 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
  datachain/utils.py,sha256=HKUdVqreBTzcCULAYRw1sC6z33OaomVD1WoMSoFcPHg,13148
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
- datachain/catalog/catalog.py,sha256=NgS7_SlmpJdUSp1v8KdCuLTjFklmYvT_jOLdzTyyK5I,72313
+ datachain/catalog/catalog.py,sha256=xVFNUZ339u2l58ZyPaiJ6GsRRpwqq0LYUbdOHC-Otog,69654
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
  datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
  datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
@@ -39,27 +39,27 @@ datachain/data_storage/sqlite.py,sha256=Z4B2KDL4C8Uio2aLMxaKv0t2MoOtCV3bSqWg4X9m
  datachain/data_storage/warehouse.py,sha256=f7ETnYIXx5KMcPfwg_4bh_00QJiMLIliwE_41vmRGUo,33037
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/lib/arrow.py,sha256=dV17oGiknqEW55ogGK_9T0ycNFwd2z-EFOW0AQiR6TU,5840
- datachain/lib/clip.py,sha256=33RL11OIqfbwyhvBgiMGM8rDAnZx1IRmxk9dY89ls3Q,6130
+ datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
  datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
  datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
- datachain/lib/dc.py,sha256=s4E-bD6_T6JFJ7TEa5Y9RS705lIfcV9OUJwDD6RNCX0,68156
- datachain/lib/file.py,sha256=WOOYw3LcGROA6wshJ_aZkSgcTqfB4UxTbZDTx9KqAOg,11429
+ datachain/lib/dc.py,sha256=gYRkrriG5RJxgLpOUccDU8DFRSoeWZjgmJwHfUo_z7w,68731
+ datachain/lib/file.py,sha256=tNb3rJyRYGxpOc6XxcZjIQ9yVHKc7WLAOKoTYqp2TB0,11475
  datachain/lib/hf.py,sha256=ZiMvgy3DYiklGKZv-w7gevrHOgn3bGfpTlpDPOHCNqs,5336
- datachain/lib/image.py,sha256=WbcwSaFzuyqjg4x4hH5CUogeUQjkZFjQHqw_oDEV1nA,2655
+ datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
  datachain/lib/listing.py,sha256=S9Xn_Saxu4xk3K_01VexkfMZW0INQiATlidt2bzgWKY,3938
  datachain/lib/listing_info.py,sha256=sr5KzCXlCxlPuRmy_pVadD4miLpp5y0btvyaIPcluwI,996
- datachain/lib/meta_formats.py,sha256=67uF9trQ2II6xFvN0u6eo5NNRf5xvCkpMHj7ThiG41Y,6777
- datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
+ datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw,6635
+ datachain/lib/model_store.py,sha256=xcrQ69-jcQs716U4UFOSoSKM7EvFIWqxlPhIcE4X7oI,2497
  datachain/lib/pytorch.py,sha256=vK3GbWCy7kunN7ubul6w1hrWmJLja56uTCiMG_7XVQA,5623
  datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
  datachain/lib/signal_schema.py,sha256=hqQLwUmt3w8RLa96MtubK9N2CBXqqTPrUkSRXc0ktt4,20275
- datachain/lib/text.py,sha256=vqs1SQdsw1vCzfvOanIeT4xY2R2TmPonElBgYDVeZmY,1241
+ datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
  datachain/lib/udf.py,sha256=nG7DDuPgZ5ZuijwvDoCq-OZMxlDM8vFNzyxMmik0Y1c,11716
  datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
  datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
  datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datachain/lib/webdataset.py,sha256=Q3UlCk66341sq-nvFbBCX4Cv3cYXBK9n12ejG4axPXE,8298
- datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
+ datachain/lib/webdataset.py,sha256=ZzGLtOUA-QjP4kttGgNqhrioDuDnomWFlsow4fLdezQ,8717
+ datachain/lib/webdataset_laion.py,sha256=aGMWeFmeYNK75ewO9JTA11iB1i3QtTzUfenQA5jajfo,2535
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
  datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
@@ -96,9 +96,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
- datachain-0.3.11.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
- datachain-0.3.11.dist-info/METADATA,sha256=iSdfjWpVT1Iqzlg82eN5QzJ-icaYxkG7TUKEpEOi5sk,17124
- datachain-0.3.11.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
- datachain-0.3.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
- datachain-0.3.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
- datachain-0.3.11.dist-info/RECORD,,
+ datachain-0.3.12.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+ datachain-0.3.12.dist-info/METADATA,sha256=I_Yz0lbiCk4KWv026U7zpDGrU72G575Hd_OnE_seb1k,17073
+ datachain-0.3.12.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+ datachain-0.3.12.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+ datachain-0.3.12.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+ datachain-0.3.12.dist-info/RECORD,,