datachain 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


datachain/lib/listing_info.py ADDED
@@ -0,0 +1,32 @@
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+from datachain.client import Client
+from datachain.lib.dataset_info import DatasetInfo
+from datachain.lib.listing import LISTING_PREFIX, LISTING_TTL
+
+
+class ListingInfo(DatasetInfo):
+    @property
+    def uri(self) -> str:
+        return self.name.removeprefix(LISTING_PREFIX)
+
+    @property
+    def storage_uri(self) -> str:
+        client, _ = Client.parse_url(self.uri, None)  # type: ignore[arg-type]
+        return client.uri
+
+    @property
+    def expires(self) -> Optional[datetime]:
+        if not self.finished_at:
+            return None
+        return self.finished_at + timedelta(seconds=LISTING_TTL)
+
+    @property
+    def is_expired(self) -> bool:
+        return datetime.now(timezone.utc) > self.expires if self.expires else False
+
+    @property
+    def last_inserted_at(self):
+        # TODO we need to add updated_at to dataset version or explicit last_inserted_at
+        raise NotImplementedError
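
The new `ListingInfo` model drives listing-cache reuse: a listing dataset is considered stale once `LISTING_TTL` seconds have passed since `finished_at`. A minimal consumption sketch (the `describe` helper is hypothetical, not part of the package):

.. code:: py

    from datachain.lib.listing_info import ListingInfo

    def describe(listing: ListingInfo) -> str:
        # uri strips the internal listing prefix, leaving the storage path
        status = "expired" if listing.is_expired else f"valid until {listing.expires}"
        return f"{listing.uri} ({status})"
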
datachain/lib/meta_formats.py CHANGED
@@ -54,10 +54,10 @@ def read_schema(source_file, data_type="csv", expr=None, model_name=None):
     try:
         with source_file.open() as fd:  # CSV can be larger than memory
             if data_type == "csv":
-                data_string += fd.readline().decode("utf-8", "ignore").replace("\r", "")
-                data_string += fd.readline().decode("utf-8", "ignore").replace("\r", "")
+                data_string += fd.readline().replace("\r", "")
+                data_string += fd.readline().replace("\r", "")
             elif data_type == "jsonl":
-                data_string = fd.readline().decode("utf-8", "ignore").replace("\r", "")
+                data_string = fd.readline().replace("\r", "")
             else:
                 data_string = fd.read()  # other meta must fit into RAM
     except OSError as e:
@@ -120,7 +120,7 @@ def read_meta(  # noqa: C901
     sys.stdout = captured_output
     try:
         chain = (
-            DataChain.from_storage(schema_from)
+            DataChain.from_storage(schema_from, type="text")
            .limit(1)
            .map(  # dummy column created (#1615)
                meta_schema=lambda file: read_schema(
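
For context: with `type="text"` the rows yield text-mode files, so `open()` returns a handle whose `readline()` produces `str` rather than `bytes`, which is why the explicit `.decode("utf-8", "ignore")` calls were dropped in `read_schema` above. A hedged sketch of the call pattern (bucket path hypothetical):

.. code:: py

    from datachain.lib.dc import DataChain

    # Files load as text; no manual UTF-8 decoding when reading lines.
    chain = DataChain.from_storage("s3://my-bucket/meta/", type="text")
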
datachain/lib/signal_schema.py CHANGED
@@ -16,7 +16,6 @@ from typing import (
     get_origin,
 )
 
-import sqlalchemy as sa
 from pydantic import BaseModel, create_model
 from typing_extensions import Literal as LiteralEx
 
@@ -341,7 +340,7 @@ class SignalSchema:
         signals = [
             DEFAULT_DELIMITER.join(path)
             if not as_columns
-            else sa.Column(DEFAULT_DELIMITER.join(path), python_to_sql(_type))
+            else Column(DEFAULT_DELIMITER.join(path), python_to_sql(_type))
             for path, _type, has_subtree, _ in self.get_flat_tree()
             if not has_subtree
         ]
@@ -415,6 +414,10 @@ class SignalSchema:
                 # renaming existing signal
                 del new_values[value.name]
                 new_values[name] = self.values[value.name]
+            elif name in self.values:
+                # changing the type of existing signal, e.g File -> ImageFile
+                del new_values[name]
+                new_values[name] = args_map[name]
             else:
                 # adding new signal
                 new_values.update(sql_to_python({name: value}))
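
The new `elif` branch lets `mutate()` re-type an existing signal in place instead of treating the name collision as a brand-new signal. A schema-level sketch, under the assumption that `args_map` maps the existing name to its new type (names and values hypothetical):

.. code:: py

    from datachain.lib.file import File, ImageFile
    from datachain.lib.signal_schema import SignalSchema

    schema = SignalSchema({"file": File})
    # "file" already exists, so its type is swapped rather than added anew
    mutated = schema.mutate({"file": ImageFile})
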
datachain/lib/webdataset.py CHANGED
@@ -222,7 +222,7 @@ class TarStream(File):
         self._tar = None
 
     def open(self):
-        self._tar = tarfile.open(fileobj=super().open())
+        self._tar = tarfile.open(fileobj=super().open())  # noqa: SIM115
         return self
 
     def getmembers(self) -> list[tarfile.TarInfo]:
datachain/node.py CHANGED
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Optional
 import attrs
 
 from datachain.cache import UniqueId
+from datachain.lib.file import File
 from datachain.storage import StorageURI
 from datachain.utils import TIME_ZERO, time_to_str
 
@@ -189,6 +190,18 @@ class Entry:
             return ""
         return split[0]
 
+    def to_file(self, source: str) -> File:
+        return File(
+            source=source,
+            path=self.path,
+            size=self.size,
+            version=self.version,
+            etag=self.etag,
+            is_latest=self.is_latest,
+            last_modified=self.last_modified,
+            location=self.location,
+        )
+
 
 def get_path(parent: str, name: str):
     return f"{parent}/{name}" if parent else name
datachain/query/dataset.py CHANGED
@@ -1,5 +1,4 @@
 import contextlib
-import datetime
 import inspect
 import json
 import logging
@@ -60,7 +59,6 @@ from datachain.utils import (
     get_datachain_executable,
 )
 
-from .metrics import metrics
 from .schema import C, UDFParamSpec, normalize_param
 from .session import Session
 from .udf import UDFBase, UDFClassWrapper, UDFFactory, UDFType
@@ -219,7 +217,7 @@ class IndexingStep(StartingStep):
             recursive=self.recursive,
         )
 
-        storage = self.catalog.get_storage(uri)
+        storage = self.catalog.metastore.get_storage(uri)
 
        return step_result(q, dataset_rows.c, dependencies=[storage.uri])
 
@@ -296,15 +294,23 @@ class DatasetDiffOperation(Step):
 
 @frozen
 class Subtract(DatasetDiffOperation):
-    on: Sequence[str]
+    on: Sequence[tuple[str, str]]
 
     def query(self, source_query: Select, target_query: Select) -> sa.Selectable:
         sq = source_query.alias("source_query")
         tq = target_query.alias("target_query")
         where_clause = sa.and_(
-            getattr(sq.c, col_name).is_not_distinct_from(getattr(tq.c, col_name))
-            for col_name in self.on
-        )  # type: ignore[arg-type]
+            *[
+                getattr(
+                    sq.c, col_name[0] if isinstance(col_name, tuple) else col_name
+                ).is_not_distinct_from(
+                    getattr(
+                        tq.c, col_name[1] if isinstance(col_name, tuple) else col_name
+                    )
+                )
+                for col_name in self.on
+            ]
+        )
         return sq.select().except_(sq.select().where(where_clause))
 
 
@@ -1571,10 +1577,10 @@ class DatasetQuery:
 
     @detach
     def subtract(self, dq: "DatasetQuery") -> "Self":
-        return self._subtract(dq, on=["source", "path"])
+        return self._subtract(dq, on=[("source", "source"), ("path", "path")])
 
     @detach
-    def _subtract(self, dq: "DatasetQuery", on: Sequence[str]) -> "Self":
+    def _subtract(self, dq: "DatasetQuery", on: Sequence[tuple[str, str]]) -> "Self":
         query = self.clone()
         query.steps.append(Subtract(dq, self.catalog, on=on))
         return query
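
`on` is now a sequence of `(source_column, target_column)` pairs, so `_subtract` can match columns that are named differently in the two queries. Sketch with hypothetical `left` and `right` queries:

.. code:: py

    # Keep rows of `left` that have no match in `right` on both pairs:
    # left.source == right.source and left.path == right.path
    result = left._subtract(right, on=[("source", "source"), ("path", "path")])
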
@@ -1626,7 +1632,7 @@ class DatasetQuery:
             )
         else:
             # storage dependency - its name is a valid StorageURI
-            storage = self.catalog.get_storage(dependency)
+            storage = self.catalog.metastore.get_storage(dependency)
             self.catalog.metastore.add_storage_dependency(
                 StorageURI(dataset.name),
                 version,
@@ -1717,54 +1723,6 @@ def _get_output_fd_for_write() -> Union[str, int]:
     return msvcrt.open_osfhandle(int(handle), os.O_WRONLY)  # type: ignore[attr-defined]
 
 
-@attrs.define
-class ExecutionResult:
-    preview: list[dict] = attrs.field(factory=list)
-    dataset: Optional[tuple[str, int]] = None
-    metrics: dict[str, Any] = attrs.field(factory=dict)
-
-
-def _send_result(dataset_query: DatasetQuery) -> None:
-    class JSONSerialize(json.JSONEncoder):
-        def default(self, obj):
-            if isinstance(obj, (datetime.datetime, datetime.date)):
-                return obj.isoformat()
-            if isinstance(obj, bytes):
-                return list(obj[:1024])
-            return super().default(obj)
-
-    try:
-        preview_args: dict[str, Any] = json.loads(
-            os.getenv("DATACHAIN_QUERY_PREVIEW_ARGS", "")
-        )
-    except ValueError:
-        preview_args = {}
-
-    columns = preview_args.get("columns") or []
-
-    if type(dataset_query) is DatasetQuery:
-        preview_query = dataset_query.select(*columns)
-    else:
-        preview_query = dataset_query.select(*columns, _sys=False)
-
-    preview_query = preview_query.limit(preview_args.get("limit", 10)).offset(
-        preview_args.get("offset", 0)
-    )
-
-    dataset: Optional[tuple[str, int]] = None
-    if dataset_query.attached:
-        assert dataset_query.name, "Dataset name should be provided"
-        assert dataset_query.version, "Dataset version should be provided"
-        dataset = dataset_query.name, dataset_query.version
-
-    preview = preview_query.to_db_records()
-    result = ExecutionResult(preview, dataset, metrics)
-    data = attrs.asdict(result)
-
-    with open(_get_output_fd_for_write(), mode="w") as f:
-        json.dump(data, f, cls=JSONSerialize)
-
-
 def query_wrapper(dataset_query: DatasetQuery) -> DatasetQuery:
     """
     Wrapper function that wraps the last statement of user query script.
@@ -1776,41 +1734,21 @@ def query_wrapper(dataset_query: DatasetQuery) -> DatasetQuery:
 
     catalog = dataset_query.catalog
     save = bool(os.getenv("DATACHAIN_QUERY_SAVE"))
-    save_as = os.getenv("DATACHAIN_QUERY_SAVE_AS")
 
     is_session_temp_dataset = dataset_query.name and dataset_query.name.startswith(
         dataset_query.session.get_temp_prefix()
     )
 
-    if save_as:
-        if dataset_query.attached:
-            dataset_name = dataset_query.name
-            version = dataset_query.version
-            assert dataset_name, "Dataset name should be provided in attached mode"
-            assert version, "Dataset version should be provided in attached mode"
-
-            dataset = catalog.get_dataset(dataset_name)
-
-            try:
-                target_dataset = catalog.get_dataset(save_as)
-            except DatasetNotFoundError:
-                target_dataset = None
-
-            if target_dataset:
-                dataset = catalog.register_dataset(dataset, version, target_dataset)
-            else:
-                dataset = catalog.register_new_dataset(dataset, version, save_as)
-
-            dataset_query = DatasetQuery(
-                name=dataset.name,
-                version=dataset.latest_version,
-                catalog=catalog,
-            )
-        else:
-            dataset_query = dataset_query.save(save_as)
-    elif save and (is_session_temp_dataset or not dataset_query.attached):
+    if save and (is_session_temp_dataset or not dataset_query.attached):
         name = catalog.generate_query_dataset_name()
         dataset_query = dataset_query.save(name)
 
-    _send_result(dataset_query)
+    dataset: Optional[tuple[str, int]] = None
+    if dataset_query.attached:
+        assert dataset_query.name, "Dataset name should be provided"
+        assert dataset_query.version, "Dataset version should be provided"
+        dataset = dataset_query.name, dataset_query.version
+
+    with open(_get_output_fd_for_write(), mode="w") as f:
+        json.dump(dataset, f)
     return dataset_query
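
After this change the wrapper writes only the attached dataset's name and version (or `null`) to the hand-off descriptor; the preview/metrics payload of the removed `ExecutionResult` is gone. A hypothetical parent-process read, where `result_fd` stands for whatever descriptor was handed to the query process:

.. code:: py

    import json

    # Payload is now just ["dataset-name", 1] or None
    with open(result_fd) as f:
        dataset = json.load(f)
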
datachain/query/metrics.py CHANGED
@@ -1,3 +1,4 @@
+import os
 from typing import Optional, Union
 
 metrics: dict[str, Union[str, int, float, bool, None]] = {}
@@ -13,6 +14,13 @@ def set(key: str, value: Union[str, int, float, bool, None]) -> None:  # noqa: P
         raise TypeError("Value must be a string, int, float or bool")
     metrics[key] = value
 
+    if job_id := os.getenv("DATACHAIN_JOB_ID"):
+        from datachain.data_storage.job import JobStatus
+        from datachain.query.session import Session
+
+        metastore = Session.get().catalog.metastore
+        metastore.set_job_status(job_id, JobStatus.RUNNING, metrics=metrics)
+
 
 def get(key: str) -> Optional[Union[str, int, float, bool]]:
     """Get a metric value."""
datachain/utils.py CHANGED
@@ -448,3 +448,8 @@ def get_datachain_executable() -> list[str]:
     if datachain_exec_path := os.getenv("DATACHAIN_EXEC_PATH"):
         return [datachain_exec_path]
     return [sys.executable, "-m", "datachain"]
+
+
+def uses_glob(path: str) -> bool:
+    """Checks if some URI path has glob syntax in it"""
+    return glob.has_magic(os.path.basename(os.path.normpath(path)))
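
A quick behavioral sketch of the new helper (paths hypothetical); only the last path segment is checked for glob magic:

.. code:: py

    from datachain.utils import uses_glob

    assert uses_glob("s3://bucket/images/*.jpg")   # basename has glob magic
    assert not uses_glob("s3://bucket/images/")    # plain prefix, no magic
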
datachain-0.3.8.dist-info/METADATA → datachain-0.3.10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.8
+Version: 0.3.10
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -42,6 +42,7 @@ Requires-Dist: datamodel-code-generator >=0.25
 Requires-Dist: Pillow <11,>=10.0.0
 Requires-Dist: msgpack <2,>=1.0.4
 Requires-Dist: psutil
+Requires-Dist: huggingface-hub
 Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
 Provides-Extra: dev
 Requires-Dist: datachain[docs,tests] ; extra == 'dev'
@@ -67,7 +68,7 @@ Requires-Dist: pdfplumber ==0.11.4 ; extra == 'examples'
 Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
 Provides-Extra: hf
 Requires-Dist: numba >=0.60.0 ; extra == 'hf'
-Requires-Dist: datasets[audio,vision] ; extra == 'hf'
+Requires-Dist: datasets[audio,vision] >=2.21.0 ; extra == 'hf'
 Provides-Extra: remote
 Requires-Dist: lz4 ; extra == 'remote'
 Requires-Dist: requests >=2.22.0 ; extra == 'remote'
@@ -115,31 +116,30 @@ AI 🔗 DataChain
 
 DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
 It is made to organize your unstructured data into datasets and wrangle it at scale on
-your local machine.
+your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
 
 Key Features
 ============
 
 📂 **Storage as a Source of Truth.**
-  - Process unstructured data without redundant copies: S3, GCP, Azure, and local
+  - Process unstructured data without redundant copies from S3, GCP, Azure, and local
     file systems.
-  - Multimodal data: images, video, text, PDFs, JSONs, CSVs, parquet.
-  - Join files and metadata together into persistent, versioned, columnar datasets.
+  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet.
+  - Unite files and metadata together into persistent, versioned, columnar datasets.
 
 🐍 **Python-friendly data pipelines.**
   - Operate on Python objects and object fields.
-  - Built-in parallelization and out-of-memory compute without a need in SQL or
-    Spark jobs.
+  - Built-in parallelization and out-of-memory compute without SQL or Spark.
 
 🧠 **Data Enrichment and Processing.**
-  - Generate metadata columns using local AI models and LLM APIs.
-  - Filter, join, and group by AI metadata. Vector similarity search.
-  - Pass datasets to Pytorch and Tensorflow, or export back into storage.
+  - Generate metadata using local AI models and LLM APIs.
+  - Filter, join, and group by metadata. Search by vector embeddings.
+  - Pass datasets to Pytorch and Tensorflow, or export them back into storage.
 
 🚀 **Efficiency.**
   - Parallelization, out-of-memory workloads and data caching.
   - Vectorized operations on Python object fields: sum, count, avg, etc.
-  - Vector search on embeddings.
+  - Optimized vector search.
 
 
 Quick Start
@@ -164,7 +164,7 @@ where each image has a matching JSON file like `cat.1009.json`:
       "inference": {"class": "dog", "confidence": 0.68}
    }
 
-Example of downloading only high-confidence cat images using JSON metadata:
+Example of downloading only "high-confidence cat" inferred images using JSON metadata:
 
 
 .. code:: py
@@ -234,7 +234,7 @@ detected are then copied to the local directory.
 LLM judging chatbots
 =============================
 
-LLMs can work as efficient universal classifiers. In the example below,
+LLMs can work as universal classifiers. In the example below,
 we employ a free API from Mistral to judge the `publicly available`_ chatbot dialogs. Please get a free
 Mistral API key at https://console.mistral.ai
 
datachain-0.3.8.dist-info/RECORD → datachain-0.3.10.dist-info/RECORD RENAMED
@@ -2,62 +2,64 @@ datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=biF8M8fQujtj5xs0VLi8S16eBtzG6kceWlO_NILbCsg,8197
 datachain/cache.py,sha256=wznC2pge6RhlPTaJfBVGjmBc6bxWCPThu4aTFMltvFU,4076
-datachain/cli.py,sha256=otR2eN0JL-JhZ9SOTPcPwt_-_TiT-vHifx2h4YzD6Tg,32052
+datachain/cli.py,sha256=ECf_z5X8ILDJdUn2Cpb_z-ZjSRIzn7skiuMGfM-y9i0,30999
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
-datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
+datachain/dataset.py,sha256=EcYjhHg1dxxPbDwSuIxc-mDRDo3v_pYf79fMy4re1oA,14740
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
 datachain/job.py,sha256=bk25bIqClhgRPzlXAhxpTtDeewibQe5l3S8Cf7db0gM,1229
 datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
-datachain/node.py,sha256=ihrP5l9HKpXLR0fR1wyb7QIdb7NR26dX6bB09qGX5B4,6005
+datachain/node.py,sha256=dcm_7dVurFHpI0EHV2K6SjpJyh-gN4PVWAB-20quk04,6382
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
-datachain/utils.py,sha256=ROVCLwb37VmFRzgTlSGUDw4eJNgYGiQ4yMX581HfUX8,12988
+datachain/utils.py,sha256=HKUdVqreBTzcCULAYRw1sC6z33OaomVD1WoMSoFcPHg,13148
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=6S4AnDos4sGYGhy4wNSyV2pKPQNXvo819cd3Dl8Htgg,78271
+datachain/catalog/catalog.py,sha256=VO-Otcg3QLbb3E9H8gmgu-xJWQqIbWmLP2QyPg8cUos,75386
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
 datachain/client/azure.py,sha256=LXSahE0Z6r4dXqpBkKnq3J5fg7N7ymC1lSn-1SoILGc,2687
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=G4QTm3KPhlaV74T3gLXJ86345_ak8CH38ezn2ET-oLc,13230
+datachain/client/fsspec.py,sha256=LQb5tr-pP9umCFYo3nGJR_dZxUyiSN7IDE8jhp1TXco,13333
 datachain/client/gcs.py,sha256=P_E3mhzhXR9mJ_wc3AYZuczzwOJ0-D3J5qhJXeSU-xk,4518
+datachain/client/hf.py,sha256=R-F6Ks6aVM9wSNkIXOkOnZFwsJlfdRwJjymRa78RLjM,1246
 datachain/client/local.py,sha256=H8TNY8pi2kA8y9_f_1XLUjJF66f229qC_b2y4xGkzdU,5300
 datachain/client/s3.py,sha256=aQxfMH8G8bUjmHF1-6P90MSkXsU5DgOPEVlKWLu459I,6568
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
+datachain/data_storage/metastore.py,sha256=cHN0xmbUvChyayHHZm3Vqxr87jFqojPSlGBqhTPStlE,54519
 datachain/data_storage/schema.py,sha256=JKpSEz8igpwZ9zkpRPYVXZxEpiXuLKEs2WNhH0KqM6U,8552
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=jLgkvikYkENQUO_ykoNFfsBc2ofZXwFHLMa1nyWP3aw,28316
-datachain/data_storage/warehouse.py,sha256=cvlfa-nyIxqrrpSRtCdeVjlTwhn7rcIoWjOq91HhItU,33668
+datachain/data_storage/sqlite.py,sha256=Z4B2KDL4C8Uio2aLMxaKv0t2MoOtCV3bSqWg4X9mTFg,28048
+datachain/data_storage/warehouse.py,sha256=f7ETnYIXx5KMcPfwg_4bh_00QJiMLIliwE_41vmRGUo,33037
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=W8bIxMIe_b3dqMFYKGWmfbC_7Xe0gV3UiJjQ2i4EYLA,4925
+datachain/lib/arrow.py,sha256=dV17oGiknqEW55ogGK_9T0ycNFwd2z-EFOW0AQiR6TU,5840
 datachain/lib/clip.py,sha256=33RL11OIqfbwyhvBgiMGM8rDAnZx1IRmxk9dY89ls3Q,6130
 datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
-datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
-datachain/lib/dc.py,sha256=wdMzFLglOhwWKHwh4qcLA0ezMrjuRJq2il2WnkHjyag,62490
-datachain/lib/file.py,sha256=ZHpdilDPYCob8uqtwUPtBvBNxVvQRq4AC_0IGg5m-G4,12003
-datachain/lib/hf.py,sha256=mYaHFPS4CW2-stRZHBMWW-NKN4dhrnhjZobBgRocnvo,5317
+datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
+datachain/lib/dc.py,sha256=TOC5-Ar8GQBkFpWkxVeg1og_iCJt_c0FCqA8IGzUrAk,66929
+datachain/lib/file.py,sha256=WOOYw3LcGROA6wshJ_aZkSgcTqfB4UxTbZDTx9KqAOg,11429
+datachain/lib/hf.py,sha256=ZiMvgy3DYiklGKZv-w7gevrHOgn3bGfpTlpDPOHCNqs,5336
 datachain/lib/image.py,sha256=WbcwSaFzuyqjg4x4hH5CUogeUQjkZFjQHqw_oDEV1nA,2655
-datachain/lib/listing.py,sha256=nXLmGae_oQke4hnurzzWiHTEjHjWiqqHdB41Wb-hMTk,3521
-datachain/lib/meta_formats.py,sha256=Hels85LJmNCz1aYVJvhymNdAt3qdJ2-qoxsIiUezrow,7198
+datachain/lib/listing.py,sha256=S9Xn_Saxu4xk3K_01VexkfMZW0INQiATlidt2bzgWKY,3938
+datachain/lib/listing_info.py,sha256=sr5KzCXlCxlPuRmy_pVadD4miLpp5y0btvyaIPcluwI,996
+datachain/lib/meta_formats.py,sha256=0YM7PMcGSLpUKZppyzFi8RvoSwYOqbciFGvzkvYdTXA,7133
 datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
 datachain/lib/pytorch.py,sha256=vK3GbWCy7kunN7ubul6w1hrWmJLja56uTCiMG_7XVQA,5623
 datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
-datachain/lib/signal_schema.py,sha256=rW1R6nIzdtmqWzpXk7aNAfrQD58_gbvkvEGyNTQ4WNM,20099
+datachain/lib/signal_schema.py,sha256=hqQLwUmt3w8RLa96MtubK9N2CBXqqTPrUkSRXc0ktt4,20275
 datachain/lib/text.py,sha256=vqs1SQdsw1vCzfvOanIeT4xY2R2TmPonElBgYDVeZmY,1241
 datachain/lib/udf.py,sha256=nG7DDuPgZ5ZuijwvDoCq-OZMxlDM8vFNzyxMmik0Y1c,11716
 datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=SsjCKLSKEkHRRfeTHQhjoGqNPqIWw_SCWQcUwgUWWP0,8282
+datachain/lib/webdataset.py,sha256=Q3UlCk66341sq-nvFbBCX4Cv3cYXBK9n12ejG4axPXE,8298
 datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
@@ -68,9 +70,9 @@ datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMND
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
 datachain/query/builtins.py,sha256=EmKPYsoQ46zwdyOn54MuCzvYFmfsBn5F8zyF7UBUfrc,2550
-datachain/query/dataset.py,sha256=G6xA3ItIGUJTXhizdAb6S3L1zFwTf8I0w0jHa1A6F4A,61103
+datachain/query/dataset.py,sha256=v5gCAWswv6DoEWkN7DuOc7BL4Afz8p5ZSA_GNxn5_R4,59056
 datachain/query/dispatch.py,sha256=GBh3EZHDp5AaXxrjOpfrpfsuy7Umnqxu-MAXcK9X3gc,12945
-datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
+datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
 datachain/query/schema.py,sha256=BvHipN79CnSTbVFcfIEwzo1npe7HmThnk0iY-CSLEkM,7899
@@ -95,9 +97,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.8.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.3.8.dist-info/METADATA,sha256=ivteXQrJgp8dKgIO2pdwUj6Qdg96rbI3Gq0kx5fyxtk,16903
-datachain-0.3.8.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
-datachain-0.3.8.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.3.8.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.3.8.dist-info/RECORD,,
+datachain-0.3.10.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.10.dist-info/METADATA,sha256=eUsgu4Y4iK_rJbx66MCmeKuPaWS1iMKRL6mtbEB6ucY,17056
+datachain-0.3.10.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+datachain-0.3.10.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.10.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.10.dist-info/RECORD,,
datachain-0.3.8.dist-info/WHEEL → datachain-0.3.10.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (74.0.0)
+Generator: setuptools (74.1.2)
 Root-Is-Purelib: true
 Tag: py3-none-any
 