datachain 0.3.17__py3-none-any.whl → 0.3.18__py3-none-any.whl

This diff compares the contents of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of datachain might be problematic.

datachain/nodes_thread_pool.py CHANGED

@@ -20,7 +20,7 @@ class NodeChunk:
     def next_downloadable(self):
         node = next(self.nodes, None)
         while node and (
-            not node.is_downloadable or self.cache.contains(node.as_uid(self.storage))
+            not node.is_downloadable or self.cache.contains(node.to_file(self.storage))
         ):
             node = next(self.nodes, None)
         return node
datachain/progress.py CHANGED
@@ -1,8 +1,6 @@
 """Manages progress bars."""
 
 import logging
-import os
-import re
 import sys
 from threading import RLock
 from typing import Any, ClassVar
@@ -10,20 +8,12 @@ from typing import Any, ClassVar
 from fsspec.callbacks import TqdmCallback
 from tqdm import tqdm
 
+from datachain.utils import env2bool
+
 logger = logging.getLogger(__name__)
 tqdm.set_lock(RLock())
 
 
-def env2bool(var, undefined=False):
-    """
-    undefined: return value if env var is unset
-    """
-    var = os.getenv(var, None)
-    if var is None:
-        return undefined
-    return bool(re.search("1|y|yes|true", var, flags=re.IGNORECASE))
-
-
 class Tqdm(tqdm):
     """
     maximum-compatibility tqdm-based progressbars
datachain/query/dataset.py CHANGED
@@ -53,7 +53,7 @@ from datachain.utils import (
 
 from .schema import C, UDFParamSpec, normalize_param
 from .session import Session
-from .udf import UDFBase, UDFClassWrapper, UDFFactory, UDFType
+from .udf import UDFBase
 
 if TYPE_CHECKING:
     from sqlalchemy.sql.elements import ClauseElement
@@ -364,7 +364,7 @@ def get_generated_callback(is_generator: bool = False) -> Callback:
 
 @frozen
 class UDFStep(Step, ABC):
-    udf: UDFType
+    udf: UDFBase
     catalog: "Catalog"
     partition_by: Optional[PartitionByType] = None
     parallel: Optional[int] = None
@@ -470,12 +470,6 @@ class UDFStep(Step, ABC):
 
         else:
             # Otherwise process single-threaded (faster for smaller UDFs)
-            # Optionally instantiate the UDF instance if a class is provided.
-            if isinstance(self.udf, UDFFactory):
-                udf: UDFBase = self.udf()
-            else:
-                udf = self.udf
-
             warehouse = self.catalog.warehouse
 
             with contextlib.closing(
@@ -485,7 +479,7 @@ class UDFStep(Step, ABC):
                 processed_cb = get_processed_callback()
                 generated_cb = get_generated_callback(self.is_generator)
                 try:
-                    udf_results = udf.run(
+                    udf_results = self.udf.run(
                         udf_fields,
                         udf_inputs,
                         self.catalog,
@@ -498,7 +492,7 @@ class UDFStep(Step, ABC):
                         warehouse,
                         udf_table,
                         udf_results,
-                        udf,
+                        self.udf,
                         cb=generated_cb,
                     )
                 finally:
@@ -1471,7 +1465,7 @@ class DatasetQuery:
     @detach
     def add_signals(
         self,
-        udf: UDFType,
+        udf: UDFBase,
         parallel: Optional[int] = None,
         workers: Union[bool, int] = False,
         min_task_size: Optional[int] = None,
@@ -1492,9 +1486,6 @@ class DatasetQuery:
         at least that minimum number of rows to each distributed worker, mostly useful
         if there are a very large number of small tasks to process.
         """
-        if isinstance(udf, UDFClassWrapper):  # type: ignore[unreachable]
-            # This is a bare decorated class, "instantiate" it now.
-            udf = udf()  # type: ignore[unreachable]
         query = self.clone()
         query.steps.append(
             UDFSignal(
@@ -1518,16 +1509,13 @@ class DatasetQuery:
     @detach
     def generate(
         self,
-        udf: UDFType,
+        udf: UDFBase,
         parallel: Optional[int] = None,
         workers: Union[bool, int] = False,
         min_task_size: Optional[int] = None,
        partition_by: Optional[PartitionByType] = None,
         cache: bool = False,
     ) -> "Self":
-        if isinstance(udf, UDFClassWrapper):  # type: ignore[unreachable]
-            # This is a bare decorated class, "instantiate" it now.
-            udf = udf()  # type: ignore[unreachable]
         query = self.clone()
         steps = query.steps
         steps.append(
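
The practical effect of dropping UDFType and UDFClassWrapper: add_signals() and generate() now accept only already-instantiated UDFBase objects, and UDFStep uses self.udf directly instead of resolving a possible factory first. A hedged caller-side sketch (the wrapper function and its arguments are illustrative, not verbatim datachain usage):

from datachain.query.dataset import DatasetQuery
from datachain.query.udf import UDFBase

def apply_udf(query: DatasetQuery, udf: UDFBase) -> DatasetQuery:
    # In 0.3.17 a bare decorated class (UDFClassWrapper) was also accepted
    # and instantiated inside add_signals(); in 0.3.18 the caller must pass
    # a ready UDFBase instance.
    return query.add_signals(udf, parallel=2)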
datachain/query/dispatch.py CHANGED
@@ -27,7 +27,7 @@ from datachain.query.queue import (
     put_into_queue,
     unmarshal,
 )
-from datachain.query.udf import UDFBase, UDFFactory, UDFResult
+from datachain.query.udf import UDFBase, UDFResult
 from datachain.utils import batched_it
 
 DEFAULT_BATCH_SIZE = 10000
@@ -156,8 +156,6 @@ class UDFDispatcher:
 
     @property
     def batch_size(self):
-        if not self.udf:
-            self.udf = self.udf_factory()
         if self._batch_size is None:
             if hasattr(self.udf, "properties") and hasattr(
                 self.udf.properties, "batch"
@@ -181,18 +179,7 @@ class UDFDispatcher:
         self.catalog = Catalog(
             id_generator, metastore, warehouse, **self.catalog_init_params
         )
-        udf = loads(self.udf_data)
-        # isinstance cannot be used here, as cloudpickle packages the entire class
-        # definition, and so these two types are not considered exactly equal,
-        # even if they have the same import path.
-        if full_module_type_path(type(udf)) != full_module_type_path(UDFFactory):
-            self.udf = udf
-        else:
-            self.udf = None
-            self.udf_factory = udf
-        if not self.udf:
-            self.udf = self.udf_factory()
-
+        self.udf = loads(self.udf_data)
         return UDFWorker(
             self.catalog,
             self.udf,
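
With the factory branch gone, worker setup reduces to a single cloudpickle round-trip: the driver pickles an already-instantiated UDF and each worker unpickles it as-is. The removed comment explains why this is safe: cloudpickle ships the entire class definition along with the instance. A minimal sketch of that property (Embedder is a hypothetical stand-in, not datachain API):

from cloudpickle import dumps, loads

class Embedder:
    """Stand-in for a stateful UDF instantiated on the driver."""

    def __init__(self, dim: int):
        self.dim = dim

    def __call__(self, row):
        return [0.0] * self.dim  # placeholder signal

udf_data = dumps(Embedder(dim=8))  # driver side: class body + state captured
worker_udf = loads(udf_data)       # worker side: ready to run, no factory step
assert worker_udf.dim == 8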
datachain/query/schema.py CHANGED
@@ -9,6 +9,7 @@ import attrs
 import sqlalchemy as sa
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 
+from datachain.lib.file import File
 from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
 
 if TYPE_CHECKING:
@@ -97,11 +98,11 @@ class Object(UDFParameter):
         cb: Callback = DEFAULT_CALLBACK,
         **kwargs,
     ) -> Any:
-        client = catalog.get_client(row["file__source"])
-        uid = catalog._get_row_uid(file_signals(row))
+        file = File._from_row(file_signals(row))
+        client = catalog.get_client(file.source)
         if cache:
-            client.download(uid, callback=cb)
-        with client.open_object(uid, use_cache=cache, cb=cb) as f:
+            client.download(file, callback=cb)
+        with client.open_object(file, use_cache=cache, cb=cb) as f:
             return self.reader(f)
 
     async def get_value_async(
@@ -114,12 +115,12 @@ class Object(UDFParameter):
         cb: Callback = DEFAULT_CALLBACK,
         **kwargs,
     ) -> Any:
-        client = catalog.get_client(row["file__source"])
-        uid = catalog._get_row_uid(file_signals(row))
+        file = File._from_row(file_signals(row))
+        client = catalog.get_client(file.source)
         if cache:
-            await client._download(uid, callback=cb)
+            await client._download(file, callback=cb)
         obj = await mapper.to_thread(
-            functools.partial(client.open_object, uid, use_cache=cache, cb=cb)
+            functools.partial(client.open_object, file, use_cache=cache, cb=cb)
         )
         with obj:
             return await mapper.to_thread(self.reader, obj)
@@ -140,11 +141,11 @@ class Stream(UDFParameter):
         cb: Callback = DEFAULT_CALLBACK,
         **kwargs,
     ) -> Any:
-        client = catalog.get_client(row["file__source"])
-        uid = catalog._get_row_uid(file_signals(row))
+        file = File._from_row(file_signals(row))
+        client = catalog.get_client(file.source)
         if cache:
-            client.download(uid, callback=cb)
-        return client.open_object(uid, use_cache=cache, cb=cb)
+            client.download(file, callback=cb)
+        return client.open_object(file, use_cache=cache, cb=cb)
 
     async def get_value_async(
         self,
@@ -156,12 +157,12 @@ class Stream(UDFParameter):
         cb: Callback = DEFAULT_CALLBACK,
         **kwargs,
     ) -> Any:
-        client = catalog.get_client(row["file__source"])
-        uid = catalog._get_row_uid(file_signals(row))
+        file = File._from_row(file_signals(row))
+        client = catalog.get_client(file.source)
         if cache:
-            await client._download(uid, callback=cb)
+            await client._download(file, callback=cb)
         return await mapper.to_thread(
-            functools.partial(client.open_object, uid, use_cache=cache, cb=cb)
+            functools.partial(client.open_object, file, use_cache=cache, cb=cb)
         )
 
 
@@ -189,10 +190,10 @@ class LocalFilename(UDFParameter):
             # If the glob pattern is specified and the row filename
             # does not match it, then return None
             return None
-        client = catalog.get_client(row["file__source"])
-        uid = catalog._get_row_uid(file_signals(row))
-        client.download(uid, callback=cb)
-        return client.cache.get_path(uid)
+        file = File._from_row(file_signals(row))
+        client = catalog.get_client(file.source)
+        client.download(file, callback=cb)
+        return client.cache.get_path(file)
 
     async def get_value_async(
         self,
@@ -208,10 +209,10 @@ class LocalFilename(UDFParameter):
             # If the glob pattern is specified and the row filename
             # does not match it, then return None
             return None
-        client = catalog.get_client(row["file__source"])
-        uid = catalog._get_row_uid(file_signals(row))
-        await client._download(uid, callback=cb)
-        return client.cache.get_path(uid)
+        file = File._from_row(file_signals(row))
+        client = catalog.get_client(file.source)
+        await client._download(file, callback=cb)
+        return client.cache.get_path(file)
 
 
 UDFParamSpec = Union[str, Column, UDFParameter]
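
All four parameter accessors (Object, Stream, LocalFilename, and their async variants) now share the same preamble: build a File from the flattened file__* row signals, then key the client, cache, and download calls on that object instead of a UID. A hedged sketch of that preamble; the row below is illustrative, and real rows carry more file__* signals (size, etag, version, ...) that File may require:

from datachain.lib.file import File
from datachain.query.schema import file_signals  # helper used in the hunks above

row = {"file__source": "s3://bucket", "file__path": "images/cat.jpg"}

file = File._from_row(file_signals(row))  # "file__" prefix stripped, File built
print(file.source)  # clients are now looked up by the File's source field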
datachain/query/udf.py CHANGED
@@ -1,13 +1,9 @@
 import typing
 from collections.abc import Iterable, Iterator, Sequence
 from dataclasses import dataclass
-from functools import WRAPPER_ASSIGNMENTS
 from typing import (
     TYPE_CHECKING,
     Any,
-    Callable,
-    Optional,
-    Union,
 )
 
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
@@ -128,105 +124,3 @@ class UDFBase:
             for row_id, signals in zip(row_ids, results)
             if signals is not None  # skip rows with no output
         ]
-
-
-class UDFClassWrapper:
-    """
-    A wrapper for class-based (stateful) UDFs.
-    """
-
-    def __init__(
-        self,
-        udf_class: type,
-        properties: UDFProperties,
-        method: Optional[str] = None,
-    ):
-        self.udf_class = udf_class
-        self.udf_method = method
-        self.properties = properties
-        self.output = properties.output
-
-    def __call__(self, *args, **kwargs) -> "UDFFactory":
-        return UDFFactory(
-            self.udf_class,
-            args,
-            kwargs,
-            self.properties,
-            self.udf_method,
-        )
-
-
-class UDFWrapper(UDFBase):
-    """A wrapper class for function UDFs to be used in custom signal generation."""
-
-    def __init__(
-        self,
-        func: Callable,
-        properties: UDFProperties,
-    ):
-        self.func = func
-        super().__init__(properties)
-        # This emulates the behavior of functools.wraps for a class decorator
-        for attr in WRAPPER_ASSIGNMENTS:
-            if hasattr(func, attr):
-                setattr(self, attr, getattr(func, attr))
-
-    def run_once(
-        self,
-        catalog: "Catalog",
-        arg: "UDFInput",
-        is_generator: bool = False,
-        cache: bool = False,
-        cb: Callback = DEFAULT_CALLBACK,
-    ) -> Iterable[UDFResult]:
-        if isinstance(arg, UDFInputBatch):
-            udf_inputs = [
-                self.bind_parameters(catalog, row, cache=cache, cb=cb)
-                for row in arg.rows
-            ]
-            udf_outputs = self.func(udf_inputs)
-            return self._process_results(arg.rows, udf_outputs, is_generator)
-        if isinstance(arg, RowDict):
-            udf_inputs = self.bind_parameters(catalog, arg, cache=cache, cb=cb)
-            udf_outputs = self.func(*udf_inputs)
-            if not is_generator:
-                # udf_outputs is generator already if is_generator=True
-                udf_outputs = [udf_outputs]
-            return self._process_results([arg], udf_outputs, is_generator)
-        raise ValueError(f"Unexpected UDF argument: {arg}")
-
-    # This emulates the behavior of functools.wraps for a class decorator
-    def __repr__(self):
-        return repr(self.func)
-
-
-class UDFFactory:
-    """
-    A wrapper for late instantiation of UDF classes, primarily for use in parallelized
-    execution.
-    """
-
-    def __init__(
-        self,
-        udf_class: type,
-        args,
-        kwargs,
-        properties: UDFProperties,
-        method: Optional[str] = None,
-    ):
-        self.udf_class = udf_class
-        self.udf_method = method
-        self.args = args
-        self.kwargs = kwargs
-        self.properties = properties
-        self.output = properties.output
-
-    def __call__(self) -> UDFWrapper:
-        udf_func = self.udf_class(*self.args, **self.kwargs)
-        if self.udf_method:
-            udf_func = getattr(udf_func, self.udf_method)
-
-        return UDFWrapper(udf_func, self.properties)
-
-
-UDFType = Union[UDFBase, UDFFactory]
datachain/sql/types.py CHANGED
@@ -12,11 +12,11 @@ for sqlite we can use `sqlite.register_converter`
 ( https://docs.python.org/3/library/sqlite3.html#sqlite3.register_converter )
 """
 
-import json
 from datetime import datetime
 from types import MappingProxyType
 from typing import Any, Union
 
+import orjson
 import sqlalchemy as sa
 from sqlalchemy import TypeDecorator, types
 
@@ -312,7 +312,7 @@ class Array(SQLType):
     def on_read_convert(self, value, dialect):
         r = read_converter(dialect).array(value, self.item_type, dialect)
         if isinstance(self.item_type, JSON):
-            r = [json.loads(item) if isinstance(item, str) else item for item in r]
+            r = [orjson.loads(item) if isinstance(item, str) else item for item in r]
         return r
 
 
@@ -420,6 +420,8 @@ class TypeReadConverter:
         return [item_type.on_read_convert(x, dialect) for x in value]
 
     def json(self, value):
+        if isinstance(value, str):
+            return orjson.loads(value)
         return value
 
     def datetime(self, value):
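
Two behavioral notes on this swap: orjson.loads is a drop-in replacement for json.loads on the read path (same parsed structures, faster), and the json read converter now actually parses string values instead of passing them through unchanged. A small sanity check of the first point:

import json

import orjson

payload = '{"signals": [1, 2, 3], "name": "cat.jpg"}'
assert orjson.loads(payload) == json.loads(payload)  # identical structures
# orjson also accepts bytes input; note its dumps() returns bytes, not str.
assert orjson.loads(payload.encode()) == json.loads(payload)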
datachain/telemetry.py ADDED
@@ -0,0 +1,37 @@
+import logging
+import os
+from importlib.metadata import PackageNotFoundError, version
+
+from iterative_telemetry import IterativeTelemetryLogger
+
+from datachain.utils import env2bool
+
+logger = logging.getLogger(__name__)
+
+
+def is_enabled():
+    """
+    Determine if telemetry is enabled based on environment variables and configuration.
+    """
+    # Disable telemetry if running in test mode
+    if env2bool("DATACHAIN_TEST"):
+        return False
+
+    # Check if telemetry is disabled by environment variable
+    disabled = bool(os.getenv("DATACHAIN_NO_ANALYTICS"))
+    if disabled:
+        logger.debug("Telemetry is disabled by environment variable.")
+        return False
+
+    logger.debug("Telemetry is enabled.")
+    return True
+
+
+# Try to get the version of the datachain package
+try:
+    __version__ = version("datachain")
+except PackageNotFoundError:
+    __version__ = "unknown"
+
+# Initialize telemetry logger
+telemetry = IterativeTelemetryLogger("datachain", __version__, is_enabled)
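
Per is_enabled() above, telemetry is skipped when DATACHAIN_TEST is truthy (via env2bool) or when DATACHAIN_NO_ANALYTICS is set to any non-empty value, since bool() on a non-empty string is always True. A quick check of the opt-out path:

import os

os.environ["DATACHAIN_NO_ANALYTICS"] = "1"  # any non-empty value opts out

from datachain.telemetry import is_enabled

assert is_enabled() is False  # logs "Telemetry is disabled by environment variable."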
datachain/utils.py CHANGED
@@ -4,6 +4,7 @@ import json
 import os
 import os.path as osp
 import random
+import re
 import stat
 import sys
 import time
@@ -410,3 +411,13 @@ def get_datachain_executable() -> list[str]:
 def uses_glob(path: str) -> bool:
     """Checks if some URI path has glob syntax in it"""
     return glob.has_magic(os.path.basename(os.path.normpath(path)))
+
+
+def env2bool(var, undefined=False):
+    """
+    undefined: return value if env var is unset
+    """
+    var = os.getenv(var, None)
+    if var is None:
+        return undefined
+    return bool(re.search("1|y|yes|true", var, flags=re.IGNORECASE))
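
The relocated helper matches case-insensitively against 1|y|yes|true, so values like "YES" or "True" are truthy while unset variables fall back to undefined; because re.search does a substring match, any value containing "y" (e.g. "nay") also counts as truthy. A few illustrative calls:

import os

from datachain.utils import env2bool

os.environ["FLAG_A"] = "True"
os.environ["FLAG_B"] = "0"

assert env2bool("FLAG_A") is True          # "True" matches the regex
assert env2bool("FLAG_B") is False         # "0" matches nothing in 1|y|yes|true
assert env2bool("FLAG_MISSING") is False   # unset -> undefined (default False)
assert env2bool("FLAG_MISSING", undefined=True) is True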
{datachain-0.3.17.dist-info → datachain-0.3.18.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.17
+Version: 0.3.18
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -43,6 +43,7 @@ Requires-Dist: Pillow <11,>=10.0.0
 Requires-Dist: msgpack <2,>=1.0.4
 Requires-Dist: psutil
 Requires-Dist: huggingface-hub
+Requires-Dist: iterative-telemetry >=0.0.9
 Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
 Provides-Extra: dev
 Requires-Dist: datachain[docs,tests] ; extra == 'dev'
@@ -63,9 +64,10 @@ Requires-Dist: datachain[tests] ; extra == 'examples'
 Requires-Dist: numpy <2,>=1 ; extra == 'examples'
 Requires-Dist: defusedxml ; extra == 'examples'
 Requires-Dist: accelerate ; extra == 'examples'
-Requires-Dist: unstructured[pdf] ; extra == 'examples'
+Requires-Dist: unstructured[embed-huggingface,pdf] ; extra == 'examples'
 Requires-Dist: pdfplumber ==0.11.4 ; extra == 'examples'
 Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
+Requires-Dist: onnx ==1.16.1 ; extra == 'examples'
 Provides-Extra: hf
 Requires-Dist: numba >=0.60.0 ; extra == 'hf'
 Requires-Dist: datasets[audio,vision] >=2.21.0 ; extra == 'hf'
@@ -78,7 +80,7 @@ Requires-Dist: pytest <9,>=8 ; extra == 'tests'
 Requires-Dist: pytest-sugar >=0.9.6 ; extra == 'tests'
 Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
 Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
-Requires-Dist: pytest-servers[all] >=0.5.5 ; extra == 'tests'
+Requires-Dist: pytest-servers[all] >=0.5.7 ; extra == 'tests'
 Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
 Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
 Requires-Dist: virtualenv ; extra == 'tests'
{datachain-0.3.17.dist-info → datachain-0.3.18.dist-info}/RECORD RENAMED
@@ -1,32 +1,33 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=Lg3Ck1PQLjQziMx9KU4atzbEnJXTE0924WMYkhgWtGU,8247
-datachain/cache.py,sha256=WP-ktH_bRn3w2g1JOOQ7rCPsZyR4OM6K1Kb7yZsSSns,4056
-datachain/cli.py,sha256=zObcD5W8dzUJKk2RGQ1MxQLEr3jnox6bybU8WyDaIqE,29941
+datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
+datachain/cli.py,sha256=tRuUvlFey5zYE0UVkGylqGiG5t89gUBo2SJ_yPsvu1I,30129
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=sHnsmKfMg2bK88gZH1izk8jlbmJDEhQpyOemdaPQVFo,14761
 datachain/error.py,sha256=OnZ8OaBtDdTZPy8XQiy29SAjqdQArQeorYbP5ju7ldc,1199
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
-datachain/listing.py,sha256=vfjOlcb98A7xkGGKWEYON6l7lfrOqNv6kldmdVnlJn4,8178
-datachain/node.py,sha256=-Y8O7q7NtIm_jX0HgjhjvdFwm73TrO5QBslxvFVwTJE,5208
-datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
-datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
-datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
+datachain/listing.py,sha256=TkMmBzCiru26x4RaZiagWJTmTGbiy6yGrAsSJMr8cFE,8213
+datachain/node.py,sha256=ThE6Ue4BqpaBvrkFFJW_ljLxchixUX2aWz3l_nbwY54,5195
+datachain/nodes_fetcher.py,sha256=F-73-h19HHNGtHFBGKk7p3mc0ALm4a9zGnzhtuUjnp4,1107
+datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
+datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
-datachain/utils.py,sha256=VGAcTWjGF0e2qB3Se77shhpiqGMo-ol0QAwf3MH5b7c,11857
+datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
+datachain/utils.py,sha256=KeFSRHsiYthnTu4a6bH-rw04mX1m8krTX0f2NqfQGFI,12114
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=IAaaSVFxtJxVlIGEbu8sHinmYyeDGY6dg7APrtOchVk,68278
+datachain/catalog/catalog.py,sha256=tICInYEeCRJow9hNSFnlA50hCOjFPN7fyGgoN5shcf8,67985
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
 datachain/client/azure.py,sha256=ffxs26zm6KLAL1aUWJm-vtzuZP3LSNha7UDGXynMBKo,2234
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=0i4EJIwdx_UNZlbSsUeohWjgVg4B5xoGxTYZKwXS22U,13459
+datachain/client/fsspec.py,sha256=CO5LfxlZF58UAywLfMYeZRXDLIzcJepnQyPZfZk0Ies,12236
 datachain/client/gcs.py,sha256=cnTIr5GS6dbYOEYfqehhyQu3dr6XNjPHSg5U3FkivUk,4124
 datachain/client/hf.py,sha256=k24bpa6FEKNQn9zhoNC9kCigDwFSqobLsCnN_Nuzwh4,922
-datachain/client/local.py,sha256=LTyISV4oNSOPUdsai5eNZYCGXNCn8rNGuAI0bdgbtnU,5006
+datachain/client/local.py,sha256=5OT3yf9QHi0If_dlqKYIYs-if-3oWhfAztMvsSa3YRA,4969
 datachain/client/s3.py,sha256=CVHBUZ1Ic2Q3370nl-Bbe69phuWjFlrVv9dTJKBpRT0,6019
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
@@ -38,15 +39,15 @@ datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2kru
 datachain/data_storage/sqlite.py,sha256=EBKJncuzcyQfcKFm2mUjvHjHRTODsteM-k_zndunBrw,28834
 datachain/data_storage/warehouse.py,sha256=Vwhu_OfcNAoTtg1BHui80VCzlPeTUjZQL0QWziu8awY,32186
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=voY9KuJ2uhPxw_DS6rIjwfKjWXi84T3LFJ7kGFcDQuk,7272
+datachain/lib/arrow.py,sha256=uYn9RQwJy4MsMkhu18_6cgtVO3HkniBcB1NdFmkwtvo,7292
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
 datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
-datachain/lib/dc.py,sha256=DkzuKS14kgAEax47Gi9w_XJXV3dbboW85A0YOxwNjKY,68869
-datachain/lib/file.py,sha256=elQLorLbIkusuQSVfiuC_KrGSZI8cGm-iT8fHmckJlo,13774
+datachain/lib/dc.py,sha256=oc9tPf5G9X3DmVCPaTuuwp8LlLogoYuEHpOkq_W7h6Y,68984
+datachain/lib/file.py,sha256=flKGvmrotXWZqQQafaZQEeSQlqUVTSVWB7JIkEsr0MM,14255
 datachain/lib/hf.py,sha256=cPnmLuprr0pYABH7KqA5FARQ1JGlywdDwD3yDzVAm4k,5920
 datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
-datachain/lib/listing.py,sha256=NrKb7_6jwp1vEqp5TavSvx3SbLJdvuBzSEclPvbQr30,4013
+datachain/lib/listing.py,sha256=cHPN5-Fq8yb0gP6DARImhmZWxykDDNqhhJujDxEp53A,4104
 datachain/lib/listing_info.py,sha256=36NZ-tXY5Y118wurkajuWWbcE8UCjkRwZlacDtN9F3g,954
 datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw,6635
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
@@ -69,19 +70,19 @@ datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xd
 datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
 datachain/query/__init__.py,sha256=0NBOZVgIDpCcj1Ci883dQ9A0iiwe03xzmotkOCFbxYc,293
 datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
-datachain/query/dataset.py,sha256=27QCFhwz4hK-pqTY0hvfFqwxNIH5yxuSjWRl1ZfELd0,55004
-datachain/query/dispatch.py,sha256=GBh3EZHDp5AaXxrjOpfrpfsuy7Umnqxu-MAXcK9X3gc,12945
+datachain/query/dataset.py,sha256=k2jU0uZ86i9vr3On-o7GzHrubK5bCJjZEvz9P8extmw,54347
+datachain/query/dispatch.py,sha256=CFAc09O6UllcyUSSEY1GUlEMPzeO8RYhXinNN4HBl9M,12405
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
-datachain/query/schema.py,sha256=A-Btmu-rR00841PNcs9CjcppfTUc176wT6-m3BOaSgo,8049
+datachain/query/schema.py,sha256=I8zLWJuWl5N332ni9mAzDYtcxMJupVPgWkSDe8spNEk,8019
 datachain/query/session.py,sha256=UPH5Z4fzCDsvj81ji0e8GA6Mgra3bOAEpVq4htqOtis,4317
-datachain/query/udf.py,sha256=QRDcSgJ_zrY63dyMVD8nq4ky9Q2kaKSoM6bsDqpXOvQ,6682
+datachain/query/udf.py,sha256=HB2hbEuiGA4ch9P2mh9iLA5Jj9mRj-4JFy9VfjTLJ8U,3622
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/remote/studio.py,sha256=f5s6qSZ9uB4URGUoU_8_W1KZRRQQVSm6cgEBkBUEfuE,7226
 datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
 datachain/sql/selectable.py,sha256=fBM-wS1TUA42kVEAAiwqGtibIevyZAEritwt8PZGyLQ,1589
-datachain/sql/types.py,sha256=1ofJjgzKTxFLl1WaMSI9pLvdHGZ1U24I0z5i-gChqDI,13305
+datachain/sql/types.py,sha256=3aXpoxkmCYbw0Dlta5J1enwS8_FuvjfSqyrNZO-dWj4,13383
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
 datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
@@ -96,9 +97,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.17.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.3.17.dist-info/METADATA,sha256=bPMIQkvQjnflmMQhJa3BH3Mi4DpHTAI8KQ5Vd2ur8Mo,17073
-datachain-0.3.17.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-datachain-0.3.17.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.3.17.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.3.17.dist-info/RECORD,,
+datachain-0.3.18.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.18.dist-info/METADATA,sha256=_LpwSHtaSTA-rz4rG9nHIbO2mLlrlI4mCnlxKx8vePo,17185
+datachain-0.3.18.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+datachain-0.3.18.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.18.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.18.dist-info/RECORD,,