datachain 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/query/schema.py CHANGED
@@ -1,16 +1,13 @@
1
1
  import functools
2
- import json
3
2
  from abc import ABC, abstractmethod
4
- from datetime import datetime, timezone
5
3
  from fnmatch import fnmatch
6
- from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union
4
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Union
7
5
 
8
6
  import attrs
9
7
  import sqlalchemy as sa
10
8
  from fsspec.callbacks import DEFAULT_CALLBACK, Callback
11
9
 
12
10
  from datachain.lib.file import File
13
- from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
14
11
 
15
12
  if TYPE_CHECKING:
16
13
  from datachain.catalog import Catalog
@@ -228,61 +225,4 @@ def normalize_param(param: UDFParamSpec) -> UDFParameter:
228
225
  raise TypeError(f"Invalid UDF parameter: {param}")
229
226
 
230
227
 
231
- class DatasetRow:
232
- schema: ClassVar[dict[str, type[SQLType]]] = {
233
- "source": String,
234
- "path": String,
235
- "size": Int64,
236
- "location": JSON,
237
- "is_latest": Boolean,
238
- "last_modified": DateTime,
239
- "version": String,
240
- "etag": String,
241
- }
242
-
243
- @staticmethod
244
- def create(
245
- path: str,
246
- source: str = "",
247
- size: int = 0,
248
- location: Optional[dict[str, Any]] = None,
249
- is_latest: bool = True,
250
- last_modified: Optional[datetime] = None,
251
- version: str = "",
252
- etag: str = "",
253
- ) -> tuple[
254
- str,
255
- str,
256
- int,
257
- Optional[str],
258
- int,
259
- bool,
260
- datetime,
261
- str,
262
- str,
263
- int,
264
- ]:
265
- if location:
266
- location = json.dumps([location]) # type: ignore [assignment]
267
-
268
- last_modified = last_modified or datetime.now(timezone.utc)
269
-
270
- return ( # type: ignore [return-value]
271
- source,
272
- path,
273
- size,
274
- location,
275
- is_latest,
276
- last_modified,
277
- version,
278
- etag,
279
- )
280
-
281
- @staticmethod
282
- def extend(**columns):
283
- cols = {**DatasetRow.schema}
284
- cols.update(columns)
285
- return cols
286
-
287
-
288
228
  C = Column
datachain/query/session.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import atexit
2
+ import gc
2
3
  import logging
3
- import os
4
4
  import re
5
5
  import sys
6
- from typing import TYPE_CHECKING, Optional
6
+ from typing import TYPE_CHECKING, ClassVar, Optional
7
7
  from uuid import uuid4
8
8
 
9
9
  from datachain.catalog import get_catalog
@@ -11,6 +11,7 @@ from datachain.error import TableMissingError
11
11
 
12
12
  if TYPE_CHECKING:
13
13
  from datachain.catalog import Catalog
14
+ from datachain.dataset import DatasetRecord
14
15
 
15
16
  logger = logging.getLogger("datachain")
16
17
 
@@ -39,7 +40,7 @@ class Session:
39
40
  """
40
41
 
41
42
  GLOBAL_SESSION_CTX: Optional["Session"] = None
42
- GLOBAL_SESSION: Optional["Session"] = None
43
+ SESSION_CONTEXTS: ClassVar[list["Session"]] = []
43
44
  ORIGINAL_EXCEPT_HOOK = None
44
45
 
45
46
  DATASET_PREFIX = "session_"
@@ -64,18 +65,21 @@ class Session:
64
65
 
65
66
  session_uuid = uuid4().hex[: self.SESSION_UUID_LEN]
66
67
  self.name = f"{name}_{session_uuid}"
67
- self.job_id = os.getenv("DATACHAIN_JOB_ID") or str(uuid4())
68
68
  self.is_new_catalog = not catalog
69
69
  self.catalog = catalog or get_catalog(
70
70
  client_config=client_config, in_memory=in_memory
71
71
  )
72
+ self.dataset_versions: list[tuple[DatasetRecord, int]] = []
72
73
 
73
74
  def __enter__(self):
75
+ # Push the current context onto the stack
76
+ Session.SESSION_CONTEXTS.append(self)
77
+
74
78
  return self
75
79
 
76
80
  def __exit__(self, exc_type, exc_val, exc_tb):
77
81
  if exc_type:
78
- self._cleanup_created_versions(self.name)
82
+ self._cleanup_created_versions()
79
83
 
80
84
  self._cleanup_temp_datasets()
81
85
  if self.is_new_catalog:
@@ -83,6 +87,12 @@ class Session:
83
87
  self.catalog.warehouse.close_on_exit()
84
88
  self.catalog.id_generator.close_on_exit()
85
89
 
90
+ if Session.SESSION_CONTEXTS:
91
+ Session.SESSION_CONTEXTS.pop()
92
+
93
+ def add_dataset_version(self, dataset: "DatasetRecord", version: int) -> None:
94
+ self.dataset_versions.append((dataset, version))
95
+
86
96
  def generate_temp_dataset_name(self) -> str:
87
97
  return self.get_temp_prefix() + uuid4().hex[: self.TEMP_TABLE_UUID_LEN]
88
98
 
@@ -98,21 +108,15 @@ class Session:
98
108
  except TableMissingError:
99
109
  pass
100
110
 
101
- def _cleanup_created_versions(self, job_id: str) -> None:
102
- versions = self.catalog.metastore.get_job_dataset_versions(job_id)
103
- if not versions:
111
+ def _cleanup_created_versions(self) -> None:
112
+ if not self.dataset_versions:
104
113
  return
105
114
 
106
- datasets = {}
107
- for dataset_name, version in versions:
108
- if dataset_name not in datasets:
109
- datasets[dataset_name] = self.catalog.get_dataset(dataset_name)
110
- dataset = datasets[dataset_name]
111
- logger.info(
112
- "Removing dataset version %s@%s due to exception", dataset_name, version
113
- )
115
+ for dataset, version in self.dataset_versions:
114
116
  self.catalog.remove_dataset_version(dataset, version)
115
117
 
118
+ self.dataset_versions.clear()
119
+
116
120
  @classmethod
117
121
  def get(
118
122
  cls,
@@ -125,33 +129,34 @@ class Session:
125
129
 
126
130
  Parameters:
127
131
  session (Session): Optional Session(). If not provided a new session will
128
- be created. It's needed mostly for simplie API purposes.
129
- catalog (Catalog): Optional catalog. By default a new catalog is created.
132
+ be created. It's needed mostly for simple API purposes.
133
+ catalog (Catalog): Optional catalog. By default, a new catalog is created.
130
134
  """
131
135
  if session:
132
136
  return session
133
137
 
134
- if cls.GLOBAL_SESSION is None:
138
+ # Access the active (most recent) context from the stack
139
+ if cls.SESSION_CONTEXTS:
140
+ return cls.SESSION_CONTEXTS[-1]
141
+
142
+ if cls.GLOBAL_SESSION_CTX is None:
135
143
  cls.GLOBAL_SESSION_CTX = Session(
136
144
  cls.GLOBAL_SESSION_NAME,
137
145
  catalog,
138
146
  client_config=client_config,
139
147
  in_memory=in_memory,
140
148
  )
141
- cls.GLOBAL_SESSION = cls.GLOBAL_SESSION_CTX.__enter__()
142
149
 
143
150
  atexit.register(cls._global_cleanup)
144
151
  cls.ORIGINAL_EXCEPT_HOOK = sys.excepthook
145
152
  sys.excepthook = cls.except_hook
146
153
 
147
- return cls.GLOBAL_SESSION
154
+ return cls.GLOBAL_SESSION_CTX
148
155
 
149
156
  @staticmethod
150
157
  def except_hook(exc_type, exc_value, exc_traceback):
158
+ Session.GLOBAL_SESSION_CTX.__exit__(exc_type, exc_value, exc_traceback)
151
159
  Session._global_cleanup()
152
- if Session.GLOBAL_SESSION_CTX is not None:
153
- job_id = Session.GLOBAL_SESSION_CTX.job_id
154
- Session.GLOBAL_SESSION_CTX._cleanup_created_versions(job_id)
155
160
 
156
161
  if Session.ORIGINAL_EXCEPT_HOOK:
157
162
  Session.ORIGINAL_EXCEPT_HOOK(exc_type, exc_value, exc_traceback)
@@ -160,7 +165,6 @@ class Session:
160
165
  def cleanup_for_tests(cls):
161
166
  if cls.GLOBAL_SESSION_CTX is not None:
162
167
  cls.GLOBAL_SESSION_CTX.__exit__(None, None, None)
163
- cls.GLOBAL_SESSION = None
164
168
  cls.GLOBAL_SESSION_CTX = None
165
169
  atexit.unregister(cls._global_cleanup)
166
170
 
@@ -171,3 +175,7 @@ class Session:
171
175
  def _global_cleanup():
172
176
  if Session.GLOBAL_SESSION_CTX is not None:
173
177
  Session.GLOBAL_SESSION_CTX.__exit__(None, None, None)
178
+
179
+ for obj in gc.get_objects(): # Get all tracked objects
180
+ if isinstance(obj, Session): # Cleanup temp dataset for session variables.
181
+ obj.__exit__(None, None, None)
datachain/sql/functions/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from sqlalchemy.sql.expression import func
2
2
 
3
3
  from . import array, path, string
4
- from .array import avg
4
+ from .aggregate import avg
5
5
  from .conditional import greatest, least
6
6
  from .random import rand
7
7
 
datachain/sql/functions/aggregate.py ADDED
@@ -0,0 +1,47 @@
1
+ from sqlalchemy.sql.functions import GenericFunction, ReturnTypeFromArgs
2
+
3
+ from datachain.sql.types import Float, String
4
+ from datachain.sql.utils import compiler_not_implemented
5
+
6
+
7
+ class avg(GenericFunction): # noqa: N801
8
+ """
9
+ Returns the average of the column.
10
+ """
11
+
12
+ type = Float()
13
+ package = "array"
14
+ name = "avg"
15
+ inherit_cache = True
16
+
17
+
18
+ class group_concat(GenericFunction): # noqa: N801
19
+ """
20
+ Returns the concatenated string of the column.
21
+ """
22
+
23
+ type = String()
24
+ package = "array"
25
+ name = "group_concat"
26
+ inherit_cache = True
27
+
28
+
29
+ class any_value(ReturnTypeFromArgs): # noqa: N801
30
+ """
31
+ Returns first value of the column.
32
+ """
33
+
34
+ inherit_cache = True
35
+
36
+
37
+ class collect(ReturnTypeFromArgs): # noqa: N801
38
+ """
39
+ Returns an array of the column.
40
+ """
41
+
42
+ inherit_cache = True
43
+
44
+
45
+ compiler_not_implemented(avg)
46
+ compiler_not_implemented(group_concat)
47
+ compiler_not_implemented(any_value)
datachain/sql/functions/array.py CHANGED
@@ -44,15 +44,7 @@ class sip_hash_64(GenericFunction): # noqa: N801
44
44
  inherit_cache = True
45
45
 
46
46
 
47
- class avg(GenericFunction): # noqa: N801
48
- type = Float()
49
- package = "array"
50
- name = "avg"
51
- inherit_cache = True
52
-
53
-
54
47
  compiler_not_implemented(cosine_distance)
55
48
  compiler_not_implemented(euclidean_distance)
56
49
  compiler_not_implemented(length)
57
50
  compiler_not_implemented(sip_hash_64)
58
- compiler_not_implemented(avg)
datachain/sql/functions/string.py CHANGED
@@ -37,6 +37,18 @@ class regexp_replace(GenericFunction): # noqa: N801
37
37
  inherit_cache = True
38
38
 
39
39
 
40
+ class replace(GenericFunction): # noqa: N801
41
+ """
42
+ Replaces substring with another string.
43
+ """
44
+
45
+ type = String()
46
+ package = "string"
47
+ name = "replace"
48
+ inherit_cache = True
49
+
50
+
40
51
  compiler_not_implemented(length)
41
52
  compiler_not_implemented(split)
42
53
  compiler_not_implemented(regexp_replace)
54
+ compiler_not_implemented(replace)
datachain/sql/sqlite/base.py CHANGED
@@ -14,7 +14,7 @@ from sqlalchemy.sql.elements import literal
14
14
  from sqlalchemy.sql.expression import case
15
15
  from sqlalchemy.sql.functions import func
16
16
 
17
- from datachain.sql.functions import array, conditional, random, string
17
+ from datachain.sql.functions import aggregate, array, conditional, random, string
18
18
  from datachain.sql.functions import path as sql_path
19
19
  from datachain.sql.selectable import Values, base_values_compiler
20
20
  from datachain.sql.sqlite.types import (
@@ -78,12 +78,16 @@ def setup():
78
78
  compiles(array.length, "sqlite")(compile_array_length)
79
79
  compiles(string.length, "sqlite")(compile_string_length)
80
80
  compiles(string.split, "sqlite")(compile_string_split)
81
- compiles(string.regexp_replace, "sqlite")(compile_regexp_replace)
81
+ compiles(string.regexp_replace, "sqlite")(compile_string_regexp_replace)
82
+ compiles(string.replace, "sqlite")(compile_string_replace)
82
83
  compiles(conditional.greatest, "sqlite")(compile_greatest)
83
84
  compiles(conditional.least, "sqlite")(compile_least)
84
85
  compiles(Values, "sqlite")(compile_values)
85
86
  compiles(random.rand, "sqlite")(compile_rand)
86
- compiles(array.avg, "sqlite")(compile_avg)
87
+ compiles(aggregate.avg, "sqlite")(compile_avg)
88
+ compiles(aggregate.group_concat, "sqlite")(compile_group_concat)
89
+ compiles(aggregate.any_value, "sqlite")(compile_any_value)
90
+ compiles(aggregate.collect, "sqlite")(compile_collect)
87
91
 
88
92
  if load_usearch_extension(sqlite3.connect(":memory:")):
89
93
  compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
@@ -273,10 +277,6 @@ def path_file_ext(path):
273
277
  return func.substr(path, func.length(path) - path_file_ext_length(path) + 1)
274
278
 
275
279
 
276
- def compile_regexp_replace(element, compiler, **kwargs):
277
- return f"regexp_replace({compiler.process(element.clauses, **kwargs)})"
278
-
279
-
280
280
  def compile_path_parent(element, compiler, **kwargs):
281
281
  return compiler.process(path_parent(*element.clauses.clauses), **kwargs)
282
282
 
@@ -331,6 +331,14 @@ def compile_string_split(element, compiler, **kwargs):
331
331
  return compiler.process(func.split(*element.clauses.clauses), **kwargs)
332
332
 
333
333
 
334
+ def compile_string_regexp_replace(element, compiler, **kwargs):
335
+ return f"regexp_replace({compiler.process(element.clauses, **kwargs)})"
336
+
337
+
338
+ def compile_string_replace(element, compiler, **kwargs):
339
+ return compiler.process(func.replace(*element.clauses.clauses), **kwargs)
340
+
341
+
334
342
  def compile_greatest(element, compiler, **kwargs):
335
343
  """
336
344
  Compiles a sql function for `greatest(*args)` taking 1 or more args
@@ -395,6 +403,21 @@ def compile_avg(element, compiler, **kwargs):
395
403
  return compiler.process(func.avg(*element.clauses.clauses), **kwargs)
396
404
 
397
405
 
406
+ def compile_group_concat(element, compiler, **kwargs):
407
+ return compiler.process(func.aggregate_strings(*element.clauses.clauses), **kwargs)
408
+
409
+
410
+ def compile_any_value(element, compiler, **kwargs):
411
+ # use bare column to return any value from the group,
412
+ # this is documented behavior for sqlite,
413
+ # see https://www.sqlite.org/lang_select.html#bare_columns_in_an_aggregate_query
414
+ return compiler.process(*element.clauses.clauses, **kwargs)
415
+
416
+
417
+ def compile_collect(element, compiler, **kwargs):
418
+ return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
419
+
420
+
398
421
  def load_usearch_extension(conn) -> bool:
399
422
  try:
400
423
  # usearch is part of the vector optional dependencies
{datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.5.1
3
+ Version: 0.6.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -46,7 +46,7 @@ Requires-Dist: iterative-telemetry >=0.0.9
46
46
  Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
47
47
  Provides-Extra: dev
48
48
  Requires-Dist: datachain[docs,tests] ; extra == 'dev'
49
- Requires-Dist: mypy ==1.11.2 ; extra == 'dev'
49
+ Requires-Dist: mypy ==1.12.0 ; extra == 'dev'
50
50
  Requires-Dist: types-python-dateutil ; extra == 'dev'
51
51
  Requires-Dist: types-pytz ; extra == 'dev'
52
52
  Requires-Dist: types-PyYAML ; extra == 'dev'
{datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
1
+ datachain/__init__.py,sha256=OGzc8xZWtwqxiiutjU4AxCRPY0lrX_csgERiTrq4G0o,908
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=Lg3Ck1PQLjQziMx9KU4atzbEnJXTE0924WMYkhgWtGU,8247
4
4
  datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
@@ -18,7 +18,7 @@ datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
18
18
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
19
19
  datachain/utils.py,sha256=KeFSRHsiYthnTu4a6bH-rw04mX1m8krTX0f2NqfQGFI,12114
20
20
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
21
- datachain/catalog/catalog.py,sha256=BsMyk2RQibQYHgrmovFZeSEpPVMTwgb_7ntVYdc7t-E,64090
21
+ datachain/catalog/catalog.py,sha256=r5lkwwZDh8cETNniBdzPCY9Ix8G-1RdkehjvUe3d2nE,63834
22
22
  datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
23
23
  datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
24
24
  datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
@@ -36,14 +36,14 @@ datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s
36
36
  datachain/data_storage/metastore.py,sha256=HfCxk4lmDUg2Q4WsFNQGMWxllP0mToA00fxkFTwdNIE,52919
37
37
  datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
38
38
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
39
- datachain/data_storage/sqlite.py,sha256=fW08P7AbJ0cDbTbcTKuAGpvMXvBjg-QkGsKT_Dslyws,28383
40
- datachain/data_storage/warehouse.py,sha256=fXhVfao3NfWFGbbG5uJ-Ga4bX1FiKVfcbDyQgECYfk8,32122
39
+ datachain/data_storage/sqlite.py,sha256=V8fGRPjSwIT7kdw1qyQfUfdqGjXB8dE68npkyXfKW0o,28702
40
+ datachain/data_storage/warehouse.py,sha256=Ea0wVcWxe7Bu-8V8eqrPJ8Ov5-DT1dvv1MgxMINettc,31931
41
41
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  datachain/lib/arrow.py,sha256=0R2CYsN82nNa5_03iS6jVix9EKeeqNZNAMgpSQP2hfo,9482
43
43
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
44
- datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
44
+ datachain/lib/data_model.py,sha256=ECTbvlnzM98hp2mZ4fo82Yi0-MuoqTIQasQKGIyd89I,2040
45
45
  datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
46
- datachain/lib/dc.py,sha256=HLOAkJEKFHJV_PqwSu0Pyl1m7JmUea8_wiMJFr14Nfk,75960
46
+ datachain/lib/dc.py,sha256=wEqBDCENfBmeow0-uu8R4qJhQa8taEIzveUiNdr2CyY,78341
47
47
  datachain/lib/file.py,sha256=LjTW_-PDAnoUhvyB4bJ8Y8n__XGqrxvmd9mDOF0Gir8,14875
48
48
  datachain/lib/hf.py,sha256=cPnmLuprr0pYABH7KqA5FARQ1JGlywdDwD3yDzVAm4k,5920
49
49
  datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
@@ -53,30 +53,33 @@ datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw
53
53
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
54
54
  datachain/lib/pytorch.py,sha256=W-ARi2xH1f1DUkVfRuerW-YWYgSaJASmNCxtz2lrJGI,6072
55
55
  datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
56
- datachain/lib/signal_schema.py,sha256=iqgubjCBRiUJB30miv05qFX4uU04dA_Pzi3DCUsHZGs,24177
56
+ datachain/lib/signal_schema.py,sha256=6fgQIZz4jFvuiaL1mqK5Cq6yr4WC57o2ptHxk36MRNY,24438
57
57
  datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
58
58
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
59
- datachain/lib/udf.py,sha256=oHhJWb0gVTxcybGzYDzAeN0Gb1IMhZBoGefncT88dIY,12339
59
+ datachain/lib/udf.py,sha256=GvhWLCXZUY7sz1QMRBj1AJDSzzhyj15xs3Ia9hjJrJE,12697
60
60
  datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
61
- datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
61
+ datachain/lib/utils.py,sha256=12elAX6eTFgMGKIf2UfZ4IW07kRwjK6wz8yGE41RtNM,618
62
62
  datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
63
  datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
64
64
  datachain/lib/webdataset_laion.py,sha256=aGMWeFmeYNK75ewO9JTA11iB1i3QtTzUfenQA5jajfo,2535
65
65
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
66
  datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
67
67
  datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
68
- datachain/lib/convert/sql_to_python.py,sha256=lGnKzSF_tz9Y_5SSKkrIU95QEjpcDzvOxIRkEKTQag0,443
68
+ datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
69
69
  datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
70
- datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
71
- datachain/query/__init__.py,sha256=0NBOZVgIDpCcj1Ci883dQ9A0iiwe03xzmotkOCFbxYc,293
72
- datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
73
- datachain/query/dataset.py,sha256=1c7y178ccFSeL_WIba0vT87Md_Oo4F8zaTVDjB9Bp3I,53641
74
- datachain/query/dispatch.py,sha256=JVcZ4REE_GOsqXbar_Cb_fk-pHgQoabQLzXwuu7IhOg,12409
70
+ datachain/lib/convert/values_to_tuples.py,sha256=varRCnSMT_pZmHznrd2Yi05qXLLz_v9YH_pOCpHSkdc,3921
71
+ datachain/lib/func/__init__.py,sha256=ucJ15J_Q5Hy--boKV-tPuhKagVD3NpnuUPhLtDp7doI,230
72
+ datachain/lib/func/aggregate.py,sha256=B5VV6WoSYYiO_9uN4_nXPMkF9OOkgyE6suJ7XD-JiPI,938
73
+ datachain/lib/func/func.py,sha256=kFhVZlWZzgAfM7-DpkpZWf5zzdEutp_3NxIFWxXww_I,1956
74
+ datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
75
+ datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
76
+ datachain/query/dataset.py,sha256=-J8t8XGUQveh-4aM5HrnbYx9xLfMQ8p6P9sKmBaTpLU,52683
77
+ datachain/query/dispatch.py,sha256=wjjTWw6sFQbB9SKRh78VbfvwSMgJXCfqJklS3-9KnCU,12025
75
78
  datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
76
79
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
77
80
  datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
78
- datachain/query/schema.py,sha256=I8zLWJuWl5N332ni9mAzDYtcxMJupVPgWkSDe8spNEk,8019
79
- datachain/query/session.py,sha256=kpFFJMfWBnxaMPojMGhJRbk-BOsSYI8Ckl6vvqnx7d0,5787
81
+ datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
82
+ datachain/query/session.py,sha256=50SOdLNCjqHHKI-L4xGXyzTVxzMWfANqKqjeYre-c2k,5959
80
83
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
84
  datachain/remote/studio.py,sha256=f5s6qSZ9uB4URGUoU_8_W1KZRRQQVSm6cgEBkBUEfuE,7226
82
85
  datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
@@ -85,20 +88,21 @@ datachain/sql/types.py,sha256=3aXpoxkmCYbw0Dlta5J1enwS8_FuvjfSqyrNZO-dWj4,13383
85
88
  datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
86
89
  datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
87
90
  datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
88
- datachain/sql/functions/__init__.py,sha256=Ioyy7nSetrTLVnHGcGcmZU99HxUFcx-5PFbrh2dPNH0,396
89
- datachain/sql/functions/array.py,sha256=EB7nJSncUc1PuxlHyzU2gVhF8DuXaxpGlxb5e8X2KFY,1297
91
+ datachain/sql/functions/__init__.py,sha256=-vIkU0AqwOW5FX6P89xYl-uBIUdt46CEnCtshmN85gM,400
92
+ datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
93
+ datachain/sql/functions/array.py,sha256=rvH27SWN9gdh_mFnp0GIiXuCrNW6n8ZbY4I_JUS-_e0,1140
90
94
  datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
91
95
  datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
92
96
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
93
- datachain/sql/functions/string.py,sha256=NSQIpmtQgm68hz3TFJsgHMBuo4MjBNhDSyEIC3pWkT8,916
97
+ datachain/sql/functions/string.py,sha256=DYgiw8XSk7ge7GXvyRI1zbaMruIizNeI-puOjriQGZQ,1148
94
98
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
95
- datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,13364
99
+ datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,14375
96
100
  datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
97
101
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
98
102
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
99
- datachain-0.5.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
100
- datachain-0.5.1.dist-info/METADATA,sha256=n8TFKjDmTzNBMaW5Oa6MUUUOAQbAjPzkAMaKCW3Y9NU,17156
101
- datachain-0.5.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
102
- datachain-0.5.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
103
- datachain-0.5.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
104
- datachain-0.5.1.dist-info/RECORD,,
103
+ datachain-0.6.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
104
+ datachain-0.6.1.dist-info/METADATA,sha256=kOEDXkaNjPHB-A1fLt60s_EJvnjuLIU3xdfp5UhflUA,17156
105
+ datachain-0.6.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
106
+ datachain-0.6.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
107
+ datachain-0.6.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
108
+ datachain-0.6.1.dist-info/RECORD,,