datachain-0.5.1-py3-none-any.whl → datachain-0.6.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +2 -0
- datachain/catalog/catalog.py +1 -9
- datachain/data_storage/sqlite.py +8 -0
- datachain/data_storage/warehouse.py +0 -4
- datachain/lib/convert/sql_to_python.py +8 -12
- datachain/lib/convert/values_to_tuples.py +2 -2
- datachain/lib/data_model.py +1 -1
- datachain/lib/dc.py +82 -30
- datachain/lib/func/__init__.py +14 -0
- datachain/lib/func/aggregate.py +42 -0
- datachain/lib/func/func.py +64 -0
- datachain/lib/signal_schema.py +15 -9
- datachain/lib/udf.py +177 -151
- datachain/lib/utils.py +5 -0
- datachain/query/__init__.py +1 -2
- datachain/query/batch.py +0 -11
- datachain/query/dataset.py +23 -44
- datachain/query/dispatch.py +0 -12
- datachain/query/schema.py +1 -61
- datachain/query/session.py +33 -25
- datachain/sql/functions/__init__.py +1 -1
- datachain/sql/functions/aggregate.py +47 -0
- datachain/sql/functions/array.py +0 -8
- datachain/sql/functions/string.py +12 -0
- datachain/sql/sqlite/base.py +30 -7
- {datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/METADATA +2 -2
- {datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/RECORD +31 -27
- {datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/LICENSE +0 -0
- {datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/WHEEL +0 -0
- {datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/entry_points.txt +0 -0
- {datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/top_level.txt +0 -0
datachain/query/schema.py
CHANGED
|
@@ -1,16 +1,13 @@
|
|
|
1
1
|
import functools
|
|
2
|
-
import json
|
|
3
2
|
from abc import ABC, abstractmethod
|
|
4
|
-
from datetime import datetime, timezone
|
|
5
3
|
from fnmatch import fnmatch
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
7
5
|
|
|
8
6
|
import attrs
|
|
9
7
|
import sqlalchemy as sa
|
|
10
8
|
from fsspec.callbacks import DEFAULT_CALLBACK, Callback
|
|
11
9
|
|
|
12
10
|
from datachain.lib.file import File
|
|
13
|
-
from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
|
|
14
11
|
|
|
15
12
|
if TYPE_CHECKING:
|
|
16
13
|
from datachain.catalog import Catalog
|
|
@@ -228,61 +225,4 @@ def normalize_param(param: UDFParamSpec) -> UDFParameter:
|
|
|
228
225
|
raise TypeError(f"Invalid UDF parameter: {param}")
|
|
229
226
|
|
|
230
227
|
|
|
231
|
-
class DatasetRow:
|
|
232
|
-
schema: ClassVar[dict[str, type[SQLType]]] = {
|
|
233
|
-
"source": String,
|
|
234
|
-
"path": String,
|
|
235
|
-
"size": Int64,
|
|
236
|
-
"location": JSON,
|
|
237
|
-
"is_latest": Boolean,
|
|
238
|
-
"last_modified": DateTime,
|
|
239
|
-
"version": String,
|
|
240
|
-
"etag": String,
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
@staticmethod
|
|
244
|
-
def create(
|
|
245
|
-
path: str,
|
|
246
|
-
source: str = "",
|
|
247
|
-
size: int = 0,
|
|
248
|
-
location: Optional[dict[str, Any]] = None,
|
|
249
|
-
is_latest: bool = True,
|
|
250
|
-
last_modified: Optional[datetime] = None,
|
|
251
|
-
version: str = "",
|
|
252
|
-
etag: str = "",
|
|
253
|
-
) -> tuple[
|
|
254
|
-
str,
|
|
255
|
-
str,
|
|
256
|
-
int,
|
|
257
|
-
Optional[str],
|
|
258
|
-
int,
|
|
259
|
-
bool,
|
|
260
|
-
datetime,
|
|
261
|
-
str,
|
|
262
|
-
str,
|
|
263
|
-
int,
|
|
264
|
-
]:
|
|
265
|
-
if location:
|
|
266
|
-
location = json.dumps([location]) # type: ignore [assignment]
|
|
267
|
-
|
|
268
|
-
last_modified = last_modified or datetime.now(timezone.utc)
|
|
269
|
-
|
|
270
|
-
return ( # type: ignore [return-value]
|
|
271
|
-
source,
|
|
272
|
-
path,
|
|
273
|
-
size,
|
|
274
|
-
location,
|
|
275
|
-
is_latest,
|
|
276
|
-
last_modified,
|
|
277
|
-
version,
|
|
278
|
-
etag,
|
|
279
|
-
)
|
|
280
|
-
|
|
281
|
-
@staticmethod
|
|
282
|
-
def extend(**columns):
|
|
283
|
-
cols = {**DatasetRow.schema}
|
|
284
|
-
cols.update(columns)
|
|
285
|
-
return cols
|
|
286
|
-
|
|
287
|
-
|
|
288
228
|
C = Column
|
datachain/query/session.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import atexit
|
|
2
|
+
import gc
|
|
2
3
|
import logging
|
|
3
|
-
import os
|
|
4
4
|
import re
|
|
5
5
|
import sys
|
|
6
|
-
from typing import TYPE_CHECKING, Optional
|
|
6
|
+
from typing import TYPE_CHECKING, ClassVar, Optional
|
|
7
7
|
from uuid import uuid4
|
|
8
8
|
|
|
9
9
|
from datachain.catalog import get_catalog
|
|
@@ -11,6 +11,7 @@ from datachain.error import TableMissingError
|
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
13
|
from datachain.catalog import Catalog
|
|
14
|
+
from datachain.dataset import DatasetRecord
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger("datachain")
|
|
16
17
|
|
|
@@ -39,7 +40,7 @@ class Session:
|
|
|
39
40
|
"""
|
|
40
41
|
|
|
41
42
|
GLOBAL_SESSION_CTX: Optional["Session"] = None
|
|
42
|
-
|
|
43
|
+
SESSION_CONTEXTS: ClassVar[list["Session"]] = []
|
|
43
44
|
ORIGINAL_EXCEPT_HOOK = None
|
|
44
45
|
|
|
45
46
|
DATASET_PREFIX = "session_"
|
|
@@ -64,18 +65,21 @@ class Session:
|
|
|
64
65
|
|
|
65
66
|
session_uuid = uuid4().hex[: self.SESSION_UUID_LEN]
|
|
66
67
|
self.name = f"{name}_{session_uuid}"
|
|
67
|
-
self.job_id = os.getenv("DATACHAIN_JOB_ID") or str(uuid4())
|
|
68
68
|
self.is_new_catalog = not catalog
|
|
69
69
|
self.catalog = catalog or get_catalog(
|
|
70
70
|
client_config=client_config, in_memory=in_memory
|
|
71
71
|
)
|
|
72
|
+
self.dataset_versions: list[tuple[DatasetRecord, int]] = []
|
|
72
73
|
|
|
73
74
|
def __enter__(self):
|
|
75
|
+
# Push the current context onto the stack
|
|
76
|
+
Session.SESSION_CONTEXTS.append(self)
|
|
77
|
+
|
|
74
78
|
return self
|
|
75
79
|
|
|
76
80
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
77
81
|
if exc_type:
|
|
78
|
-
self._cleanup_created_versions(
|
|
82
|
+
self._cleanup_created_versions()
|
|
79
83
|
|
|
80
84
|
self._cleanup_temp_datasets()
|
|
81
85
|
if self.is_new_catalog:
|
|
@@ -83,6 +87,12 @@ class Session:
|
|
|
83
87
|
self.catalog.warehouse.close_on_exit()
|
|
84
88
|
self.catalog.id_generator.close_on_exit()
|
|
85
89
|
|
|
90
|
+
if Session.SESSION_CONTEXTS:
|
|
91
|
+
Session.SESSION_CONTEXTS.pop()
|
|
92
|
+
|
|
93
|
+
def add_dataset_version(self, dataset: "DatasetRecord", version: int) -> None:
|
|
94
|
+
self.dataset_versions.append((dataset, version))
|
|
95
|
+
|
|
86
96
|
def generate_temp_dataset_name(self) -> str:
|
|
87
97
|
return self.get_temp_prefix() + uuid4().hex[: self.TEMP_TABLE_UUID_LEN]
|
|
88
98
|
|
|
@@ -98,21 +108,15 @@ class Session:
|
|
|
98
108
|
except TableMissingError:
|
|
99
109
|
pass
|
|
100
110
|
|
|
101
|
-
def _cleanup_created_versions(self
|
|
102
|
-
|
|
103
|
-
if not versions:
|
|
111
|
+
def _cleanup_created_versions(self) -> None:
|
|
112
|
+
if not self.dataset_versions:
|
|
104
113
|
return
|
|
105
114
|
|
|
106
|
-
|
|
107
|
-
for dataset_name, version in versions:
|
|
108
|
-
if dataset_name not in datasets:
|
|
109
|
-
datasets[dataset_name] = self.catalog.get_dataset(dataset_name)
|
|
110
|
-
dataset = datasets[dataset_name]
|
|
111
|
-
logger.info(
|
|
112
|
-
"Removing dataset version %s@%s due to exception", dataset_name, version
|
|
113
|
-
)
|
|
115
|
+
for dataset, version in self.dataset_versions:
|
|
114
116
|
self.catalog.remove_dataset_version(dataset, version)
|
|
115
117
|
|
|
118
|
+
self.dataset_versions.clear()
|
|
119
|
+
|
|
116
120
|
@classmethod
|
|
117
121
|
def get(
|
|
118
122
|
cls,
|
|
@@ -125,33 +129,34 @@ class Session:
|
|
|
125
129
|
|
|
126
130
|
Parameters:
|
|
127
131
|
session (Session): Optional Session(). If not provided a new session will
|
|
128
|
-
be created. It's needed mostly for
|
|
129
|
-
catalog (Catalog): Optional catalog. By default a new catalog is created.
|
|
132
|
+
be created. It's needed mostly for simple API purposes.
|
|
133
|
+
catalog (Catalog): Optional catalog. By default, a new catalog is created.
|
|
130
134
|
"""
|
|
131
135
|
if session:
|
|
132
136
|
return session
|
|
133
137
|
|
|
134
|
-
|
|
138
|
+
# Access the active (most recent) context from the stack
|
|
139
|
+
if cls.SESSION_CONTEXTS:
|
|
140
|
+
return cls.SESSION_CONTEXTS[-1]
|
|
141
|
+
|
|
142
|
+
if cls.GLOBAL_SESSION_CTX is None:
|
|
135
143
|
cls.GLOBAL_SESSION_CTX = Session(
|
|
136
144
|
cls.GLOBAL_SESSION_NAME,
|
|
137
145
|
catalog,
|
|
138
146
|
client_config=client_config,
|
|
139
147
|
in_memory=in_memory,
|
|
140
148
|
)
|
|
141
|
-
cls.GLOBAL_SESSION = cls.GLOBAL_SESSION_CTX.__enter__()
|
|
142
149
|
|
|
143
150
|
atexit.register(cls._global_cleanup)
|
|
144
151
|
cls.ORIGINAL_EXCEPT_HOOK = sys.excepthook
|
|
145
152
|
sys.excepthook = cls.except_hook
|
|
146
153
|
|
|
147
|
-
return cls.GLOBAL_SESSION
|
|
154
|
+
return cls.GLOBAL_SESSION_CTX
|
|
148
155
|
|
|
149
156
|
@staticmethod
|
|
150
157
|
def except_hook(exc_type, exc_value, exc_traceback):
|
|
158
|
+
Session.GLOBAL_SESSION_CTX.__exit__(exc_type, exc_value, exc_traceback)
|
|
151
159
|
Session._global_cleanup()
|
|
152
|
-
if Session.GLOBAL_SESSION_CTX is not None:
|
|
153
|
-
job_id = Session.GLOBAL_SESSION_CTX.job_id
|
|
154
|
-
Session.GLOBAL_SESSION_CTX._cleanup_created_versions(job_id)
|
|
155
160
|
|
|
156
161
|
if Session.ORIGINAL_EXCEPT_HOOK:
|
|
157
162
|
Session.ORIGINAL_EXCEPT_HOOK(exc_type, exc_value, exc_traceback)
|
|
@@ -160,7 +165,6 @@ class Session:
|
|
|
160
165
|
def cleanup_for_tests(cls):
|
|
161
166
|
if cls.GLOBAL_SESSION_CTX is not None:
|
|
162
167
|
cls.GLOBAL_SESSION_CTX.__exit__(None, None, None)
|
|
163
|
-
cls.GLOBAL_SESSION = None
|
|
164
168
|
cls.GLOBAL_SESSION_CTX = None
|
|
165
169
|
atexit.unregister(cls._global_cleanup)
|
|
166
170
|
|
|
@@ -171,3 +175,7 @@ class Session:
|
|
|
171
175
|
def _global_cleanup():
|
|
172
176
|
if Session.GLOBAL_SESSION_CTX is not None:
|
|
173
177
|
Session.GLOBAL_SESSION_CTX.__exit__(None, None, None)
|
|
178
|
+
|
|
179
|
+
for obj in gc.get_objects(): # Get all tracked objects
|
|
180
|
+
if isinstance(obj, Session): # Cleanup temp dataset for session variables.
|
|
181
|
+
obj.__exit__(None, None, None)
|
|
datachain/sql/functions/aggregate.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from sqlalchemy.sql.functions import GenericFunction, ReturnTypeFromArgs
|
|
2
|
+
|
|
3
|
+
from datachain.sql.types import Float, String
|
|
4
|
+
from datachain.sql.utils import compiler_not_implemented
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class avg(GenericFunction): # noqa: N801
|
|
8
|
+
"""
|
|
9
|
+
Returns the average of the column.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
type = Float()
|
|
13
|
+
package = "array"
|
|
14
|
+
name = "avg"
|
|
15
|
+
inherit_cache = True
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class group_concat(GenericFunction): # noqa: N801
|
|
19
|
+
"""
|
|
20
|
+
Returns the concatenated string of the column.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
type = String()
|
|
24
|
+
package = "array"
|
|
25
|
+
name = "group_concat"
|
|
26
|
+
inherit_cache = True
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class any_value(ReturnTypeFromArgs): # noqa: N801
|
|
30
|
+
"""
|
|
31
|
+
Returns first value of the column.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
inherit_cache = True
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class collect(ReturnTypeFromArgs): # noqa: N801
|
|
38
|
+
"""
|
|
39
|
+
Returns an array of the column.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
inherit_cache = True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
compiler_not_implemented(avg)
|
|
46
|
+
compiler_not_implemented(group_concat)
|
|
47
|
+
compiler_not_implemented(any_value)
|
datachain/sql/functions/array.py
CHANGED
|
@@ -44,15 +44,7 @@ class sip_hash_64(GenericFunction): # noqa: N801
|
|
|
44
44
|
inherit_cache = True
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
class avg(GenericFunction): # noqa: N801
|
|
48
|
-
type = Float()
|
|
49
|
-
package = "array"
|
|
50
|
-
name = "avg"
|
|
51
|
-
inherit_cache = True
|
|
52
|
-
|
|
53
|
-
|
|
54
47
|
compiler_not_implemented(cosine_distance)
|
|
55
48
|
compiler_not_implemented(euclidean_distance)
|
|
56
49
|
compiler_not_implemented(length)
|
|
57
50
|
compiler_not_implemented(sip_hash_64)
|
|
58
|
-
compiler_not_implemented(avg)
|
|
datachain/sql/functions/string.py
CHANGED
|
@@ -37,6 +37,18 @@ class regexp_replace(GenericFunction): # noqa: N801
|
|
|
37
37
|
inherit_cache = True
|
|
38
38
|
|
|
39
39
|
|
|
40
|
+
class replace(GenericFunction): # noqa: N801
|
|
41
|
+
"""
|
|
42
|
+
Replaces substring with another string.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
type = String()
|
|
46
|
+
package = "string"
|
|
47
|
+
name = "replace"
|
|
48
|
+
inherit_cache = True
|
|
49
|
+
|
|
50
|
+
|
|
40
51
|
compiler_not_implemented(length)
|
|
41
52
|
compiler_not_implemented(split)
|
|
42
53
|
compiler_not_implemented(regexp_replace)
|
|
54
|
+
compiler_not_implemented(replace)
|
datachain/sql/sqlite/base.py
CHANGED
|
@@ -14,7 +14,7 @@ from sqlalchemy.sql.elements import literal
|
|
|
14
14
|
from sqlalchemy.sql.expression import case
|
|
15
15
|
from sqlalchemy.sql.functions import func
|
|
16
16
|
|
|
17
|
-
from datachain.sql.functions import array, conditional, random, string
|
|
17
|
+
from datachain.sql.functions import aggregate, array, conditional, random, string
|
|
18
18
|
from datachain.sql.functions import path as sql_path
|
|
19
19
|
from datachain.sql.selectable import Values, base_values_compiler
|
|
20
20
|
from datachain.sql.sqlite.types import (
|
|
@@ -78,12 +78,16 @@ def setup():
|
|
|
78
78
|
compiles(array.length, "sqlite")(compile_array_length)
|
|
79
79
|
compiles(string.length, "sqlite")(compile_string_length)
|
|
80
80
|
compiles(string.split, "sqlite")(compile_string_split)
|
|
81
|
-
compiles(string.regexp_replace, "sqlite")(compile_regexp_replace)
|
|
81
|
+
compiles(string.regexp_replace, "sqlite")(compile_string_regexp_replace)
|
|
82
|
+
compiles(string.replace, "sqlite")(compile_string_replace)
|
|
82
83
|
compiles(conditional.greatest, "sqlite")(compile_greatest)
|
|
83
84
|
compiles(conditional.least, "sqlite")(compile_least)
|
|
84
85
|
compiles(Values, "sqlite")(compile_values)
|
|
85
86
|
compiles(random.rand, "sqlite")(compile_rand)
|
|
86
|
-
compiles(array.avg, "sqlite")(compile_avg)
|
|
87
|
+
compiles(aggregate.avg, "sqlite")(compile_avg)
|
|
88
|
+
compiles(aggregate.group_concat, "sqlite")(compile_group_concat)
|
|
89
|
+
compiles(aggregate.any_value, "sqlite")(compile_any_value)
|
|
90
|
+
compiles(aggregate.collect, "sqlite")(compile_collect)
|
|
87
91
|
|
|
88
92
|
if load_usearch_extension(sqlite3.connect(":memory:")):
|
|
89
93
|
compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
|
|
@@ -273,10 +277,6 @@ def path_file_ext(path):
|
|
|
273
277
|
return func.substr(path, func.length(path) - path_file_ext_length(path) + 1)
|
|
274
278
|
|
|
275
279
|
|
|
276
|
-
def compile_regexp_replace(element, compiler, **kwargs):
|
|
277
|
-
return f"regexp_replace({compiler.process(element.clauses, **kwargs)})"
|
|
278
|
-
|
|
279
|
-
|
|
280
280
|
def compile_path_parent(element, compiler, **kwargs):
|
|
281
281
|
return compiler.process(path_parent(*element.clauses.clauses), **kwargs)
|
|
282
282
|
|
|
@@ -331,6 +331,14 @@ def compile_string_split(element, compiler, **kwargs):
|
|
|
331
331
|
return compiler.process(func.split(*element.clauses.clauses), **kwargs)
|
|
332
332
|
|
|
333
333
|
|
|
334
|
+
def compile_string_regexp_replace(element, compiler, **kwargs):
|
|
335
|
+
return f"regexp_replace({compiler.process(element.clauses, **kwargs)})"
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def compile_string_replace(element, compiler, **kwargs):
|
|
339
|
+
return compiler.process(func.replace(*element.clauses.clauses), **kwargs)
|
|
340
|
+
|
|
341
|
+
|
|
334
342
|
def compile_greatest(element, compiler, **kwargs):
|
|
335
343
|
"""
|
|
336
344
|
Compiles a sql function for `greatest(*args)` taking 1 or more args
|
|
@@ -395,6 +403,21 @@ def compile_avg(element, compiler, **kwargs):
|
|
|
395
403
|
return compiler.process(func.avg(*element.clauses.clauses), **kwargs)
|
|
396
404
|
|
|
397
405
|
|
|
406
|
+
def compile_group_concat(element, compiler, **kwargs):
|
|
407
|
+
return compiler.process(func.aggregate_strings(*element.clauses.clauses), **kwargs)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def compile_any_value(element, compiler, **kwargs):
|
|
411
|
+
# use bare column to return any value from the group,
|
|
412
|
+
# this is documented behavior for sqlite,
|
|
413
|
+
# see https://www.sqlite.org/lang_select.html#bare_columns_in_an_aggregate_query
|
|
414
|
+
return compiler.process(*element.clauses.clauses, **kwargs)
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def compile_collect(element, compiler, **kwargs):
|
|
418
|
+
return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
|
|
419
|
+
|
|
420
|
+
|
|
398
421
|
def load_usearch_extension(conn) -> bool:
|
|
399
422
|
try:
|
|
400
423
|
# usearch is part of the vector optional dependencies
|
|
{datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -46,7 +46,7 @@ Requires-Dist: iterative-telemetry >=0.0.9
|
|
|
46
46
|
Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
|
|
47
47
|
Provides-Extra: dev
|
|
48
48
|
Requires-Dist: datachain[docs,tests] ; extra == 'dev'
|
|
49
|
-
Requires-Dist: mypy ==1.
|
|
49
|
+
Requires-Dist: mypy ==1.12.0 ; extra == 'dev'
|
|
50
50
|
Requires-Dist: types-python-dateutil ; extra == 'dev'
|
|
51
51
|
Requires-Dist: types-pytz ; extra == 'dev'
|
|
52
52
|
Requires-Dist: types-PyYAML ; extra == 'dev'
|
|
{datachain-0.5.1.dist-info → datachain-0.6.1.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datachain/__init__.py,sha256=
|
|
1
|
+
datachain/__init__.py,sha256=OGzc8xZWtwqxiiutjU4AxCRPY0lrX_csgERiTrq4G0o,908
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=Lg3Ck1PQLjQziMx9KU4atzbEnJXTE0924WMYkhgWtGU,8247
|
|
4
4
|
datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
|
|
@@ -18,7 +18,7 @@ datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
|
|
|
18
18
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
19
19
|
datachain/utils.py,sha256=KeFSRHsiYthnTu4a6bH-rw04mX1m8krTX0f2NqfQGFI,12114
|
|
20
20
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
21
|
-
datachain/catalog/catalog.py,sha256=
|
|
21
|
+
datachain/catalog/catalog.py,sha256=r5lkwwZDh8cETNniBdzPCY9Ix8G-1RdkehjvUe3d2nE,63834
|
|
22
22
|
datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
|
|
23
23
|
datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
|
|
24
24
|
datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
|
|
@@ -36,14 +36,14 @@ datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s
|
|
|
36
36
|
datachain/data_storage/metastore.py,sha256=HfCxk4lmDUg2Q4WsFNQGMWxllP0mToA00fxkFTwdNIE,52919
|
|
37
37
|
datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
|
|
38
38
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
39
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
40
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
39
|
+
datachain/data_storage/sqlite.py,sha256=V8fGRPjSwIT7kdw1qyQfUfdqGjXB8dE68npkyXfKW0o,28702
|
|
40
|
+
datachain/data_storage/warehouse.py,sha256=Ea0wVcWxe7Bu-8V8eqrPJ8Ov5-DT1dvv1MgxMINettc,31931
|
|
41
41
|
datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
42
|
datachain/lib/arrow.py,sha256=0R2CYsN82nNa5_03iS6jVix9EKeeqNZNAMgpSQP2hfo,9482
|
|
43
43
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
44
|
-
datachain/lib/data_model.py,sha256=
|
|
44
|
+
datachain/lib/data_model.py,sha256=ECTbvlnzM98hp2mZ4fo82Yi0-MuoqTIQasQKGIyd89I,2040
|
|
45
45
|
datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
|
|
46
|
-
datachain/lib/dc.py,sha256=
|
|
46
|
+
datachain/lib/dc.py,sha256=wEqBDCENfBmeow0-uu8R4qJhQa8taEIzveUiNdr2CyY,78341
|
|
47
47
|
datachain/lib/file.py,sha256=LjTW_-PDAnoUhvyB4bJ8Y8n__XGqrxvmd9mDOF0Gir8,14875
|
|
48
48
|
datachain/lib/hf.py,sha256=cPnmLuprr0pYABH7KqA5FARQ1JGlywdDwD3yDzVAm4k,5920
|
|
49
49
|
datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
|
|
@@ -53,30 +53,33 @@ datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw
|
|
|
53
53
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
54
54
|
datachain/lib/pytorch.py,sha256=W-ARi2xH1f1DUkVfRuerW-YWYgSaJASmNCxtz2lrJGI,6072
|
|
55
55
|
datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
|
|
56
|
-
datachain/lib/signal_schema.py,sha256=
|
|
56
|
+
datachain/lib/signal_schema.py,sha256=6fgQIZz4jFvuiaL1mqK5Cq6yr4WC57o2ptHxk36MRNY,24438
|
|
57
57
|
datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
58
58
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
59
|
-
datachain/lib/udf.py,sha256=
|
|
59
|
+
datachain/lib/udf.py,sha256=GvhWLCXZUY7sz1QMRBj1AJDSzzhyj15xs3Ia9hjJrJE,12697
|
|
60
60
|
datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
|
|
61
|
-
datachain/lib/utils.py,sha256=
|
|
61
|
+
datachain/lib/utils.py,sha256=12elAX6eTFgMGKIf2UfZ4IW07kRwjK6wz8yGE41RtNM,618
|
|
62
62
|
datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
63
|
datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
|
|
64
64
|
datachain/lib/webdataset_laion.py,sha256=aGMWeFmeYNK75ewO9JTA11iB1i3QtTzUfenQA5jajfo,2535
|
|
65
65
|
datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
66
|
datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
|
|
67
67
|
datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
|
|
68
|
-
datachain/lib/convert/sql_to_python.py,sha256=
|
|
68
|
+
datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
|
|
69
69
|
datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
|
|
70
|
-
datachain/lib/convert/values_to_tuples.py,sha256=
|
|
71
|
-
datachain/
|
|
72
|
-
datachain/
|
|
73
|
-
datachain/
|
|
74
|
-
datachain/query/
|
|
70
|
+
datachain/lib/convert/values_to_tuples.py,sha256=varRCnSMT_pZmHznrd2Yi05qXLLz_v9YH_pOCpHSkdc,3921
|
|
71
|
+
datachain/lib/func/__init__.py,sha256=ucJ15J_Q5Hy--boKV-tPuhKagVD3NpnuUPhLtDp7doI,230
|
|
72
|
+
datachain/lib/func/aggregate.py,sha256=B5VV6WoSYYiO_9uN4_nXPMkF9OOkgyE6suJ7XD-JiPI,938
|
|
73
|
+
datachain/lib/func/func.py,sha256=kFhVZlWZzgAfM7-DpkpZWf5zzdEutp_3NxIFWxXww_I,1956
|
|
74
|
+
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
75
|
+
datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
|
|
76
|
+
datachain/query/dataset.py,sha256=-J8t8XGUQveh-4aM5HrnbYx9xLfMQ8p6P9sKmBaTpLU,52683
|
|
77
|
+
datachain/query/dispatch.py,sha256=wjjTWw6sFQbB9SKRh78VbfvwSMgJXCfqJklS3-9KnCU,12025
|
|
75
78
|
datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
|
|
76
79
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
77
80
|
datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
|
|
78
|
-
datachain/query/schema.py,sha256=
|
|
79
|
-
datachain/query/session.py,sha256=
|
|
81
|
+
datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
|
|
82
|
+
datachain/query/session.py,sha256=50SOdLNCjqHHKI-L4xGXyzTVxzMWfANqKqjeYre-c2k,5959
|
|
80
83
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
84
|
datachain/remote/studio.py,sha256=f5s6qSZ9uB4URGUoU_8_W1KZRRQQVSm6cgEBkBUEfuE,7226
|
|
82
85
|
datachain/sql/__init__.py,sha256=A2djrbQwSMUZZEIKGnm-mnRA-NDSbiDJNpAmmwGNyIo,303
|
|
@@ -85,20 +88,21 @@ datachain/sql/types.py,sha256=3aXpoxkmCYbw0Dlta5J1enwS8_FuvjfSqyrNZO-dWj4,13383
|
|
|
85
88
|
datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
|
|
86
89
|
datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
|
|
87
90
|
datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
|
|
88
|
-
datachain/sql/functions/__init__.py,sha256
|
|
89
|
-
datachain/sql/functions/
|
|
91
|
+
datachain/sql/functions/__init__.py,sha256=-vIkU0AqwOW5FX6P89xYl-uBIUdt46CEnCtshmN85gM,400
|
|
92
|
+
datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
|
|
93
|
+
datachain/sql/functions/array.py,sha256=rvH27SWN9gdh_mFnp0GIiXuCrNW6n8ZbY4I_JUS-_e0,1140
|
|
90
94
|
datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
|
|
91
95
|
datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
|
|
92
96
|
datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
|
|
93
|
-
datachain/sql/functions/string.py,sha256=
|
|
97
|
+
datachain/sql/functions/string.py,sha256=DYgiw8XSk7ge7GXvyRI1zbaMruIizNeI-puOjriQGZQ,1148
|
|
94
98
|
datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
|
|
95
|
-
datachain/sql/sqlite/base.py,sha256=
|
|
99
|
+
datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,14375
|
|
96
100
|
datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
|
|
97
101
|
datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
|
|
98
102
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
99
|
-
datachain-0.
|
|
100
|
-
datachain-0.
|
|
101
|
-
datachain-0.
|
|
102
|
-
datachain-0.
|
|
103
|
-
datachain-0.
|
|
104
|
-
datachain-0.
|
|
103
|
+
datachain-0.6.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
104
|
+
datachain-0.6.1.dist-info/METADATA,sha256=kOEDXkaNjPHB-A1fLt60s_EJvnjuLIU3xdfp5UhflUA,17156
|
|
105
|
+
datachain-0.6.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
106
|
+
datachain-0.6.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
107
|
+
datachain-0.6.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
108
|
+
datachain-0.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|