datachain 0.30.4__py3-none-any.whl → 0.30.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of datachain might be problematic.
- datachain/cli/commands/datasets.py +32 -17
- datachain/data_storage/warehouse.py +2 -2
- datachain/delta.py +36 -20
- datachain/lib/arrow.py +2 -2
- datachain/lib/dc/datachain.py +17 -7
- datachain/lib/dc/datasets.py +4 -0
- datachain/lib/dc/storage.py +5 -0
- datachain/lib/model_store.py +12 -0
- datachain/query/dispatch.py +5 -0
- datachain/sql/sqlite/base.py +12 -11
- datachain/sql/sqlite/types.py +8 -13
- datachain/sql/types.py +3 -3
- datachain/utils.py +1 -1
- {datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/METADATA +3 -3
- {datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/RECORD +19 -19
- {datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/WHEEL +0 -0
- {datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/entry_points.txt +0 -0
- {datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/top_level.txt +0 -0
datachain/cli/commands/datasets.py
CHANGED
@@ -1,30 +1,41 @@
 import sys
-from
+from collections.abc import Iterable, Iterator
+from typing import TYPE_CHECKING, Optional, Union
 
 from tabulate import tabulate
 
-
-from datachain.catalog import Catalog
-
+from datachain import semver
 from datachain.catalog import is_namespace_local
 from datachain.cli.utils import determine_flavors
 from datachain.config import Config
 from datachain.error import DataChainError, DatasetNotFoundError
 from datachain.studio import list_datasets as list_datasets_studio
 
+if TYPE_CHECKING:
+    from datachain.catalog import Catalog
+
+
+def group_dataset_versions(
+    datasets: Iterable[tuple[str, str]], latest_only=True
+) -> dict[str, Union[str, list[str]]]:
+    grouped: dict[str, list[tuple[int, int, int]]] = {}
 
-def group_dataset_versions(datasets, latest_only=True):
-    grouped = {}
     # Sort to ensure groupby works as expected
     # (groupby expects consecutive items with the same key)
     for name, version in sorted(datasets):
-        grouped.setdefault(name, []).append(version)
+        grouped.setdefault(name, []).append(semver.parse(version))
 
     if latest_only:
         # For each dataset name, pick the highest version.
-        return {
+        return {
+            name: semver.create(*(max(versions))) for name, versions in grouped.items()
+        }
+
     # For each dataset name, return a sorted list of unique versions.
-    return {
+    return {
+        name: [semver.create(*v) for v in sorted(set(versions))]
+        for name, versions in grouped.items()
+    }
 
 
 def list_datasets(
@@ -35,7 +46,7 @@ def list_datasets(
     team: Optional[str] = None,
     latest_only: bool = True,
     name: Optional[str] = None,
-):
+) -> None:
     token = Config().read().get("studio", {}).get("token")
     all, local, studio = determine_flavors(studio, local, all, token)
     if name:
@@ -95,27 +106,31 @@ def list_datasets(
     print(tabulate(rows, headers="keys"))
 
 
-def list_datasets_local(
+def list_datasets_local(
+    catalog: "Catalog", name: Optional[str] = None
+) -> Iterator[tuple[str, str]]:
     if name:
         yield from list_datasets_local_versions(catalog, name)
         return
 
     for d in catalog.ls_datasets():
         for v in d.versions:
-            yield
+            yield d.full_name, v.version
 
 
-def list_datasets_local_versions(
+def list_datasets_local_versions(
+    catalog: "Catalog", name: str
+) -> Iterator[tuple[str, str]]:
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)
 
     ds = catalog.get_dataset(
         name, namespace_name=namespace_name, project_name=project_name
     )
     for v in ds.versions:
-        yield
+        yield name, v.version
 
 
-def _datasets_tabulate_row(name, both, local_version, studio_version):
+def _datasets_tabulate_row(name, both, local_version, studio_version) -> dict[str, str]:
     row = {
         "Name": name,
     }
@@ -136,7 +151,7 @@ def rm_dataset(
     force: Optional[bool] = False,
     studio: Optional[bool] = False,
     team: Optional[str] = None,
-):
+) -> None:
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)
 
     if studio:
@@ -166,7 +181,7 @@ def edit_dataset(
     description: Optional[str] = None,
    attrs: Optional[list[str]] = None,
     team: Optional[str] = None,
-):
+) -> None:
     from datachain.lib.dc.utils import is_studio
 
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)
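The semver round trip is the substance of this change: versions are parsed into (major, minor, patch) tuples before comparison, because plain string comparison mis-orders anything past 9. A minimal sketch, assuming datachain.semver.parse and semver.create behave as the type annotations in the diff suggest:

from datachain import semver  # parse() -> tuple[int, int, int], create() -> str

versions = ["1.9.0", "1.10.0"]
print(max(versions))                          # '1.9.0'  (lexicographic, wrong)
parsed = [semver.parse(v) for v in versions]  # [(1, 9, 0), (1, 10, 0)]
print(semver.create(*max(parsed)))            # '1.10.0' (tuple comparison, correct)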
datachain/data_storage/warehouse.py
CHANGED
@@ -1,5 +1,4 @@
 import glob
-import json
 import logging
 import posixpath
 import random
@@ -11,6 +10,7 @@ from urllib.parse import urlparse
 
 import attrs
 import sqlalchemy as sa
+import ujson as json
 from sqlalchemy.sql.expression import true
 
 from datachain.client import Client
@@ -122,7 +122,7 @@ class AbstractWarehouse(ABC, Serializable):
         if value_type is str:
             return val
         if value_type in (dict, list):
-            return json.dumps(val)
+            return json.dumps(val, ensure_ascii=False)
         raise ValueError(
             f"Cannot convert value {val!r} with type {value_type} to JSON"
         )
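The ensure_ascii=False additions that recur across this release keep the switch from orjson to ujson behavior-compatible: orjson always emits unescaped UTF-8, while ujson (like the stdlib json module) escapes non-ASCII characters by default. A quick illustrative sketch:

import ujson as json

payload = {"name": "café"}
print(json.dumps(payload))                      # {"name":"caf\u00e9"}
print(json.dumps(payload, ensure_ascii=False))  # {"name":"café"}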
datachain/delta.py
CHANGED
@@ -4,7 +4,7 @@ from functools import wraps
 from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
 
 import datachain
-from datachain.dataset import DatasetDependency
+from datachain.dataset import DatasetDependency, DatasetRecord
 from datachain.error import DatasetNotFoundError
 from datachain.project import Project
 
@@ -30,9 +30,10 @@ def delta_disabled(
 
     @wraps(method)
     def _inner(self: T, *args: "P.args", **kwargs: "P.kwargs") -> T:
-        if self.delta:
+        if self.delta and not self._delta_unsafe:
             raise NotImplementedError(
-                f"
+                f"Cannot use {method.__name__} with delta datasets - may cause"
+                " inconsistency. Use delta_unsafe flag to allow this operation."
             )
         return method(self, *args, **kwargs)
 
@@ -124,10 +125,19 @@ def _get_retry_chain(
     # Subtract also diff chain since some items might be picked
     # up by `delta=True` itself (e.g. records got modified AND are missing in the
     # result dataset atm)
-
+    on = [on] if isinstance(on, str) else on
+
+    return (
+        retry_chain.diff(
+            diff_chain, on=on, added=True, same=True, modified=False, deleted=False
+        ).distinct(*on)
+        if retry_chain
+        else None
+    )
 
 
 def _get_source_info(
+    source_ds: DatasetRecord,
     name: str,
     namespace_name: str,
     project_name: str,
@@ -154,25 +164,23 @@ def _get_source_info(
         indirect=False,
     )
 
-
-    if not
+    source_ds_dep = next((d for d in dependencies if d.name == source_ds.name), None)
+    if not source_ds_dep:
         # Starting dataset was removed, back off to normal dataset creation
         return None, None, None, None, None
 
-
-
-
-
-
-
-        project_name=source_ds_project.name,
-    ).latest_version
+    # Refresh starting dataset to have new versions if they are created
+    source_ds = catalog.get_dataset(
+        source_ds.name,
+        namespace_name=source_ds.project.namespace.name,
+        project_name=source_ds.project.name,
+    )
 
     return (
-
-
-
-
+        source_ds.name,
+        source_ds.project,
+        source_ds_dep.version,
+        source_ds.latest_version,
         dependencies,
     )
 
@@ -244,7 +252,14 @@ def delta_retry_update(
         source_ds_version,
         source_ds_latest_version,
         dependencies,
-    ) = _get_source_info(
+    ) = _get_source_info(
+        dc._query.starting_step.dataset,  # type: ignore[union-attr]
+        name,
+        namespace_name,
+        project_name,
+        latest_version,
+        catalog,
+    )
 
     # If source_ds_name is None, starting dataset was removed
     if source_ds_name is None:
@@ -267,8 +282,9 @@ def delta_retry_update(
     if dependencies:
         dependencies = copy(dependencies)
         dependencies = [d for d in dependencies if d is not None]
+        source_ds_dep = next(d for d in dependencies if d.name == source_ds_name)
         # Update to latest version
-
+        source_ds_dep.version = source_ds_latest_version  # type: ignore[union-attr]
 
     # Handle retry functionality if enabled
     if delta_retry:
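Reassembled from the hunk above, the delta_disabled gate reads as follows; a standalone sketch with the typing generics dropped for brevity:

from functools import wraps

def delta_disabled(method):
    @wraps(method)
    def _inner(self, *args, **kwargs):
        # Restricted operations raise on delta chains unless the chain was
        # created with delta_unsafe=True (the escape hatch added in 0.30.6).
        if self.delta and not self._delta_unsafe:
            raise NotImplementedError(
                f"Cannot use {method.__name__} with delta datasets - may cause"
                " inconsistency. Use delta_unsafe flag to allow this operation."
            )
        return method(self, *args, **kwargs)

    return _inner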
datachain/lib/arrow.py
CHANGED
@@ -2,8 +2,8 @@ from collections.abc import Sequence
 from itertools import islice
 from typing import TYPE_CHECKING, Any, Optional
 
-import orjson
 import pyarrow as pa
+import ujson as json
 from pyarrow._csv import ParseOptions
 from pyarrow.dataset import CsvFileFormat, dataset
 from tqdm.auto import tqdm
@@ -269,7 +269,7 @@ def _get_hf_schema(
 def _get_datachain_schema(schema: "pa.Schema") -> Optional[SignalSchema]:
     """Return a restored SignalSchema from parquet metadata, if any is found."""
     if schema.metadata and DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY in schema.metadata:
-        serialized_signal_schema =
+        serialized_signal_schema = json.loads(
             schema.metadata[DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY]
         )
         return SignalSchema.deserialize(serialized_signal_schema)
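_get_datachain_schema now decodes the schema stored in parquet key-value metadata with ujson. A sketch of the round trip it relies on; the metadata key name below is illustrative, not the library's actual constant:

import pyarrow as pa
import ujson as json

KEY = b"datachain_signal_schema"  # hypothetical key, for illustration only
schema = pa.schema([("name", pa.string())]).with_metadata(
    {KEY: json.dumps({"name": "str"}, ensure_ascii=False)}
)
# Parquet/Arrow metadata values come back as bytes; ujson.loads accepts bytes.
print(json.loads(schema.metadata[KEY]))  # {'name': 'str'}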
datachain/lib/dc/datachain.py
CHANGED
@@ -19,8 +19,8 @@ from typing import (
     overload,
 )
 
-import orjson
 import sqlalchemy
+import ujson as json
 from pydantic import BaseModel
 from sqlalchemy.sql.elements import ColumnElement
 from tqdm import tqdm
@@ -193,6 +193,7 @@ class DataChain:
         self._setup: dict = setup or {}
         self._sys = _sys
         self._delta = False
+        self._delta_unsafe = False
         self._delta_on: Optional[Union[str, Sequence[str]]] = None
         self._delta_result_on: Optional[Union[str, Sequence[str]]] = None
         self._delta_compare: Optional[Union[str, Sequence[str]]] = None
@@ -216,6 +217,7 @@ class DataChain:
         right_on: Optional[Union[str, Sequence[str]]] = None,
         compare: Optional[Union[str, Sequence[str]]] = None,
         delta_retry: Optional[Union[bool, str]] = None,
+        delta_unsafe: bool = False,
     ) -> "Self":
         """Marks this chain as delta, which means special delta process will be
         called on saving dataset for optimization"""
@@ -226,6 +228,7 @@ class DataChain:
         self._delta_result_on = right_on
         self._delta_compare = compare
         self._delta_retry = delta_retry
+        self._delta_unsafe = delta_unsafe
         return self
 
     @property
@@ -238,6 +241,10 @@ class DataChain:
         """Returns True if this chain is ran in "delta" update mode"""
         return self._delta
 
+    @property
+    def delta_unsafe(self) -> bool:
+        return self._delta_unsafe
+
     @property
     def schema(self) -> dict[str, DataType]:
         """Get schema of the chain."""
@@ -328,6 +335,7 @@ class DataChain:
             right_on=self._delta_result_on,
             compare=self._delta_compare,
             delta_retry=self._delta_retry,
+            delta_unsafe=self._delta_unsafe,
         )
 
         return chain
@@ -462,8 +470,6 @@ class DataChain:
         Returns:
             DataChain: A new DataChain instance with the new set of columns.
         """
-        import json
-
         import pyarrow as pa
 
         from datachain.lib.arrow import schema_to_output
@@ -2129,9 +2135,9 @@ class DataChain:
         fsspec_fs = client.create_fs(**fs_kwargs)
 
         _partition_cols = list(partition_cols) if partition_cols else None
-        signal_schema_metadata =
-            self._effective_signals_schema.serialize()
-        )
+        signal_schema_metadata = json.dumps(
+            self._effective_signals_schema.serialize(), ensure_ascii=False
+        ).encode("utf-8")
 
         column_names, column_chunks = self.to_columnar_data_with_names(chunk_size)
 
@@ -2278,7 +2284,11 @@ class DataChain:
                     f.write(b"\n")
                 else:
                     is_first = False
-                f.write(
+                f.write(
+                    json.dumps(
+                        row_to_nested_dict(headers, row), ensure_ascii=False
+                    ).encode("utf-8")
+                )
         if include_outer_list:
             # This makes the file JSON instead of JSON lines.
             f.write(b"\n]\n")
datachain/lib/dc/datasets.py
CHANGED
@@ -40,6 +40,7 @@ def read_dataset(
     delta_result_on: Optional[Union[str, Sequence[str]]] = None,
     delta_compare: Optional[Union[str, Sequence[str]]] = None,
     delta_retry: Optional[Union[bool, str]] = None,
+    delta_unsafe: bool = False,
     update: bool = False,
 ) -> "DataChain":
     """Get data from a saved Dataset. It returns the chain itself.
@@ -80,6 +81,8 @@ def read_dataset(
         update: If True always checks for newer versions available on Studio, even if
             some version of the dataset exists locally already. If False (default), it
             will only fetch the dataset from Studio if it is not found locally.
+        delta_unsafe: Allow restricted ops in delta: merge, agg, union, group_by,
+            distinct.
 
 
     Example:
@@ -205,6 +208,7 @@ def read_dataset(
             right_on=delta_result_on,
             compare=delta_compare,
             delta_retry=delta_retry,
+            delta_unsafe=delta_unsafe,
        )
 
     return chain
datachain/lib/dc/storage.py
CHANGED
@@ -43,6 +43,7 @@ def read_storage(
     delta_result_on: Optional[Union[str, Sequence[str]]] = None,
     delta_compare: Optional[Union[str, Sequence[str]]] = None,
     delta_retry: Optional[Union[bool, str]] = None,
+    delta_unsafe: bool = False,
     client_config: Optional[dict] = None,
 ) -> "DataChain":
     """Get data from storage(s) as a list of file with all file attributes.
@@ -77,6 +78,9 @@ def read_storage(
             (error mode)
             - True: Reprocess records missing from the result dataset (missing mode)
             - None: No retry processing (default)
+        delta_unsafe: Allow restricted ops in delta: merge, agg, union, group_by,
+            distinct. Caller must ensure datasets are consistent and not partially
+            updated.
 
     Returns:
         DataChain: A DataChain object containing the file information.
@@ -218,6 +222,7 @@ def read_storage(
             right_on=delta_result_on,
             compare=delta_compare,
             delta_retry=delta_retry,
+            delta_unsafe=delta_unsafe,
         )
 
     return storage_chain
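Taken together with the delta_disabled change in delta.py, the new flag lets a caller opt in to otherwise-restricted operations on a delta chain. A hedged usage sketch; the bucket and dataset names are hypothetical, and delta/delta_on are pre-existing read_storage parameters not shown in these hunks:

import datachain as dc

chain = (
    dc.read_storage("s3://my-bucket/images/", delta=True, delta_unsafe=True)
    # distinct() would raise NotImplementedError on a delta chain
    # without delta_unsafe=True.
    .distinct("file.path")
)
chain.save("images-dedup")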
datachain/lib/model_store.py
CHANGED
@@ -89,3 +89,15 @@ class ModelStore:
             and ModelStore.is_pydantic(parent_type)
             and "@" in ModelStore.get_name(parent_type)
         )
+
+    @classmethod
+    def rebuild_all(cls) -> None:
+        """Ensure pydantic schemas are (re)built for all registered models.
+
+        Uses ``force=True`` to avoid subtle cases where a deserialized class
+        (e.g. from by-value cloudpickle in workers) reports built state but
+        nested model field schemas aren't fully resolved yet.
+        """
+        for versions in cls.store.values():
+            for model in versions.values():
+                model.model_rebuild(force=True)
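model_rebuild is the standard pydantic v2 API; rebuild_all simply applies it to every registered model. A self-contained sketch of the underlying call:

from pydantic import BaseModel

class Inner(BaseModel):
    x: int

class Outer(BaseModel):
    inner: "Inner"  # forward reference, resolved at rebuild time

# force=True rebuilds even when the class already reports a built schema,
# which is the deserialized-in-worker case the docstring above describes.
Outer.model_rebuild(force=True)
print(Outer(inner={"x": 1}))  # inner=Inner(x=1)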
datachain/query/dispatch.py
CHANGED
@@ -13,6 +13,7 @@ from multiprocess import get_context
 from datachain.catalog import Catalog
 from datachain.catalog.catalog import clone_catalog_with_cache
 from datachain.catalog.loader import DISTRIBUTED_IMPORT_PATH, get_udf_distributor_class
+from datachain.lib.model_store import ModelStore
 from datachain.lib.udf import _get_cache
 from datachain.query.dataset import (
     get_download_callback,
@@ -130,6 +131,8 @@ class UDFDispatcher:
 
     def _create_worker(self) -> "UDFWorker":
         udf: UDFAdapter = loads(self.udf_data)
+        # Ensure all registered DataModels have rebuilt schemas in worker processes.
+        ModelStore.rebuild_all()
         return UDFWorker(
             self.catalog,
             udf,
@@ -196,6 +199,8 @@ class UDFDispatcher:
         generated_cb: Callback = DEFAULT_CALLBACK,
     ) -> None:
         udf: UDFAdapter = loads(self.udf_data)
+        # Rebuild schemas in single process too for consistency (cheap, idempotent).
+        ModelStore.rebuild_all()
 
         if ids_only and not self.is_batching:
             input_rows = flatten(input_rows)
datachain/sql/sqlite/base.py
CHANGED
@@ -8,8 +8,8 @@ from functools import cache
 from types import MappingProxyType
 from typing import Callable, Optional
 
-import orjson
 import sqlalchemy as sa
+import ujson as json
 from sqlalchemy.dialects import sqlite
 from sqlalchemy.ext.compiler import compiles
 from sqlalchemy.sql.elements import literal
@@ -182,7 +182,7 @@ def missing_vector_function(name, exc):
 
 
 def sqlite_string_split(string: str, sep: str, maxsplit: int = -1) -> str:
-    return
+    return json.dumps(string.split(sep, maxsplit), ensure_ascii=False)
 
 
 def sqlite_int_hash_64(x: int) -> int:
@@ -453,17 +453,17 @@ def compile_byte_hamming_distance(element, compiler, **kwargs):
 
 
 def py_json_array_length(arr):
-    return len(
+    return len(json.loads(arr))
 
 
 def py_json_array_contains(arr, value, is_json):
     if is_json:
-        value =
-    return value in
+        value = json.loads(value)
+    return value in json.loads(arr)
 
 
 def py_json_array_get_element(val, idx):
-    arr =
+    arr = json.loads(val)
     try:
         return arr[idx]
     except IndexError:
@@ -471,17 +471,18 @@ def py_json_array_get_element(val, idx):
 
 
 def py_json_array_slice(val, offset: int, length: Optional[int] = None):
-    arr =
+    arr = json.loads(val)
     try:
-        return
-            list(arr[offset : offset + length] if length is not None else arr[offset:])
-
+        return json.dumps(
+            list(arr[offset : offset + length] if length is not None else arr[offset:]),
+            ensure_ascii=False,
+        )
     except IndexError:
         return None
 
 
 def py_json_array_join(val, sep: str):
-    return sep.join(
+    return sep.join(json.loads(val))
 
 
 def compile_array_get_element(element, compiler, **kwargs):
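The py_json_* helpers above are plain Python functions, presumably registered as SQLite user-defined functions elsewhere in this module. A minimal standalone sketch of that pattern with the stdlib sqlite3 module; the SQL function name here is illustrative:

import sqlite3
import ujson as json

def py_json_array_length(arr):
    return len(json.loads(arr))

conn = sqlite3.connect(":memory:")
# Register the Python callable so SQL queries can invoke it by name.
conn.create_function("json_array_length_py", 1, py_json_array_length)
print(conn.execute("SELECT json_array_length_py('[1, 2, 3]')").fetchone()[0])  # 3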
datachain/sql/sqlite/types.py
CHANGED
@@ -1,6 +1,6 @@
 import sqlite3
 
-import
+import ujson as json
 from sqlalchemy import types
 
 from datachain.sql.types import TypeConverter, TypeReadConverter
@@ -28,26 +28,21 @@ class Array(types.UserDefinedType):
 
 
 def adapt_array(arr):
-    return
+    return json.dumps(arr, ensure_ascii=False)
 
 
 def adapt_dict(dct):
-    return
+    return json.dumps(dct, ensure_ascii=False)
 
 
 def convert_array(arr):
-    return
+    return json.loads(arr)
 
 
 def adapt_np_array(arr):
-
-
-
-        return obj
-
-    return orjson.dumps(
-        arr, option=orjson.OPT_SERIALIZE_NUMPY, default=_json_serialize
-    ).decode("utf-8")
+    # Primarily needed for UDF numpy results (e.g. WDS)
+    # tolist() gives nested Python lists + native scalars; ujson.dumps handles NaN/Inf.
+    return json.dumps(arr.tolist(), ensure_ascii=False)
 
 
 def adapt_np_generic(val):
@@ -74,5 +69,5 @@ class SQLiteTypeConverter(TypeConverter):
 class SQLiteTypeReadConverter(TypeReadConverter):
     def array(self, value, item_type, dialect):
         if isinstance(value, str):
-            value =
+            value = json.loads(value)
         return super().array(value, item_type, dialect)
datachain/sql/types.py
CHANGED
@@ -16,8 +16,8 @@ from datetime import datetime
 from types import MappingProxyType
 from typing import Any, Union
 
-import orjson
 import sqlalchemy as sa
+import ujson as jsonlib
 from sqlalchemy import TypeDecorator, types
 
 from datachain.lib.data_model import StandardType
@@ -352,7 +352,7 @@ class Array(SQLType):
     def on_read_convert(self, value, dialect):
         r = read_converter(dialect).array(value, self.item_type, dialect)
         if isinstance(self.item_type, JSON):
-            r = [
+            r = [jsonlib.loads(item) if isinstance(item, str) else item for item in r]
         return r
 
 
@@ -466,7 +466,7 @@ class TypeReadConverter:
         if isinstance(value, str):
             if value == "":
                 return {}
-            return
+            return jsonlib.loads(value)
         return value
 
     def datetime(self, value):
datachain/utils.py
CHANGED
@@ -417,7 +417,7 @@ class JSONSerialize(json.JSONEncoder):
 
 def inside_colab() -> bool:
     try:
-        from google import colab  # noqa: F401
+        from google import colab  # type: ignore[attr-defined]  # noqa: F401
     except ImportError:
         return False
     return True
{datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.30.4
+Version: 0.30.6
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Requires-Dist: tomlkit
 Requires-Dist: tqdm
 Requires-Dist: numpy<3,>=1
 Requires-Dist: pandas>=2.0.0
+Requires-Dist: ujson>=5.10.0
 Requires-Dist: packaging
 Requires-Dist: pyarrow
 Requires-Dist: typing-extensions
@@ -38,7 +39,6 @@ Requires-Dist: shtab<2,>=1.3.4
 Requires-Dist: sqlalchemy>=2
 Requires-Dist: multiprocess==0.70.16
 Requires-Dist: cloudpickle
-Requires-Dist: orjson>=3.10.5
 Requires-Dist: pydantic
 Requires-Dist: jmespath>=1.0
 Requires-Dist: datamodel-code-generator>=0.25
@@ -92,7 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
 Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
 Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
 Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
-Requires-Dist: pytest-
+Requires-Dist: pytest-dotenv; extra == "tests"
 Requires-Dist: virtualenv; extra == "tests"
 Requires-Dist: dulwich; extra == "tests"
 Requires-Dist: hypothesis; extra == "tests"
{datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/RECORD
CHANGED
@@ -4,7 +4,7 @@ datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
 datachain/dataset.py,sha256=ATGa-CBTFoZeTN2V40-zHEzfMBcdYK0WuoJ6H2yEAvo,25268
-datachain/delta.py,sha256=
+datachain/delta.py,sha256=X5Lw6GQ8MAYNl2YIExNvl0tPIkylQEWwnCw0We7NtHM,10693
 datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
 datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
 datachain/listing.py,sha256=aqayl5St3D9PwdwM6nR1STkpLSw-S3U8pudO9PWi3N8,7241
@@ -19,7 +19,7 @@ datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
 datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
 datachain/studio.py,sha256=27750qCSNxIChEzhV02damIFreLMfr7UdiWqMFyk8AA,15361
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
-datachain/utils.py,sha256=
+datachain/utils.py,sha256=RKe1-VuC9juQSIbIpMnELJ7QrsKQggj8l7Q8_FiCZHE,15664
 datachain/catalog/__init__.py,sha256=9NBaywvAOaXdkyqiHjbBEiXs7JImR1OJsY9r8D5Q16g,403
 datachain/catalog/catalog.py,sha256=a1AN6eDHWWzII1wi46T_1JvTsW1AeMudwR_6sVQ4f7I,67588
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
@@ -27,7 +27,7 @@ datachain/catalog/loader.py,sha256=53VnuSRkt_CO9RdlHWkzQsPF55qMxcXvEm3ecsZREw8,6
 datachain/cli/__init__.py,sha256=so3WxEQF03KdGvjav15Sw7a6-lriiE24uDSGbBDBp8o,8298
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
-datachain/cli/commands/datasets.py,sha256=
+datachain/cli/commands/datasets.py,sha256=DAbONwcA__JM1qkcKVOP5sKukGbCGqLWCMBkBscA3_s,6971
 datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
 datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
 datachain/cli/commands/ls.py,sha256=CBmk838Q-EQp04lE2Qdnpsc1GXAkC4-I-b-a_828n1E,5272
@@ -53,7 +53,7 @@ datachain/data_storage/metastore.py,sha256=aSeTRh43hmrOhULi9YD2VlgCj8B4bjE3jqCOv
 datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=edcTegzEoAEdEp62Rg9oERvHWXDcpg8d4onrD-P2xKM,30159
-datachain/data_storage/warehouse.py,sha256=
+datachain/data_storage/warehouse.py,sha256=sEbNiWKdB7yuLt88FuIfRur7U7WiOZrcHWhnBS_eMAg,32642
 datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
 datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -70,7 +70,7 @@ datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
 datachain/func/string.py,sha256=6-fZM7wHv0JZ2ZzpLFPLLYW15K_CT5VfYsmx56zBrpA,7419
 datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=
+datachain/lib/arrow.py,sha256=aedsosbFNjIBa6LQIxR2zhIVcA4pVw1p5hCVmrDhWsQ,10781
 datachain/lib/audio.py,sha256=fQmIBq-9hrUZtkgeJdPHYA_D8Wfe9D4cQZk4_ijxpNc,7580
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=Rjah76GHwIV6AZQk4rsdg6JLre5D8Kb9T4PS5SXzsPA,3740
@@ -81,7 +81,7 @@ datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
 datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
 datachain/lib/meta_formats.py,sha256=zdyg6XLk3QIsSk3I7s0Ez5kaCJSlE3uq7JiGxf7UwtU,6348
-datachain/lib/model_store.py,sha256=
+datachain/lib/model_store.py,sha256=A0pSVQ7uaZ9RvANapzirF8Cqq9N6ysosPpMSkzdRPkU,3226
 datachain/lib/namespaces.py,sha256=I6gLC4ZzgyatFtHL85MWR4ml7-yuQOzxHE7IQNbt_ac,2107
 datachain/lib/projects.py,sha256=VJgmzHzKjmNPZD1tm0a1RNHmUQwn6WLWCLpKyc4UrSk,2605
 datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
@@ -104,15 +104,15 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
 datachain/lib/dc/__init__.py,sha256=UrUzmDH6YyVl8fxM5iXTSFtl5DZTUzEYm1MaazK4vdQ,900
 datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
 datachain/lib/dc/database.py,sha256=F6EOjPKwSdp26kJsOKGq49D9OxqyKEalINHEwLQav2s,14716
-datachain/lib/dc/datachain.py,sha256=
-datachain/lib/dc/datasets.py,sha256
+datachain/lib/dc/datachain.py,sha256=2UtDhtBzx5VejkDE0UTS3t1517jCGr7YEKvO5wqNU-Q,99709
+datachain/lib/dc/datasets.py,sha256=-Bvyyu4XXDXLiWa-bOnsp0Q11RSYXRO0j5DaX8ShaFs,15355
 datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
 datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
 datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
 datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
 datachain/lib/dc/records.py,sha256=4N1Fq-j5r4GK-PR5jIO-9B2u_zTNX9l-6SmcRhQDAsw,3136
-datachain/lib/dc/storage.py,sha256=
+datachain/lib/dc/storage.py,sha256=OMJE-9ob9Ku5le8W6O8J1W-XJ0pwHt2PsO-ZCcee1ZA,7950
 datachain/lib/dc/utils.py,sha256=9OMiFu2kXIbtMqzJTEr1qbCoCBGpOmTnkWImVgFTKgo,4112
 datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
 datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
@@ -127,7 +127,7 @@ datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
 datachain/query/dataset.py,sha256=OaGRBNSWYNaRbYn6avij0fiFN5DT-nwdM-wJ4yTfaYs,63317
-datachain/query/dispatch.py,sha256=
+datachain/query/dispatch.py,sha256=f8IIvuLBJaCEwSRv7bWPMy1uXyc28W0LGqrBffjYf98,15831
 datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
@@ -141,7 +141,7 @@ datachain/sql/__init__.py,sha256=8D2omsBiATt8bjLjGo6jBEtaKEkOlnlNFWhVryHMDv0,388
 datachain/sql/postgresql_dialect.py,sha256=pDTfH8xaXz5xZsq8O1aQUvWLRIv_ogYeAqtmKlPp3Rw,280
 datachain/sql/postgresql_types.py,sha256=ryb_0lzuA9UOJ_B6nW9Yb8nJjzeSmEItAL_Ceue65lc,627
 datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
-datachain/sql/types.py,sha256=
+datachain/sql/types.py,sha256=2XbNaQTTc2BGJ6qL7RcwrBByIEbf9PXcsElIz6q9Mkg,15018
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
 datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
@@ -154,15 +154,15 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=E-T9OIzUR-GKaLgjZsEtg5CJrY_sLf1lt1awTvY7w2w,1426
 datachain/sql/sqlite/__init__.py,sha256=PsLaDSij9a03VxGSpagpNl7NQsGtgm72ArUeALZONoc,183
-datachain/sql/sqlite/base.py,sha256=
-datachain/sql/sqlite/types.py,sha256=
+datachain/sql/sqlite/base.py,sha256=WzRxJ8lHAeBCQlh4Z_NmX0CCkxeOt10M_vudCQzY4gE,21510
+datachain/sql/sqlite/types.py,sha256=DCK7q-Zdc_m1o1T33xrKjYX1zRg1231gw3o3ACO_qho,1815
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
-datachain-0.30.
+datachain-0.30.6.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.30.6.dist-info/METADATA,sha256=ZyXo8wdTrN08k--Soy3UHpCu_Jni_6ocO3_PbjCswCE,13898
+datachain-0.30.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.30.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.30.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.30.6.dist-info/RECORD,,
{datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/WHEEL
File without changes
{datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/entry_points.txt
File without changes
{datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/licenses/LICENSE
File without changes
{datachain-0.30.4.dist-info → datachain-0.30.6.dist-info}/top_level.txt
File without changes