datachain 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/_version.py +2 -2
- datachain/catalog/catalog.py +47 -3
- datachain/data_storage/metastore.py +2 -0
- datachain/dataset.py +5 -7
- datachain/lib/dc.py +117 -1
- datachain/lib/feature.py +0 -10
- datachain/lib/meta_formats.py +164 -0
- datachain/lib/pytorch.py +33 -4
- datachain/lib/signal_schema.py +63 -6
- datachain/query/dataset.py +1 -1
- {datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/METADATA +3 -1
- {datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/RECORD +16 -15
- {datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/LICENSE +0 -0
- {datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/WHEEL +0 -0
- {datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/entry_points.txt +0 -0
- {datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/top_level.txt +0 -0
datachain/_version.py
CHANGED
datachain/catalog/catalog.py
CHANGED
@@ -1580,10 +1580,54 @@ class Catalog:

         return dst

-    def
+    def get_file_signals(
+        self, dataset_name: str, dataset_version: int, row: RowDict
+    ) -> Optional[dict]:
+        """
+        Function that returns file signals from dataset row.
+        Note that signal names are without prefix, so if there was 'laion__file__source'
+        in original row, result will have just 'source'
+        Example output:
+            {
+                "source": "s3://ldb-public",
+                "parent": "animals/dogs",
+                "name": "dog.jpg",
+                ...
+            }
+        """
+        from datachain.lib.signal_schema import SignalSchema
+
+        version = self.get_dataset(dataset_name).get_version(dataset_version)
+
+        file_signals_values = SignalSchema.deserialize(
+            version.feature_schema
+        ).get_file_signals_values(row)
+        if not file_signals_values:
+            return None
+
+        # there can be multiple file signals in a schema, but taking the first
+        # one for now. In future we might add ability to choose from which one
+        # to open object
+        return next(iter(file_signals_values.values()))
+
+    def open_object(
+        self,
+        dataset_name: str,
+        dataset_version: int,
+        row: RowDict,
+        use_cache: bool = True,
+        **config: Any,
+    ):
+        file_signals = self.get_file_signals(dataset_name, dataset_version, row)
+        if not file_signals:
+            raise RuntimeError("Cannot open object without file signals")
+
         config = config or self.client_config
-        client = self.get_client(
-        return client.open_object(
+        client = self.get_client(file_signals["source"], **config)
+        return client.open_object(
+            self._get_row_uid(file_signals),  # type: ignore [arg-type]
+            use_cache=use_cache,
+        )

     def _get_row_uid(self, row: RowDict) -> UniqueId:
         return UniqueId(
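The two methods above combine into a single entry point for reading a row's underlying object. A minimal sketch of the intended call pattern, not a definitive usage guide: the dataset name, version, and row values are illustrative, and a real row comes back from the warehouse with flattened `__`-delimited signal columns.

```python
from datachain.catalog import get_catalog

catalog = get_catalog()

# Illustrative flattened row; get_file_signals() strips the
# "laion__file__" prefix before the client lookup.
row = {
    "laion__file__source": "s3://ldb-public",
    "laion__file__parent": "animals/dogs",
    "laion__file__name": "dog.jpg",
}

# Resolves a client from the "source" signal and opens the blob,
# optionally through the local cache.
obj = catalog.open_object("laion-sample", 1, row, use_cache=True)
```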
datachain/data_storage/metastore.py
CHANGED

@@ -1142,6 +1142,8 @@ class AbstractDBMetastore(AbstractMetastore):
             if field == "schema":
                 dataset_version.update(**{field: DatasetRecord.parse_schema(value)})
                 values[field] = json.dumps(value) if value else None
+            elif field == "feature_schema":
+                values[field] = json.dumps(value) if value else None
             elif field == "preview" and isinstance(value, list):
                 values[field] = json.dumps(value, cls=JSONSerialize)
             else:
datachain/dataset.py
CHANGED
@@ -157,7 +157,7 @@ class DatasetVersion:
     dataset_id: int
     version: int
     status: int
-    feature_schema:
+    feature_schema: dict
     created_at: datetime
     finished_at: Optional[datetime]
     error_message: str

@@ -199,7 +199,7 @@ class DatasetVersion:
             dataset_id,
             version,
             status,
-            feature_schema,
+            json.loads(feature_schema) if feature_schema else {},
             created_at,
             finished_at,
             error_message,

@@ -263,9 +263,9 @@ class DatasetRecord:
     labels: list[str]
     shadow: bool
     schema: dict[str, Union[SQLType, type[SQLType]]]
+    feature_schema: dict
     versions: list[DatasetVersion]
     status: int = DatasetStatus.CREATED
-    feature_schema: Optional[dict] = None
     created_at: Optional[datetime] = None
     finished_at: Optional[datetime] = None
     error_message: str = ""

@@ -320,8 +320,6 @@ class DatasetRecord:
         version_job_id: Optional[str] = None,
         version_is_job_result: bool = False,
     ) -> "DatasetRecord":
-        fr_schema = json.loads(feature_schema) if feature_schema else {}
-
         labels_lst: list[str] = json.loads(labels) if labels else []
         schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
         version_schema_dct: dict[str, str] = (

@@ -333,7 +331,7 @@ class DatasetRecord:
             version_dataset_id,
             version,
             version_status,
-
+            version_feature_schema,
             version_created_at,
             version_finished_at,
             version_error_message,

@@ -356,9 +354,9 @@ class DatasetRecord:
             labels_lst,
             bool(shadow),
             cls.parse_schema(schema_dct),  # type: ignore[arg-type]
+            json.loads(feature_schema) if feature_schema else {},
             [dataset_version],
             status,
-            fr_schema,
             created_at,
             finished_at,
             error_message,
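Both parse paths now apply the same convention for the raw `feature_schema` column: it is stored as a JSON string and deserialized to a dict on load, with a missing or empty value normalized to `{}`. A standalone sketch of that convention; the helper name is ours, not part of the package, and the schema value is illustrative.

```python
import json
from typing import Optional


def parse_feature_schema(raw: Optional[str]) -> dict:
    # Mirrors the inline expression used above:
    # json.loads(feature_schema) if feature_schema else {}
    return json.loads(raw) if raw else {}


assert parse_feature_schema(None) == {}
assert parse_feature_schema("") == {}
assert parse_feature_schema('{"laion": "WDSLaion@1"}') == {"laion": "WDSLaion@1"}
```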
datachain/lib/dc.py
CHANGED
@@ -6,6 +6,7 @@ import sqlalchemy
 from datachain.lib.feature import Feature, FeatureType
 from datachain.lib.feature_utils import features_to_tuples
 from datachain.lib.file import File, get_file
+from datachain.lib.meta_formats import read_meta
 from datachain.lib.settings import Settings
 from datachain.lib.signal_schema import SignalSchema
 from datachain.lib.udf import (

@@ -219,6 +220,89 @@ class DataChain(DatasetQuery):
         """
         return DataChain(name=name, version=version)

+    @classmethod
+    def from_csv(
+        cls,
+        path,
+        type: Literal["binary", "text", "image"] = "text",
+        anon: bool = False,
+        spec: Optional[FeatureType] = None,
+        schema_from: Optional[str] = "auto",
+        show_schema: Optional[bool] = False,
+    ) -> "DataChain":
+        """Get data from CSV. It returns the chain itself.
+
+        Parameters
+        ----------
+        path : storage URI with directory. URI must start with storage prefix such
+            as `s3://`, `gs://`, `az://` or "file:///"
+        type : read file as "binary", "text", or "image" data. Default is "text".
+        anon : use anonymous mode to access the storage.
+        spec : optional Data Model
+        schema_from : path to sample to infer spec from
+        show_schema : print auto-generated schema
+
+        Examples
+        --------
+
+        >>> chain = DataChain.from_csv("gs://csv")
+        """
+        if schema_from == "auto":
+            schema_from = path
+
+        chain = DataChain.from_storage(path=path, type=type, anon=anon)
+        return chain.gen(
+            csv=read_meta(
+                schema_from=schema_from,
+                meta_type="csv",
+                spec=spec,
+                show_schema=show_schema,
+            )
+        )
+
+    @classmethod
+    def from_json(
+        cls,
+        path,
+        type: Literal["binary", "text", "image"] = "text",
+        anon: bool = False,
+        spec: Optional[FeatureType] = None,
+        schema_from: Optional[str] = "auto",
+        jmespath: Optional[str] = None,
+        show_schema: Optional[bool] = False,
+    ) -> "DataChain":
+        """Get data from JSON. It returns the chain itself.
+
+        Parameters
+        ----------
+        path : storage URI with directory. URI must start with storage prefix such
+            as `s3://`, `gs://`, `az://` or "file:///"
+        type : read file as "binary", "text", or "image" data. Default is "text".
+        anon : use anonymous mode to access the storage.
+        spec : optional Data Model
+        schema_from : path to sample to infer spec from
+        show_schema : print auto-generated schema
+        jmespath : JMESPATH expression to reduce JSON
+
+        Examples
+        --------
+
+        >>> chain = DataChain.from_json("gs://json")
+        """
+        if schema_from == "auto":
+            schema_from = path
+
+        chain = DataChain.from_storage(path=path, type=type, anon=anon)
+        return chain.gen(
+            json=read_meta(
+                schema_from=schema_from,
+                meta_type="json",
+                spec=spec,
+                show_schema=show_schema,
+                jmespath=jmespath,
+            )
+        )
+
     def save(  # type: ignore[override]
         self, name: Optional[str] = None, version: Optional[int] = None
     ) -> "DataChain":

@@ -408,7 +492,7 @@ class DataChain(DatasetQuery):
         chain.signals_schema = new_schema
         return chain

-    def get_values(self) -> Iterator[
+    def get_values(self) -> Iterator[list]:
         """Iterate over rows, getting feature values and applying reader calls."""
         for features in self.iterate():
             yield [fr.get_value() if isinstance(fr, Feature) else fr for fr in features]

@@ -607,3 +691,35 @@ class DataChain(DatasetQuery):

     def max(self, fr: FeatureType):  # type: ignore[override]
         return self._extend_features("max", fr)
+
+    @detach
+    def gen_random(self) -> "DataChain":
+        from random import getrandbits
+
+        from datachain.data_storage.warehouse import RANDOM_BITS
+
+        if "random" not in self.signals_schema.values:
+            chain = self.map(random=lambda: getrandbits(RANDOM_BITS), output=int).save()
+            return chain.select_except("random")
+
+        return self
+
+    @detach
+    def shuffle(self) -> "DataChain":
+        """Return results in deterministic random order."""
+        chain = self.gen_random()
+        return DatasetQuery.shuffle(chain)
+
+    @detach
+    def chunk(self, index: int, total: int) -> "DataChain":
+        """Split a query into smaller chunks for e.g. parallelization.
+        Example:
+            >>> dc = DataChain(...)
+            >>> chunk_1 = dc.chunk(0, 2)
+            >>> chunk_2 = dc.chunk(1, 2)
+        Note:
+            Bear in mind that `index` is 0-indexed but `total` isn't.
+            Use 0/3, 1/3 and 2/3, not 1/3, 2/3 and 3/3.
+        """
+        chain = self.gen_random()
+        return DatasetQuery.chunk(chain, index, total)
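A hedged usage sketch for the new readers; the bucket path and field name are illustrative. `from_json` lists the storage location, infers a pydantic model from the first file unless a static `spec` is given, and yields one validated object per JSON record.

```python
from datachain.lib.dc import DataChain

chain = DataChain.from_json(
    "gs://datachain-demo/chatbot/",  # illustrative bucket
    jmespath="messages",             # must resolve to a list of objects
    show_schema=True,                # print the generated pydantic model
)
chain.save("chat-messages")
```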
datachain/lib/feature.py
CHANGED
@@ -78,16 +78,6 @@ DATACHAIN_TO_TYPE = {
     JSON: dict,
 }

-NAMES_TO_TYPES = {
-    "int": int,
-    "str": str,
-    "float": float,
-    "bool": bool,
-    "list": list,
-    "dict": dict,
-    "bytes": bytes,
-    "datetime": datetime,
-}

 NUMPY_TO_DATACHAIN = {
     np.dtype("int8"): Int,
datachain/lib/meta_formats.py
ADDED

@@ -0,0 +1,164 @@
+# pip install datamodel-code-generator
+# pip install jmespath
+#
+import csv
+import io
+import json
+import subprocess
+import sys
+import uuid
+from collections.abc import Iterator
+from typing import Any, Callable
+
+import jmespath as jsp
+
+from datachain.lib.feature_utils import pydantic_to_feature  # noqa: F401
+from datachain.lib.file import File
+
+# from datachain.lib.dc import C, DataChain
+
+
+def generate_uuid():
+    return uuid.uuid4()  # Generates a random UUID.
+
+
+# JSON decoder
+def load_json_from_string(json_string):
+    try:
+        data = json.loads(json_string)
+        print("Successfully parsed JSON", file=sys.stderr)
+        return data
+    except json.JSONDecodeError:
+        print("Failed to decode JSON: The string is not formatted correctly.")
+        return None
+
+
+# Read valid JSON and return a data object sample
+def process_json(data_string, jmespath):
+    json_dict = load_json_from_string(data_string)
+    if jmespath:
+        json_dict = jsp.search(jmespath, json_dict)
+        # we allow non-list JSONs here to print the root schema
+        # but if jmespath expression is given, we assume a list
+        if not isinstance(json_dict, list):
+            raise ValueError("JMESPATH expression must resolve to a list")
+            return None
+        json_dict = json_dict[0]  # sample the first object
+    return json.dumps(json_dict)
+
+
+# Print a dynamic datamodel-codegen output from JSON or CSV on stdout
+def read_schema(source_file, data_type="csv", expr=None):
+    data_string = ""
+    uid_str = str(generate_uuid()).replace("-", "")  # comply with Python class names
+    # using uuid to get around issue #1617
+    model_name = f"Model{uid_str}"
+    try:
+        with source_file.open() as fd:  # CSV can be larger than memory
+            if data_type == "csv":
+                data_string += fd.readline().decode("utf-8", "ignore").replace("\r", "")
+                data_string += fd.readline().decode("utf-8", "ignore").replace("\r", "")
+            else:
+                data_string = fd.read()  # other meta must fit into RAM
+    except OSError as e:
+        print(f"An unexpected file error occurred: {e}")
+        return
+    if data_type == "json":
+        data_string = process_json(data_string, expr)
+    command = [
+        "datamodel-codegen",
+        "--input-file-type",
+        data_type,
+        "--class-name",
+        model_name,
+    ]
+    try:
+        result = subprocess.run(
+            command,  # noqa: S603
+            input=data_string,
+            text=True,
+            capture_output=True,
+            check=True,
+        )
+        model_output = (
+            result.stdout
+        )  # This will contain the output from datamodel-codegen
+    except subprocess.CalledProcessError as e:
+        model_output = f"An error occurred in datamodel-codegen: {e.stderr}"
+    print(f"{model_output}")
+    print("\n" + f"spec=pydantic_to_feature({model_name})" + "\n")
+
+
+#
+# UDF mapper which calls chain in the setup to infer the dynamic schema
+#
+def read_meta(
+    spec=None, schema_from=None, meta_type="json", jmespath=None, show_schema=False
+) -> Callable:
+    from datachain.lib.dc import DataChain
+
+    # ugly hack: datachain is run redirecting printed outputs to a variable
+    if schema_from:
+        captured_output = io.StringIO()
+        current_stdout = sys.stdout
+        sys.stdout = captured_output
+        try:
+            chain = (
+                DataChain.from_storage(schema_from)
+                .limit(1)
+                .map(  # dummy column created (#1615)
+                    meta_schema=lambda file: read_schema(
+                        file, data_type=meta_type, expr=jmespath
+                    ),
+                    output=str,
+                )
+            )
+            # dummy executor (#1616)
+            chain.save()
+        finally:
+            sys.stdout = current_stdout
+        model_output = captured_output.getvalue()
+        captured_output.close()
+        if show_schema:
+            print(f"{model_output}")
+        # Below 'spec' should be a dynamically converted Feature from Pydantic datamodel
+        if not spec:
+            local_vars: dict[str, Any] = {}
+            exec(model_output, globals(), local_vars)  # noqa: S102
+            spec = local_vars["spec"]
+
+    if not (spec) and not (schema_from):
+        raise ValueError(
+            "Must provide a static schema in spec: or metadata sample in schema_from:"
+        )
+
+    #
+    # UDF mapper parsing a JSON or CSV file using schema spec
+    #
+    def parse_data(
+        file: File, data_model=spec, meta_type=meta_type, jmespath=jmespath
+    ) -> Iterator[spec]:
+        if meta_type == "csv":
+            with (
+                file.open() as fd
+            ):  # TODO: if schema is statically given, should allow CSV without headers
+                reader = csv.DictReader(fd)
+                for row in reader:  # CSV can be larger than memory
+                    json_string = json.dumps(row)
+                    yield data_model.model_validate_json(json_string)
+        if meta_type == "json":
+            try:
+                with file.open() as fd:  # JSON must fit into RAM
+                    data_string = fd.read()
+            except OSError as e:
+                print(f"An unexpected file error occurred: {e}")
+            json_object = load_json_from_string(data_string)
+            if jmespath:
+                json_object = jsp.search(jmespath, json_object)
+            if not isinstance(json_object, list):
+                raise ValueError("JSON expression must resolve in a list of objects")
+            for json_dict in json_object:
+                json_string = json.dumps(json_dict)
+                yield data_model.model_validate_json(json_string)
+
+    return parse_data
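Schema inference in `read_schema` shells out to the `datamodel-codegen` CLI rather than importing it as a library. A standalone sketch of that invocation; the sample document is illustrative.

```python
import subprocess

sample = '{"name": "dog.jpg", "width": 640, "height": 480}'  # illustrative

# Same call read_schema() builds: feed the sample on stdin, capture the
# generated pydantic model from stdout.
result = subprocess.run(
    ["datamodel-codegen", "--input-file-type", "json", "--class-name", "Model"],
    input=sample,
    text=True,
    capture_output=True,
    check=True,
)
print(result.stdout)  # pydantic class definition named Model
```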
datachain/lib/pytorch.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 from collections.abc import Iterator
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Callable, Optional

 from torch import float32
 from torch.distributed import get_rank, get_world_size

@@ -8,6 +8,7 @@ from torch.utils.data import IterableDataset, get_worker_info

 from datachain.catalog import Catalog, get_catalog
 from datachain.lib.dc import DataChain
+from datachain.lib.text import convert_text

 if TYPE_CHECKING:
     from torchvision.transforms.v2 import Transform

@@ -17,14 +18,25 @@ logger = logging.getLogger("datachain")


 try:
+    from PIL import Image
     from torchvision.transforms import v2

     DEFAULT_TRANSFORM = v2.Compose([v2.ToImage(), v2.ToDtype(float32, scale=True)])
 except ImportError:
-    logger.warning(
+    logger.warning(
+        "Missing dependencies for computer vision:\n"
+        "To install run:\n\n"
+        "  pip install 'datachain[cv]'\n"
+    )
+    Image = None  # type: ignore[assignment]
+    v2 = None
     DEFAULT_TRANSFORM = None


+def label_to_int(value: str, classes: list) -> int:
+    return classes.index(value)
+
+
 class PytorchDataset(IterableDataset):
     def __init__(
         self,

@@ -32,6 +44,8 @@ class PytorchDataset(IterableDataset):
         version: Optional[int] = None,
         catalog: Optional["Catalog"] = None,
         transform: Optional["Transform"] = DEFAULT_TRANSFORM,
+        tokenizer: Optional[Callable] = None,
+        tokenizer_kwargs: Optional[dict[str, Any]] = None,
         num_samples: int = 0,
     ):
         """

@@ -41,13 +55,17 @@ class PytorchDataset(IterableDataset):
             name (str): Name of DataChain dataset to stream.
             version (int): Version of DataChain dataset to stream.
             catalog (Catalog): DataChain catalog to which dataset belongs.
-            transform (Transform): Torchvision
+            transform (Transform): Torchvision transforms to apply to the dataset.
+            tokenizer (Callable): Tokenizer to use to tokenize text values.
+            tokenizer_kwargs (dict): Additional kwargs to pass when calling tokenizer.
             num_samples (int): Number of random samples to draw for each epoch.
                 This argument is ignored if `num_samples=0` (the default).
         """
         self.name = name
         self.version = version
         self.transform = transform
+        self.tokenizer = tokenizer
+        self.tokenizer_kwargs = tokenizer_kwargs or {}
         self.num_samples = num_samples
         if catalog is None:
             catalog = get_catalog()

@@ -87,10 +105,21 @@ class PytorchDataset(IterableDataset):
             # Apply transforms
             if self.transform:
                 try:
-
+                    if v2 and isinstance(self.transform, v2.Transform):
+                        row = self.transform(row)
+                    elif Image:
+                        for i, val in enumerate(row):
+                            if isinstance(val, Image.Image):
+                                row[i] = self.transform(val)
                 except ValueError:
                     logger.warning("Skipping transform due to unsupported data types.")
                     self.transform = None
+            if self.tokenizer:
+                for i, val in enumerate(row):
+                    if isinstance(val, str):
+                        row[i] = convert_text(
+                            val, self.tokenizer, self.tokenizer_kwargs
+                        )
             yield row

     @staticmethod
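With the new `tokenizer` argument, every string value in a row is passed through `convert_text` before the row is yielded. A sketch of the call pattern, assuming a HuggingFace tokenizer; the dataset name and tokenizer choice are illustrative, not part of the package.

```python
from transformers import AutoTokenizer  # assumed available; any callable works

from datachain.lib.pytorch import PytorchDataset

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

ds = PytorchDataset(
    "fashion-text",  # illustrative dataset name
    version=1,
    tokenizer=tokenizer,
    tokenizer_kwargs={"padding": "max_length", "max_length": 32},
)
# Iterating ds now yields rows whose str columns have been tokenized via
# convert_text(val, tokenizer, tokenizer_kwargs).
```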
datachain/lib/signal_schema.py
CHANGED
@@ -1,19 +1,45 @@
 import copy
 from collections.abc import Sequence
-from
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Optional, Union, get_args, get_origin

-from datachain.catalog import Catalog
 from datachain.lib.feature import (
     DATACHAIN_TO_TYPE,
     DEFAULT_DELIMITER,
-    NAMES_TO_TYPES,
     Feature,
     FeatureType,
     convert_type_to_datachain,
 )
 from datachain.lib.feature_registry import Registry
-from datachain.lib.file import File
+from datachain.lib.file import File, ImageFile, TextFile
 from datachain.lib.utils import DataChainParamsError
+from datachain.lib.webdataset import TarStream, WDSAllFile, WDSBasic
+from datachain.lib.webdataset_laion import Laion, LaionParquet, WDSLaion
+
+if TYPE_CHECKING:
+    from datachain.catalog import Catalog
+
+
+# TODO fix hardcoded Feature class names with https://github.com/iterative/dvcx/issues/1625
+NAMES_TO_TYPES = {
+    "int": int,
+    "str": str,
+    "float": float,
+    "bool": bool,
+    "list": list,
+    "dict": dict,
+    "bytes": bytes,
+    "datetime": datetime,
+    "WDSLaion": WDSLaion,
+    "Laion": Laion,
+    "LaionParquet": LaionParquet,
+    "File": File,
+    "ImageFile": ImageFile,
+    "TextFile": TextFile,
+    "TarStream": TarStream,
+    "WDSBasic": WDSBasic,
+    "WDSAllFile": WDSAllFile,
+}


 class SignalSchemaError(DataChainParamsError):

@@ -74,7 +100,7 @@ class SignalSchema:
         signals: dict[str, FeatureType] = {}
         for signal, type_name in schema.items():
             try:
-                fr = NAMES_TO_TYPES.get(type_name
+                fr = NAMES_TO_TYPES.get(type_name)
                 if not fr:
                     type_name, version = Registry.parse_name_version(type_name)
                     fr = Registry.get(type_name, version)

@@ -137,7 +163,7 @@ class SignalSchema:
     def slice(self, keys: Sequence[str]) -> "SignalSchema":
         return SignalSchema({k: v for k, v in self.values.items() if k in keys})

-    def row_to_features(self, row: Sequence, catalog: Catalog) -> list[FeatureType]:
+    def row_to_features(self, row: Sequence, catalog: "Catalog") -> list[FeatureType]:
         res = []
         pos = 0
         for fr_cls in self.values.values():

@@ -279,3 +305,34 @@ class SignalSchema:
         for signal in signals:
             res.append(".".join(signal))
         return res
+
+    def get_file_signals_values(self, row: dict[str, Any]) -> dict[str, Any]:
+        """
+        Method that returns values with clean field names (without prefix) for
+        all file signals found in this schema for some row
+        Output example:
+            {
+                laion.file: {
+                    "source": "s3://ldb-public",
+                    "name": "dog.jpg",
+                    ...
+                },
+                meta.file: {
+                    "source": "s3://datacomp",
+                    "name": "cat.jpg",
+                    ...
+                }
+            }
+        """
+        res = {}
+
+        for file_signals in self.get_file_signals():
+            prefix = file_signals.replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
+            res[file_signals] = {
+                c_name.removeprefix(prefix): c_value
+                for c_name, c_value in row.items()
+                if c_name.startswith(prefix)
+                and DEFAULT_DELIMITER not in c_name.removeprefix(prefix)
+            }
+
+        return res
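The prefix handling in `get_file_signals_values` can be seen in isolation: columns arrive flattened with the `__` delimiter, the file signal's dotted path is converted to that prefix, and only direct children (no further `__` in the remainder) survive. A self-contained sketch of that filter; the row values are illustrative.

```python
DEFAULT_DELIMITER = "__"  # same value as datachain.lib.feature.DEFAULT_DELIMITER

row = {
    "laion__file__source": "s3://ldb-public",
    "laion__file__name": "dog.jpg",
    "laion__file__location__etag": "abc",  # nested field, filtered out
    "laion__caption": "a dog",             # different signal, filtered out
}

prefix = "laion.file".replace(".", DEFAULT_DELIMITER) + DEFAULT_DELIMITER
values = {
    c.removeprefix(prefix): v
    for c, v in row.items()
    if c.startswith(prefix) and DEFAULT_DELIMITER not in c.removeprefix(prefix)
}
assert values == {"source": "s3://ldb-public", "name": "dog.jpg"}
```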
datachain/query/dataset.py
CHANGED
@@ -1462,7 +1462,7 @@ class DatasetQuery:

         return cls.from_dataframe(pd_df, *args, **kwargs)

-    def shuffle(self) -> "
+    def shuffle(self) -> "Self":
         # ToDo: implement shuffle based on seed and/or generating random column
         return self.order_by(C.random)
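This one-line body explains the "deterministic random order" promise made by `DataChain.shuffle` above: ordering is by the persisted per-row `random` column rather than a fresh draw at query time, so repeated runs return the same order. A sketch; the dataset name is illustrative.

```python
from datachain.lib.dc import DataChain

dc = DataChain(name="animals", version=1)  # illustrative dataset

# gen_random() materializes a "random" column once; order_by(C.random)
# underneath then gives the same shuffled order on every iteration.
shuffled = dc.shuffle()
```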
{datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.1.10
+Version: 0.1.11
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0

@@ -35,6 +35,8 @@ Requires-Dist: multiprocess ==0.70.16
 Requires-Dist: dill ==0.3.8
 Requires-Dist: ujson >=5.9.0
 Requires-Dist: pydantic <3,>=2
+Requires-Dist: jmespath >=1.0
+Requires-Dist: datamodel-code-generator >=0.25
 Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
 Provides-Extra: cv
 Requires-Dist: Pillow <11,>=10.0.0 ; extra == 'cv'
{datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/RECORD
CHANGED

@@ -1,12 +1,12 @@
 datachain/__init__.py,sha256=9a0qX6tqyA9KC3ahLmGarqlRTZJXhM7HijAWpfUaOnQ,102
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
-datachain/_version.py,sha256=
+datachain/_version.py,sha256=HreDwlLXV189L3kiBj3huM_kqWD1usijlC8LN1YXcCM,413
 datachain/asyn.py,sha256=opARBVZJxTKU3EGYd-8gcpNXoshuCfVz_b0ut3oxC50,7641
 datachain/cache.py,sha256=FaPWrqWznPffmskTb1pdPkt2jAMMf__9FC2zEnP0vDU,4022
 datachain/cli.py,sha256=1mBozBJS9Nq-EeahxwyKH8ef64E2v93o0CAEzxjcbkY,32209
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
-datachain/dataset.py,sha256=
+datachain/dataset.py,sha256=4ksFJlfo_CEmt5xqXPca-hhQL1syFpKxCl_ZOhTS30s,14506
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
 datachain/listing.py,sha256=-Cm74Ne2Q36QuCpA22feDA_v-7uPqkwAOg-QzkiZAGQ,8243
 datachain/node.py,sha256=jCBvwiEUYSKQa27Tb6RORgaUjoiz7mOX63NQmP7JQY0,5703

@@ -17,7 +17,7 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/utils.py,sha256=FW1LR5qCL5BtCYk-B-6LUCCMq8zOobkKKMrLqfFfCAg,13535
 datachain/catalog/__init__.py,sha256=Gkto1V7rUbVjJmgMEnB_VpVeHOfV47IQh1fSjEKnit4,409
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=7ZqCsyr7W4enOIX6jiLJbBfFZvjkqjI1E_NOyL3V3AA,78585
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=FTI9s1b8iX0_TffSAx1mwm-ucsRV14NHX-F1xtTXRSE,7310
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096

@@ -32,7 +32,7 @@ datachain/data_storage/__init__.py,sha256=arlkQIj2J0ozcT_GvNDxm6PLT9NeabHvIsxPND
 datachain/data_storage/db_engine.py,sha256=mxOoWP4ntBMgLeTAk4dlEeIJArAz4x_tFrHytcAfLpo,3341
 datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256=
+datachain/data_storage/metastore.py,sha256=GnJH2NlFngdj30aK9CSaimJNnh_x_pSjntWUnvQuI2A,53649
 datachain/data_storage/schema.py,sha256=pF3KBi-8Pz3n5jRYoJpDR3gF8qUFdyAu2XR58J4Fyuo,8724
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=eHTiJ0VIxU-chnhKNTN14EsaSnw5LAaxTLi9aMCZpl4,24978

@@ -40,8 +40,8 @@ datachain/data_storage/warehouse.py,sha256=sQLOrv6DH8UcWH1aqlg3YJKmaHr696XkVafBx
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/cached_stream.py,sha256=BQI6gpJ2y7_-jqQo_0VB9ntbkOVISvj9wlDwGDQbqw8,3537
 datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
-datachain/lib/dc.py,sha256=
-datachain/lib/feature.py,sha256=
+datachain/lib/dc.py,sha256=kyuSg-l7HciqFaunqPx41WKyAeuJ2H2tpWJplCXhZJc,26086
+datachain/lib/feature.py,sha256=C5lxQ_Ef4rL0-mef4A4EeoqB0rcNZ0ExRE26ehx20RM,14196
 datachain/lib/feature_registry.py,sha256=hg_S_9JPEYaQ-8PI64mU0sEhSJ-rcrKtwQk5TPBotEw,1570
 datachain/lib/feature_utils.py,sha256=6wbKZ2xq08b751EFBRJy1OZLqWYd_gxq9A_Em_aMFk4,4713
 datachain/lib/file.py,sha256=ZNGzmJSq7PNVxLhGLNdR9YSYkP-1ZeqY_yhDMcDNfkI,8586

@@ -51,11 +51,12 @@ datachain/lib/hf_pipeline.py,sha256=f0AH_XCziOF1OKN3d1w1swTBLaeajMJ8xgdsX37i5-o,
 datachain/lib/image.py,sha256=gb-My4rx5zMwOlDkcu_2G8GtRAMfsRvd7-QWUBErDw8,3486
 datachain/lib/image_transform.py,sha256=NXWtnVOcofWBgl_YMxb4ABpaT7JTBMx7tLKvErH1IC4,3024
 datachain/lib/iptc_exif_xmp.py,sha256=xrbxFeY-wRP6T5JsUgE3EXfTxKvZVymRaRD_VIfxD0A,2236
+datachain/lib/meta_formats.py,sha256=-JAS47NOO6rx1vmr0Cy-G_txxmTvMflXfzJiFD7rWlQ,5742
 datachain/lib/parquet.py,sha256=_MbRBzcgLLLegjKZNGF9Rm9IkYRSy0IqOksVjL1nntg,917
-datachain/lib/pytorch.py,sha256=
+datachain/lib/pytorch.py,sha256=oU16XXAyAmiiabe1IoQoID00-u3uZ5GhCN48uAl6WDs,5421
 datachain/lib/reader.py,sha256=rPXXNoTUdm6PQwkAlaU-nOBreP_q4ett_EjFStrA_W0,1727
 datachain/lib/settings.py,sha256=mVtzyA_y9JA-6chMv1baggDvgeFsaUszySp660Gu4gw,2854
-datachain/lib/signal_schema.py,sha256=
+datachain/lib/signal_schema.py,sha256=WPKHzgZ6HatbDQ2IN_L0JPi46n6acfHpkq91DYdlgSg,11753
 datachain/lib/text.py,sha256=EEZrYohADi5rAGg3aLLRwtvyAV9js_yWAGhr2C3QbwI,2424
 datachain/lib/udf.py,sha256=PeZ-UbprfxlmgVbzH4FtNib3kIhTi9C869QM8RuM5dw,6292
 datachain/lib/udf_signature.py,sha256=1cOMcGXHbdBjyBRkvNxIEt9A_CoyiADxio2wkYu8U5M,7140

@@ -67,7 +68,7 @@ datachain/lib/webdataset_laion.py,sha256=tHn3Zhqx7Eb5Ywy_mobs6jDI0o_pFUbsuHqv0W_
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=sOMxXbaNii7lVyFIEZ2noqbhy_S8qtZ-WWxrka72shc,3474
 datachain/query/builtins.py,sha256=RyVEPZEuC7K1vlulrsaUjATLG_tZEvYYW7N5i6Fg-tQ,2781
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=2DZAaEwX9gQlQgrRY3t-ymXN9SUkN_3XN0AfMFT6Mto,66861
 datachain/query/dispatch.py,sha256=9zcwKkLIuK5-xyRSQNw3yTqYLMHVbuZIn6KcB0g_ZBQ,13107
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/schema.py,sha256=CGu9NBIFvX4iHQnaThLLxwWndxqkyUtYmo2JBgnZ4YQ,7660

@@ -91,9 +92,9 @@ datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7
 datachain/sql/sqlite/base.py,sha256=XVxn4pB-N4pPfiby5uVvfH7feNzRKlBNzsc5eyKPvhI,10965
 datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
 datachain/sql/sqlite/vector.py,sha256=stBeEW6fbVbILmAtV4khjXdJIGT13HkRWJeCoqIOk50,315
-datachain-0.1.
-datachain-0.1.
-datachain-0.1.
-datachain-0.1.
-datachain-0.1.
-datachain-0.1.
+datachain-0.1.11.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.1.11.dist-info/METADATA,sha256=BFTmlt8_vtCHF80AHQcIQkE9YMCigp7k1jcAZV1D7j4,13972
+datachain-0.1.11.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
+datachain-0.1.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.1.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.1.11.dist-info/RECORD,,
{datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/LICENSE: file without changes
{datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/WHEEL: file without changes
{datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/entry_points.txt: file without changes
{datachain-0.1.10.dist-info → datachain-0.1.11.dist-info}/top_level.txt: file without changes