datachain 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/client/fsspec.py +1 -1
- datachain/lib/dc.py +60 -4
- datachain/lib/file.py +20 -5
- datachain/query/dataset.py +2 -2
- datachain/query/session.py +15 -3
- datachain/script_meta.py +147 -0
- {datachain-0.9.1.dist-info → datachain-0.11.0.dist-info}/METADATA +5 -4
- {datachain-0.9.1.dist-info → datachain-0.11.0.dist-info}/RECORD +12 -11
- {datachain-0.9.1.dist-info → datachain-0.11.0.dist-info}/WHEEL +1 -1
- {datachain-0.9.1.dist-info → datachain-0.11.0.dist-info}/LICENSE +0 -0
- {datachain-0.9.1.dist-info → datachain-0.11.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.9.1.dist-info → datachain-0.11.0.dist-info}/top_level.txt +0 -0
datachain/client/fsspec.py
CHANGED
|
@@ -390,7 +390,7 @@ class Client(ABC):
|
|
|
390
390
|
) # type: ignore[return-value]
|
|
391
391
|
|
|
392
392
|
def upload(self, data: bytes, path: str) -> "File":
|
|
393
|
-
full_path = self.get_full_path(path)
|
|
393
|
+
full_path = path if path.startswith(self.PREFIX) else self.get_full_path(path)
|
|
394
394
|
|
|
395
395
|
parent = posixpath.dirname(full_path)
|
|
396
396
|
self.fs.makedirs(parent, exist_ok=True)
|
datachain/lib/dc.py
CHANGED
|
@@ -411,6 +411,7 @@ class DataChain:
|
|
|
411
411
|
object_name: str = "file",
|
|
412
412
|
update: bool = False,
|
|
413
413
|
anon: bool = False,
|
|
414
|
+
client_config: Optional[dict] = None,
|
|
414
415
|
) -> "Self":
|
|
415
416
|
"""Get data from a storage as a list of file with all file attributes.
|
|
416
417
|
It returns the chain itself as usual.
|
|
@@ -423,15 +424,32 @@ class DataChain:
|
|
|
423
424
|
object_name : Created object column name.
|
|
424
425
|
update : force storage reindexing. Default is False.
|
|
425
426
|
anon : If True, we will treat cloud bucket as public one
|
|
427
|
+
client_config : Optional client configuration for the storage client.
|
|
426
428
|
|
|
427
429
|
Example:
|
|
430
|
+
Simple call from s3
|
|
428
431
|
```py
|
|
429
432
|
chain = DataChain.from_storage("s3://my-bucket/my-dir")
|
|
430
433
|
```
|
|
434
|
+
|
|
435
|
+
With AWS S3-compatible storage
|
|
436
|
+
```py
|
|
437
|
+
chain = DataChain.from_storage(
|
|
438
|
+
"s3://my-bucket/my-dir",
|
|
439
|
+
client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
|
|
440
|
+
)
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
Pass existing session
|
|
444
|
+
```py
|
|
445
|
+
session = Session.get()
|
|
446
|
+
chain = DataChain.from_storage("s3://my-bucket/my-dir", session=session)
|
|
447
|
+
```
|
|
431
448
|
"""
|
|
432
449
|
file_type = get_file_type(type)
|
|
433
450
|
|
|
434
|
-
|
|
451
|
+
if anon:
|
|
452
|
+
client_config = (client_config or {}) | {"anon": True}
|
|
435
453
|
session = Session.get(session, client_config=client_config, in_memory=in_memory)
|
|
436
454
|
cache = session.catalog.cache
|
|
437
455
|
client_config = session.catalog.client_config
|
|
@@ -481,25 +499,56 @@ class DataChain:
|
|
|
481
499
|
version: Optional[int] = None,
|
|
482
500
|
session: Optional[Session] = None,
|
|
483
501
|
settings: Optional[dict] = None,
|
|
484
|
-
|
|
502
|
+
fallback_to_studio: bool = True,
|
|
485
503
|
) -> "Self":
|
|
486
504
|
"""Get data from a saved Dataset. It returns the chain itself.
|
|
505
|
+
If dataset or version is not found locally, it will try to pull it from Studio.
|
|
487
506
|
|
|
488
507
|
Parameters:
|
|
489
508
|
name : dataset name
|
|
490
509
|
version : dataset version
|
|
510
|
+
session : Session to use for the chain.
|
|
511
|
+
settings : Settings to use for the chain.
|
|
512
|
+
fallback_to_studio : Try to pull dataset from Studio if not found locally.
|
|
513
|
+
Default is True.
|
|
491
514
|
|
|
492
515
|
Example:
|
|
493
516
|
```py
|
|
494
517
|
chain = DataChain.from_dataset("my_cats")
|
|
495
518
|
```
|
|
519
|
+
|
|
520
|
+
```py
|
|
521
|
+
chain = DataChain.from_dataset("my_cats", fallback_to_studio=False)
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
```py
|
|
525
|
+
chain = DataChain.from_dataset("my_cats", version=1)
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
```py
|
|
529
|
+
session = Session.get(client_config={"aws_endpoint_url": "<minio-url>"})
|
|
530
|
+
settings = {
|
|
531
|
+
"cache": True,
|
|
532
|
+
"parallel": 4,
|
|
533
|
+
"workers": 4,
|
|
534
|
+
"min_task_size": 1000,
|
|
535
|
+
"prefetch": 10,
|
|
536
|
+
}
|
|
537
|
+
chain = DataChain.from_dataset(
|
|
538
|
+
name="my_cats",
|
|
539
|
+
version=1,
|
|
540
|
+
session=session,
|
|
541
|
+
settings=settings,
|
|
542
|
+
fallback_to_studio=True,
|
|
543
|
+
)
|
|
544
|
+
```
|
|
496
545
|
"""
|
|
497
546
|
query = DatasetQuery(
|
|
498
547
|
name=name,
|
|
499
548
|
version=version,
|
|
500
549
|
session=session,
|
|
501
550
|
indexing_column_types=File._datachain_column_types,
|
|
502
|
-
|
|
551
|
+
fallback_to_studio=fallback_to_studio,
|
|
503
552
|
)
|
|
504
553
|
telemetry.send_event_once("class", "datachain_init", name=name, version=version)
|
|
505
554
|
if settings:
|
|
@@ -2444,7 +2493,7 @@ class DataChain:
|
|
|
2444
2493
|
self._setup = self._setup | kwargs
|
|
2445
2494
|
return self
|
|
2446
2495
|
|
|
2447
|
-
def
|
|
2496
|
+
def to_storage(
|
|
2448
2497
|
self,
|
|
2449
2498
|
output: str,
|
|
2450
2499
|
signal: str = "file",
|
|
@@ -2462,6 +2511,13 @@ class DataChain:
|
|
|
2462
2511
|
use_cache: If `True`, cache the files before exporting.
|
|
2463
2512
|
link_type: Method to use for exporting files.
|
|
2464
2513
|
Falls back to `'copy'` if symlinking fails.
|
|
2514
|
+
|
|
2515
|
+
Example:
|
|
2516
|
+
Cross cloud transfer
|
|
2517
|
+
```py
|
|
2518
|
+
ds = DataChain.from_storage("s3://mybucket")
|
|
2519
|
+
ds.to_storage("gs://mybucket", placement="filename")
|
|
2520
|
+
```
|
|
2465
2521
|
"""
|
|
2466
2522
|
if placement == "filename" and (
|
|
2467
2523
|
self._query.distinct(pathfunc.name(C(f"{signal}__path"))).count()
|
datachain/lib/file.py
CHANGED
|
@@ -17,6 +17,7 @@ from urllib.parse import unquote, urlparse
|
|
|
17
17
|
from urllib.request import url2pathname
|
|
18
18
|
|
|
19
19
|
from fsspec.callbacks import DEFAULT_CALLBACK, Callback
|
|
20
|
+
from fsspec.utils import stringify_path
|
|
20
21
|
from PIL import Image as PilImage
|
|
21
22
|
from pydantic import Field, field_validator
|
|
22
23
|
|
|
@@ -270,8 +271,13 @@ class File(DataModel):
|
|
|
270
271
|
|
|
271
272
|
def save(self, destination: str):
|
|
272
273
|
"""Writes it's content to destination"""
|
|
273
|
-
|
|
274
|
-
|
|
274
|
+
destination = stringify_path(destination)
|
|
275
|
+
client: Client = self._catalog.get_client(destination)
|
|
276
|
+
|
|
277
|
+
if client.PREFIX == "file://" and not destination.startswith(client.PREFIX):
|
|
278
|
+
destination = Path(destination).absolute().as_uri()
|
|
279
|
+
|
|
280
|
+
client.upload(self.read(), destination)
|
|
275
281
|
|
|
276
282
|
def _symlink_to(self, destination: str):
|
|
277
283
|
if self.location:
|
|
@@ -285,6 +291,7 @@ class File(DataModel):
|
|
|
285
291
|
source = self.get_path()
|
|
286
292
|
else:
|
|
287
293
|
raise OSError(errno.EXDEV, "can't link across filesystems")
|
|
294
|
+
|
|
288
295
|
return os.symlink(source, destination)
|
|
289
296
|
|
|
290
297
|
def export(
|
|
@@ -299,7 +306,8 @@ class File(DataModel):
|
|
|
299
306
|
self._caching_enabled = use_cache
|
|
300
307
|
dst = self.get_destination_path(output, placement)
|
|
301
308
|
dst_dir = os.path.dirname(dst)
|
|
302
|
-
|
|
309
|
+
client: Client = self._catalog.get_client(dst_dir)
|
|
310
|
+
client.fs.makedirs(dst_dir, exist_ok=True)
|
|
303
311
|
|
|
304
312
|
if link_type == "symlink":
|
|
305
313
|
try:
|
|
@@ -496,7 +504,10 @@ class TextFile(File):
|
|
|
496
504
|
|
|
497
505
|
def save(self, destination: str):
|
|
498
506
|
"""Writes it's content to destination"""
|
|
499
|
-
|
|
507
|
+
destination = stringify_path(destination)
|
|
508
|
+
|
|
509
|
+
client: Client = self._catalog.get_client(destination)
|
|
510
|
+
with client.fs.open(destination, mode="w") as f:
|
|
500
511
|
f.write(self.read_text())
|
|
501
512
|
|
|
502
513
|
|
|
@@ -510,7 +521,11 @@ class ImageFile(File):
|
|
|
510
521
|
|
|
511
522
|
def save(self, destination: str):
|
|
512
523
|
"""Writes it's content to destination"""
|
|
513
|
-
|
|
524
|
+
destination = stringify_path(destination)
|
|
525
|
+
|
|
526
|
+
client: Client = self._catalog.get_client(destination)
|
|
527
|
+
with client.fs.open(destination, mode="wb") as f:
|
|
528
|
+
self.read().save(f)
|
|
514
529
|
|
|
515
530
|
|
|
516
531
|
class Image(DataModel):
|
datachain/query/dataset.py
CHANGED
|
@@ -1085,7 +1085,7 @@ class DatasetQuery:
|
|
|
1085
1085
|
session: Optional[Session] = None,
|
|
1086
1086
|
indexing_column_types: Optional[dict[str, Any]] = None,
|
|
1087
1087
|
in_memory: bool = False,
|
|
1088
|
-
|
|
1088
|
+
fallback_to_studio: bool = True,
|
|
1089
1089
|
) -> None:
|
|
1090
1090
|
self.session = Session.get(session, catalog=catalog, in_memory=in_memory)
|
|
1091
1091
|
self.catalog = catalog or self.session.catalog
|
|
@@ -1103,7 +1103,7 @@ class DatasetQuery:
|
|
|
1103
1103
|
|
|
1104
1104
|
self.name = name
|
|
1105
1105
|
|
|
1106
|
-
if
|
|
1106
|
+
if fallback_to_studio and is_token_set():
|
|
1107
1107
|
ds = self.catalog.get_dataset_with_remote_fallback(name, version)
|
|
1108
1108
|
else:
|
|
1109
1109
|
ds = self.catalog.get_dataset(name)
|
datachain/query/session.py
CHANGED
|
@@ -139,21 +139,33 @@ class Session:
|
|
|
139
139
|
|
|
140
140
|
# Access the active (most recent) context from the stack
|
|
141
141
|
if cls.SESSION_CONTEXTS:
|
|
142
|
-
|
|
142
|
+
session = cls.SESSION_CONTEXTS[-1]
|
|
143
143
|
|
|
144
|
-
|
|
144
|
+
elif cls.GLOBAL_SESSION_CTX is None:
|
|
145
145
|
cls.GLOBAL_SESSION_CTX = Session(
|
|
146
146
|
cls.GLOBAL_SESSION_NAME,
|
|
147
147
|
catalog,
|
|
148
148
|
client_config=client_config,
|
|
149
149
|
in_memory=in_memory,
|
|
150
150
|
)
|
|
151
|
+
session = cls.GLOBAL_SESSION_CTX
|
|
151
152
|
|
|
152
153
|
atexit.register(cls._global_cleanup)
|
|
153
154
|
cls.ORIGINAL_EXCEPT_HOOK = sys.excepthook
|
|
154
155
|
sys.excepthook = cls.except_hook
|
|
156
|
+
else:
|
|
157
|
+
session = cls.GLOBAL_SESSION_CTX
|
|
155
158
|
|
|
156
|
-
|
|
159
|
+
if client_config and session.catalog.client_config != client_config:
|
|
160
|
+
session = Session(
|
|
161
|
+
"session" + uuid4().hex[:4],
|
|
162
|
+
catalog,
|
|
163
|
+
client_config=client_config,
|
|
164
|
+
in_memory=in_memory,
|
|
165
|
+
)
|
|
166
|
+
session.__enter__()
|
|
167
|
+
|
|
168
|
+
return session
|
|
157
169
|
|
|
158
170
|
@staticmethod
|
|
159
171
|
def except_hook(exc_type, exc_value, exc_traceback):
|
datachain/script_meta.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import tomllib
|
|
7
|
+
except ModuleNotFoundError:
|
|
8
|
+
# tomllib is in standard library from python 3.11 so for earlier versions
|
|
9
|
+
# we need tomli
|
|
10
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ScriptConfigParsingError(Exception):
|
|
14
|
+
def __init__(self, message):
|
|
15
|
+
super().__init__(message)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
|
|
19
|
+
class ScriptConfig:
|
|
20
|
+
"""
|
|
21
|
+
Class that is parsing inline script metadata to get some basic information for
|
|
22
|
+
running datachain script like python version, dependencies, attachments etc.
|
|
23
|
+
Inline script metadata must follow the format described in https://packaging.python.org/en/latest/specifications/inline-script-metadata/#inline-script-metadata.
|
|
24
|
+
Example of script with inline metadata:
|
|
25
|
+
# /// script
|
|
26
|
+
# requires-python = ">=3.12"
|
|
27
|
+
#
|
|
28
|
+
# dependencies = [
|
|
29
|
+
# "pandas < 2.1.0",
|
|
30
|
+
# "numpy == 1.26.4"
|
|
31
|
+
# ]
|
|
32
|
+
#
|
|
33
|
+
# [tools.datachain.workers]
|
|
34
|
+
# num_workers = 3
|
|
35
|
+
#
|
|
36
|
+
# [tools.datachain.attachments]
|
|
37
|
+
# image1 = "s3://ldb-public/image1.jpg"
|
|
38
|
+
# file1 = "s3://ldb-public/file.pdf"
|
|
39
|
+
#
|
|
40
|
+
# [tools.datachain.params]
|
|
41
|
+
# min_length_sec = 1
|
|
42
|
+
# cache = false
|
|
43
|
+
#
|
|
44
|
+
# [tools.datachain.inputs]
|
|
45
|
+
# threshold = 0.5
|
|
46
|
+
# start_ds_name = "ds://start"
|
|
47
|
+
#
|
|
48
|
+
# [tools.datachain.outputs]
|
|
49
|
+
# result_dataset = "ds://res"
|
|
50
|
+
# result_dir = "/temp"
|
|
51
|
+
#
|
|
52
|
+
# ///
|
|
53
|
+
|
|
54
|
+
import sys
|
|
55
|
+
import pandas as pd
|
|
56
|
+
|
|
57
|
+
print(f"Python version: {sys.version_info}")
|
|
58
|
+
print(f"Pandas version: {pd.__version__}")
|
|
59
|
+
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
python_version: Optional[str]
|
|
63
|
+
dependencies: list[str]
|
|
64
|
+
attachments: dict[str, str]
|
|
65
|
+
params: dict[str, Any]
|
|
66
|
+
inputs: dict[str, Any]
|
|
67
|
+
outputs: dict[str, Any]
|
|
68
|
+
num_workers: Optional[int] = None
|
|
69
|
+
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
python_version: Optional[str] = None,
|
|
73
|
+
dependencies: Optional[list[str]] = None,
|
|
74
|
+
attachments: Optional[dict[str, str]] = None,
|
|
75
|
+
params: Optional[dict[str, Any]] = None,
|
|
76
|
+
inputs: Optional[dict[str, Any]] = None,
|
|
77
|
+
outputs: Optional[dict[str, Any]] = None,
|
|
78
|
+
num_workers: Optional[int] = None,
|
|
79
|
+
):
|
|
80
|
+
self.python_version = python_version
|
|
81
|
+
self.dependencies = dependencies or []
|
|
82
|
+
self.attachments = attachments or {}
|
|
83
|
+
self.params = params or {}
|
|
84
|
+
self.inputs = inputs or {}
|
|
85
|
+
self.outputs = outputs or {}
|
|
86
|
+
self.num_workers = num_workers
|
|
87
|
+
|
|
88
|
+
def get_param(self, name: str, default: Any) -> Any:
|
|
89
|
+
return self.params.get(name, default)
|
|
90
|
+
|
|
91
|
+
def get_input(self, name: str, default: Any) -> Any:
|
|
92
|
+
return self.inputs.get(name, default)
|
|
93
|
+
|
|
94
|
+
def get_output(self, name: str, default: Any) -> Any:
|
|
95
|
+
return self.outputs.get(name, default)
|
|
96
|
+
|
|
97
|
+
def get_attachment(self, name: str, default: Any) -> Any:
|
|
98
|
+
return self.attachments.get(name, default)
|
|
99
|
+
|
|
100
|
+
@staticmethod
|
|
101
|
+
def read(script: str) -> Optional[dict]:
|
|
102
|
+
"""Converts inline script metadata to dict with all found data"""
|
|
103
|
+
regex = (
|
|
104
|
+
r"(?m)^# \/\/\/ (?P<type>[a-zA-Z0-9-]+)[ \t]*$[\r\n|\r|\n]"
|
|
105
|
+
"(?P<content>(?:^#(?:| .*)$[\r\n|\r|\n])+)^# \\/\\/\\/[ \t]*$"
|
|
106
|
+
)
|
|
107
|
+
name = "script"
|
|
108
|
+
matches = list(
|
|
109
|
+
filter(lambda m: m.group("type") == name, re.finditer(regex, script))
|
|
110
|
+
)
|
|
111
|
+
if len(matches) > 1:
|
|
112
|
+
raise ValueError(f"Multiple {name} blocks found")
|
|
113
|
+
if len(matches) == 1:
|
|
114
|
+
content = "".join(
|
|
115
|
+
line[2:] if line.startswith("# ") else line[1:]
|
|
116
|
+
for line in matches[0].group("content").splitlines(keepends=True)
|
|
117
|
+
)
|
|
118
|
+
return tomllib.loads(content)
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
@staticmethod
|
|
122
|
+
def parse(script: str) -> Optional["ScriptConfig"]:
|
|
123
|
+
"""
|
|
124
|
+
Method that is parsing inline script metadata from datachain script and
|
|
125
|
+
instantiating ScriptConfig class with found data. If no inline metadata is
|
|
126
|
+
found, it returns None
|
|
127
|
+
"""
|
|
128
|
+
try:
|
|
129
|
+
meta = ScriptConfig.read(script)
|
|
130
|
+
if not meta:
|
|
131
|
+
return None
|
|
132
|
+
custom = meta.get("tools", {}).get("datachain", {})
|
|
133
|
+
return ScriptConfig(
|
|
134
|
+
python_version=meta.get("requires-python"),
|
|
135
|
+
dependencies=meta.get("dependencies"),
|
|
136
|
+
num_workers=custom.get("workers", {}).get("num_workers"),
|
|
137
|
+
attachments=custom.get("attachments"),
|
|
138
|
+
params={k: str(v) for k, v in custom.get("params").items()}
|
|
139
|
+
if custom.get("params")
|
|
140
|
+
else None,
|
|
141
|
+
inputs=custom.get("inputs"),
|
|
142
|
+
outputs=custom.get("outputs"),
|
|
143
|
+
)
|
|
144
|
+
except Exception as e:
|
|
145
|
+
raise ScriptConfigParsingError(
|
|
146
|
+
f"Error when parsing script meta: {e}"
|
|
147
|
+
) from e
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.9.1
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -49,6 +49,7 @@ Requires-Dist: platformdirs
|
|
|
49
49
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
50
50
|
Requires-Dist: tabulate
|
|
51
51
|
Requires-Dist: websockets
|
|
52
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
52
53
|
Provides-Extra: docs
|
|
53
54
|
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
54
55
|
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
@@ -102,7 +103,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
|
|
|
102
103
|
Requires-Dist: defusedxml; extra == "examples"
|
|
103
104
|
Requires-Dist: accelerate; extra == "examples"
|
|
104
105
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
105
|
-
Requires-Dist: ultralytics==8.3.
|
|
106
|
+
Requires-Dist: ultralytics==8.3.78; extra == "examples"
|
|
106
107
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
107
108
|
|
|
108
109
|
================
|
|
@@ -175,7 +176,7 @@ high confidence scores.
|
|
|
175
176
|
|
|
176
177
|
likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
|
|
177
178
|
& (Column("meta.inference.class_") == "cat"))
|
|
178
|
-
likely_cats.
|
|
179
|
+
likely_cats.to_storage("high-confidence-cats/", signal="file")
|
|
179
180
|
|
|
180
181
|
|
|
181
182
|
Example: LLM based text-file evaluation
|
|
@@ -216,7 +217,7 @@ Python code:
|
|
|
216
217
|
)
|
|
217
218
|
|
|
218
219
|
successful_chain = chain.filter(Column("is_success") == True)
|
|
219
|
-
successful_chain.
|
|
220
|
+
successful_chain.to_storage("./output_mistral")
|
|
220
221
|
|
|
221
222
|
print(f"{successful_chain.count()} files were exported")
|
|
222
223
|
|
|
@@ -12,6 +12,7 @@ datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,10
|
|
|
12
12
|
datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
|
|
13
13
|
datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
|
|
14
14
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
|
|
15
16
|
datachain/studio.py,sha256=Coo_6murSjh-RypiHDWNsVXGmfsopyMPCpPS1sA6uUc,9844
|
|
16
17
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
17
18
|
datachain/utils.py,sha256=n8fcyOM8P_2CEFK4h8BZxCAwCkOpt8NAeJK5tm1gIOg,14433
|
|
@@ -36,7 +37,7 @@ datachain/cli/parser/utils.py,sha256=GEzxfPJ4i6nt6JhjvZ3PQesXl9islEV3E-N1NZGrLaA
|
|
|
36
37
|
datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
|
|
37
38
|
datachain/client/azure.py,sha256=ma6fJcnveG8wpNy1PSrN5hgvmRdCj8Sf3RKjfd3qCyM,3221
|
|
38
39
|
datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
|
|
39
|
-
datachain/client/fsspec.py,sha256=
|
|
40
|
+
datachain/client/fsspec.py,sha256=N_n3_DtZuKsLst8-XVda2xYCUHreUU3ld0MNTl8L9f4,14008
|
|
40
41
|
datachain/client/gcs.py,sha256=TY5K5INORKknTnoWDYv0EUztVLmuY1hHmdf2wUB_9uE,5114
|
|
41
42
|
datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
|
|
42
43
|
datachain/client/local.py,sha256=Pv67SYdkNkkNExBoKJF9AnNu0FSrt4JqLRkSVsUnveU,4672
|
|
@@ -68,8 +69,8 @@ datachain/lib/arrow.py,sha256=9UBCF-lftQaz0yxdsjbLKbyzVSmrF_QSWdhp2oBDPqs,9486
|
|
|
68
69
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
69
70
|
datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
|
|
70
71
|
datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
|
|
71
|
-
datachain/lib/dc.py,sha256=
|
|
72
|
-
datachain/lib/file.py,sha256=
|
|
72
|
+
datachain/lib/dc.py,sha256=QQPnrS_OB1d3CfjLnYtRByGc7wNX_YT24WOjaoFPJgw,95372
|
|
73
|
+
datachain/lib/file.py,sha256=Bbnb7JBiAFRD1RsZwPdvoiWFKHkl7V3haDLh672xTZg,27658
|
|
73
74
|
datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
|
|
74
75
|
datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
|
|
75
76
|
datachain/lib/listing.py,sha256=auodM0HitYZsL0DybdgQUYhne_LgkVW-LKGYYOACP90,7272
|
|
@@ -103,13 +104,13 @@ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15p
|
|
|
103
104
|
datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
|
|
104
105
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
105
106
|
datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
|
|
106
|
-
datachain/query/dataset.py,sha256=
|
|
107
|
+
datachain/query/dataset.py,sha256=wK_etZkH558pzLKAMBArlj1TQD9n96YK-kpVYBCSR38,57083
|
|
107
108
|
datachain/query/dispatch.py,sha256=_1vjeQ1wjUoxlik55k0JkWqQCUfMjgVWmEOyWRkx0dU,12437
|
|
108
109
|
datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
|
|
109
110
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
110
111
|
datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
|
|
111
112
|
datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
|
|
112
|
-
datachain/query/session.py,sha256=
|
|
113
|
+
datachain/query/session.py,sha256=I1KG8jDIaxGAfRfDRucMx8DqsANf_VYWtwtXjeD19lI,6399
|
|
113
114
|
datachain/query/udf.py,sha256=GY8E9pnzPE7ZKl_jvetZpn9R2rlUtMlhoYj4UmrzFzw,594
|
|
114
115
|
datachain/query/utils.py,sha256=u0A_BwG9PNs0DxoDcvSWgWLpj3ByTUv8CqH13CIuGag,1293
|
|
115
116
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -135,9 +136,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
135
136
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
136
137
|
datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
|
|
137
138
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
138
|
-
datachain-0.
|
|
139
|
-
datachain-0.
|
|
140
|
-
datachain-0.
|
|
141
|
-
datachain-0.
|
|
142
|
-
datachain-0.
|
|
143
|
-
datachain-0.
|
|
139
|
+
datachain-0.11.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
140
|
+
datachain-0.11.0.dist-info/METADATA,sha256=ijLSRDc7IAZe6YxdX0ZRRNY2LOUlsFFib660U_upu20,11241
|
|
141
|
+
datachain-0.11.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
142
|
+
datachain-0.11.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
143
|
+
datachain-0.11.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
144
|
+
datachain-0.11.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|