datachain 0.3.18__py3-none-any.whl → 0.3.19__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of datachain might be problematic.
- datachain/__init__.py +5 -2
- datachain/catalog/catalog.py +9 -79
- datachain/cli.py +0 -1
- datachain/dataset.py +7 -2
- datachain/error.py +6 -4
- datachain/lib/arrow.py +8 -3
- datachain/lib/dc.py +2 -2
- datachain/lib/file.py +23 -5
- datachain/query/dataset.py +0 -22
- {datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/METADATA +1 -2
- {datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/RECORD +15 -15
- {datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/LICENSE +0 -0
- {datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/WHEEL +0 -0
- {datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/entry_points.txt +0 -0
- {datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/top_level.txt +0 -0
datachain/__init__.py
CHANGED
```diff
@@ -1,21 +1,23 @@
 from datachain.lib.data_model import DataModel, DataType, is_chain_type
 from datachain.lib.dc import C, Column, DataChain, Sys
 from datachain.lib.file import (
+    ArrowRow,
     File,
     FileError,
     ImageFile,
-    IndexedFile,
     TarVFile,
     TextFile,
 )
 from datachain.lib.model_store import ModelStore
 from datachain.lib.udf import Aggregator, Generator, Mapper
 from datachain.lib.utils import AbstractUDF, DataChainError
+from datachain.query import metrics, param
 from datachain.query.session import Session
 
 __all__ = [
     "AbstractUDF",
     "Aggregator",
+    "ArrowRow",
     "C",
     "Column",
     "DataChain",
@@ -26,7 +28,6 @@ __all__ = [
     "FileError",
     "Generator",
     "ImageFile",
-    "IndexedFile",
     "Mapper",
     "ModelStore",
     "Session",
@@ -34,4 +35,6 @@ __all__ = [
     "TarVFile",
     "TextFile",
     "is_chain_type",
+    "metrics",
+    "param",
 ]
```
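The upshot for imports: `ArrowRow`, `metrics`, and `param` are now available from the package root, and `IndexedFile` is gone. A minimal sketch of the new surface (the `model_fields` inspection is illustrative, relying only on `ArrowRow` being a pydantic-based `DataModel` as defined in `lib/file.py` below):

```python
# Works on 0.3.19; `from datachain import IndexedFile` now raises ImportError.
from datachain import ArrowRow, metrics, param

# ArrowRow is a pydantic model, so its declared signals are inspectable:
print(list(ArrowRow.model_fields))  # ['file', 'index', 'kwargs']
```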
datachain/catalog/catalog.py
CHANGED
```diff
@@ -1,4 +1,3 @@
-import ast
 import glob
 import io
 import json
@@ -53,9 +52,9 @@ from datachain.error import (
     DataChainError,
     DatasetInvalidVersionError,
     DatasetNotFoundError,
+    DatasetVersionNotFoundError,
     PendingIndexingError,
     QueryScriptCancelError,
-    QueryScriptCompileError,
     QueryScriptRunError,
 )
 from datachain.listing import Listing
@@ -588,37 +587,6 @@ class Catalog:
     def generate_query_dataset_name(cls) -> str:
         return f"{QUERY_DATASET_PREFIX}_{uuid4().hex}"
 
-    def attach_query_wrapper(self, code_ast):
-        if code_ast.body:
-            last_expr = code_ast.body[-1]
-            if isinstance(last_expr, ast.Expr):
-                new_expressions = [
-                    ast.Import(
-                        names=[ast.alias(name="datachain.query.dataset", asname=None)]
-                    ),
-                    ast.Expr(
-                        value=ast.Call(
-                            func=ast.Attribute(
-                                value=ast.Attribute(
-                                    value=ast.Attribute(
-                                        value=ast.Name(id="datachain", ctx=ast.Load()),
-                                        attr="query",
-                                        ctx=ast.Load(),
-                                    ),
-                                    attr="dataset",
-                                    ctx=ast.Load(),
-                                ),
-                                attr="query_wrapper",
-                                ctx=ast.Load(),
-                            ),
-                            args=[last_expr],
-                            keywords=[],
-                        )
-                    ),
-                ]
-                code_ast.body[-1:] = new_expressions
-        return code_ast
-
     def get_client(self, uri: str, **config: Any) -> Client:
         """
         Return the client corresponding to the given source `uri`.
@@ -1218,7 +1186,9 @@ class Catalog:
 
         dataset_version = dataset.get_version(version)
         if not dataset_version:
-            raise
+            raise DatasetVersionNotFoundError(
+                f"Dataset {dataset.name} does not have version {version}"
+            )
 
         if not dataset_version.is_final_status():
             raise ValueError("Cannot register dataset version in non final status")
@@ -1581,7 +1551,7 @@ class Catalog:
 
         try:
             remote_dataset_version = remote_dataset.get_version(version)
-        except (
+        except (DatasetVersionNotFoundError, StopIteration) as exc:
             raise DataChainError(
                 f"Dataset {remote_dataset_name} doesn't have version {version}"
                 " on server"
@@ -1722,64 +1692,24 @@ class Catalog:
         query_script: str,
         env: Optional[Mapping[str, str]] = None,
         python_executable: str = sys.executable,
-
-        capture_output: bool = True,
+        capture_output: bool = False,
         output_hook: Callable[[str], None] = noop,
         params: Optional[dict[str, str]] = None,
         job_id: Optional[str] = None,
-        _execute_last_expression: bool = False,
     ) -> None:
-        """
-        Method to run custom user Python script to run a query and, as result,
-        creates new dataset from the results of a query.
-        Returns tuple of result dataset and script output.
-
-        Constraints on query script:
-        1. datachain.query.DatasetQuery should be used in order to create query
-        for a dataset
-        2. There should not be any .save() call on DatasetQuery since the idea
-        is to create only one dataset as the outcome of the script
-        3. Last statement must be an instance of DatasetQuery
-
-        If save is set to True, we are creating new dataset with results
-        from dataset query. If it's set to False, we will just print results
-        without saving anything
-
-        Example of query script:
-            from datachain.query import DatasetQuery, C
-            DatasetQuery('s3://ldb-public/remote/datasets/mnist-tiny/').filter(
-                C.size > 1000
-            )
-        """
-        if _execute_last_expression:
-            try:
-                code_ast = ast.parse(query_script)
-                code_ast = self.attach_query_wrapper(code_ast)
-                query_script_compiled = ast.unparse(code_ast)
-            except Exception as exc:
-                raise QueryScriptCompileError(
-                    f"Query script failed to compile, reason: {exc}"
-                ) from exc
-        else:
-            query_script_compiled = query_script
-            assert not save
-
+        cmd = [python_executable, "-c", query_script]
         env = dict(env or os.environ)
         env.update(
             {
                 "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
-                "PYTHONPATH": os.getcwd(),  # For local imports
-                "DATACHAIN_QUERY_SAVE": "1" if save else "",
-                "PYTHONUNBUFFERED": "1",
                 "DATACHAIN_JOB_ID": job_id or "",
             },
         )
-        popen_kwargs = {}
+        popen_kwargs: dict[str, Any] = {}
         if capture_output:
             popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
 
-        cmd =
-        with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc:  # type: ignore[call-overload] # noqa: S603
+        with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc:  # noqa: S603
             if capture_output:
                 args = (proc.stdout, output_hook)
                 thread = Thread(target=_process_stream, args=args, daemon=True)
```
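Most of the churn above removes the AST pass that rewrote a query script's final expression; `run_query` now hands the script to `python -c` untouched. A condensed sketch of the control flow that remains (function pared down; `capture_output` and the output-reader thread are omitted):

```python
# Hedged sketch of run_query after 0.3.19, not the full method.
import json
import os
import subprocess
import sys

def run_query_sketch(query_script: str, params=None, job_id=None) -> int:
    cmd = [sys.executable, "-c", query_script]
    env = dict(os.environ)
    env.update(
        {
            "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
            "DATACHAIN_JOB_ID": job_id or "",
        }
    )
    # No ast.parse step anymore: a malformed script no longer raises
    # QueryScriptCompileError; it simply fails inside the subprocess.
    with subprocess.Popen(cmd, env=env) as proc:  # noqa: S603
        proc.wait()
    return proc.returncode
```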
datachain/cli.py
CHANGED
datachain/dataset.py
CHANGED
```diff
@@ -12,6 +12,7 @@ from typing import (
 from urllib.parse import urlparse
 
 from datachain.client import Client
+from datachain.error import DatasetVersionNotFoundError
 from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
 
 if TYPE_CHECKING:
@@ -417,7 +418,9 @@ class DatasetRecord:
 
     def get_version(self, version: int) -> DatasetVersion:
         if not self.has_version(version):
-            raise
+            raise DatasetVersionNotFoundError(
+                f"Dataset {self.name} does not have version {version}"
+            )
         return next(
             v
             for v in self.versions  # type: ignore [union-attr]
@@ -435,7 +438,9 @@ class DatasetRecord:
         Get identifier in the form my-dataset@v3
         """
         if not self.has_version(version):
-            raise
+            raise DatasetVersionNotFoundError(
+                f"Dataset {self.name} doesn't have a version {version}"
+            )
         return f"{self.name}@v{version}"
 
     def uri(self, version: int) -> str:
```
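The fix matters because a bare `raise` with no active exception surfaces as `RuntimeError: No active exception to re-raise`, which callers could not catch meaningfully. A self-contained sketch of the new caller-side behavior (the helper mirrors `get_version` but is hypothetical):

```python
from datachain.error import DatasetVersionNotFoundError

def get_version_sketch(name: str, versions: list[int], version: int) -> int:
    # Mirrors DatasetRecord.get_version: a typed, catchable error.
    if version not in versions:
        raise DatasetVersionNotFoundError(
            f"Dataset {name} does not have version {version}"
        )
    return version

try:
    get_version_sketch("my-dataset", [1, 2, 3], 42)
except DatasetVersionNotFoundError as exc:
    print(exc)  # Dataset my-dataset does not have version 42
```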
datachain/error.py
CHANGED
```diff
@@ -10,6 +10,10 @@ class DatasetNotFoundError(NotFoundError):
     pass
 
 
+class DatasetVersionNotFoundError(NotFoundError):
+    pass
+
+
 class DatasetInvalidVersionError(Exception):
     pass
 
@@ -32,14 +36,12 @@ class QueryScriptRunError(Exception):
     Attributes:
         message Explanation of the error
         return_code Code returned by the subprocess
-        output STDOUT + STDERR output of the subprocess
     """
 
-    def __init__(self, message: str, return_code: int = 0
+    def __init__(self, message: str, return_code: int = 0):
         self.message = message
         self.return_code = return_code
-
-        super().__init__(self.message)
+        super().__init__(message)
 
 
 class QueryScriptCancelError(QueryScriptRunError):
```
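Because the new class derives from `NotFoundError`, handlers that already catch the base class pick it up with no changes; a quick sketch:

```python
from datachain.error import DatasetVersionNotFoundError, NotFoundError

try:
    raise DatasetVersionNotFoundError("Dataset d does not have version 3")
except NotFoundError as exc:  # existing broad handlers keep working
    print(type(exc).__name__, exc)
```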
datachain/lib/arrow.py
CHANGED
```diff
@@ -4,11 +4,11 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Optional
 
 import pyarrow as pa
-from pyarrow.dataset import dataset
+from pyarrow.dataset import CsvFileFormat, dataset
 from tqdm import tqdm
 
 from datachain.lib.data_model import dict_to_data_model
-from datachain.lib.file import
+from datachain.lib.file import ArrowRow, File
 from datachain.lib.model_store import ModelStore
 from datachain.lib.udf import Generator
 
@@ -84,7 +84,12 @@ class ArrowGenerator(Generator):
             vals_dict[field] = val
         vals = [self.output_schema(**vals_dict)]
         if self.source:
-
+            kwargs: dict = self.kwargs
+            # Can't serialize CsvFileFormat; may lose formatting options.
+            if isinstance(kwargs.get("format"), CsvFileFormat):
+                kwargs["format"] = "csv"
+            arrow_file = ArrowRow(file=file, index=index, kwargs=kwargs)
+            yield [arrow_file, *vals]
         else:
             yield vals
         index += 1
```
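The `CsvFileFormat` coercion exists because a `pyarrow` format object is not serializable into the stored `kwargs`, while the string `"csv"` is, and `pyarrow.dataset.dataset()` accepts either spelling; custom parse options are dropped, as the inline comment warns. A sketch of the equivalence using a throwaway file:

```python
import pyarrow.dataset as pads

# Throwaway CSV just to demonstrate; path and columns are illustrative.
with open("/tmp/rows.csv", "w") as f:
    f.write("a,b\n1,2\n3,4\n")

ds_obj = pads.dataset("/tmp/rows.csv", format=pads.CsvFileFormat())  # object form
ds_str = pads.dataset("/tmp/rows.csv", format="csv")  # string form, storable
assert ds_obj.to_table().equals(ds_str.to_table())  # same data, default options
```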
datachain/lib/dc.py
CHANGED
```diff
@@ -26,8 +26,8 @@ from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
 from datachain.lib.dataset_info import DatasetInfo
+from datachain.lib.file import ArrowRow, File, get_file_type
 from datachain.lib.file import ExportPlacement as FileExportPlacement
-from datachain.lib.file import File, IndexedFile, get_file_type
 from datachain.lib.listing import (
     is_listing_dataset,
     is_listing_expired,
@@ -1614,7 +1614,7 @@ class DataChain(DatasetQuery):
             for name, info in output.model_fields.items()
         }
         if source:
-            output = {"source":
+            output = {"source": ArrowRow} | output  # type: ignore[assignment,operator]
         return self.gen(
             ArrowGenerator(schema, model, source, nrows, **kwargs), output=output
         )
```
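For chain authors the visible change is the type of the source signal on tabular reads: `source.file` and `source.index` behave as before, and `source.kwargs` is new. A hedged sketch (the `from_storage`/`parse_tabular` call and its arguments are illustrative, not taken from this diff):

```python
import datachain as dc

# Illustrative pipeline; URI and format are placeholders.
chain = dc.DataChain.from_storage("gs://bucket/tables/").parse_tabular(
    format="parquet"
)
# Each row now carries `source: ArrowRow` (file, index, kwargs) where
# 0.3.18 produced `source: IndexedFile` (file, index).
```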
datachain/lib/file.py
CHANGED
```diff
@@ -17,6 +17,7 @@ from urllib.request import url2pathname
 
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 from PIL import Image
+from pyarrow.dataset import dataset
 from pydantic import Field, field_validator
 
 if TYPE_CHECKING:
@@ -439,14 +440,31 @@ class ImageFile(File):
         self.read().save(destination)
 
 
-class
-    """
-
-    Includes `file` and `index` signals.
-    """
+class ArrowRow(DataModel):
+    """`DataModel` for reading row from Arrow-supported file."""
 
     file: File
     index: int
+    kwargs: dict
+
+    @contextmanager
+    def open(self):
+        """Stream row contents from indexed file."""
+        if self.file._caching_enabled:
+            self.file.ensure_cached()
+            path = self.file.get_local_path()
+            ds = dataset(path, **self.kwargs)
+
+        else:
+            path = self.file.get_path()
+            ds = dataset(path, filesystem=self.file.get_fs(), **self.kwargs)
+
+        return ds.take([self.index]).to_reader()
+
+    def read(self):
+        """Returns row contents as dict."""
+        with self.open() as record_batch:
+            return record_batch.to_pylist()[0]
 
 
 def get_file_type(type_: Literal["binary", "text", "image"] = "binary") -> type[File]:
```
datachain/query/dataset.py
CHANGED
```diff
@@ -1604,25 +1604,3 @@ class DatasetQuery:
         finally:
             self.cleanup()
         return self.__class__(name=name, version=version, catalog=self.catalog)
-
-
-def query_wrapper(dataset_query: Any) -> Any:
-    """
-    Wrapper function that wraps the last statement of user query script.
-    Last statement MUST be instance of DatasetQuery, otherwise script exits with
-    error code 10
-    """
-    if not isinstance(dataset_query, DatasetQuery):
-        return dataset_query
-
-    catalog = dataset_query.catalog
-    save = bool(os.getenv("DATACHAIN_QUERY_SAVE"))
-
-    is_session_temp_dataset = dataset_query.name and dataset_query.name.startswith(
-        dataset_query.session.get_temp_prefix()
-    )
-
-    if save and (is_session_temp_dataset or not dataset_query.attached):
-        name = catalog.generate_query_dataset_name()
-        dataset_query = dataset_query.save(name)
-    return dataset_query
```
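With `query_wrapper` gone nothing inspects a script's trailing expression, and `DATACHAIN_QUERY_SAVE` no longer has a reader; persisting a result from a query script is now an explicit `.save()`. Reusing the example from the docstring deleted in `catalog.py` above (the dataset name is illustrative):

```python
from datachain.query import C, DatasetQuery

# 0.3.18 could auto-save this as the script's last expression;
# on 0.3.19 the save must be spelled out.
DatasetQuery("s3://ldb-public/remote/datasets/mnist-tiny/").filter(
    C.size > 1000
).save("mnist-large-files")
```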
{datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.18
+Version: 0.3.19
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -33,7 +33,6 @@ Requires-Dist: dvc-objects <6,>=4
 Requires-Dist: shtab <2,>=1.3.4
 Requires-Dist: sqlalchemy >=2
 Requires-Dist: multiprocess ==0.70.16
-Requires-Dist: dill ==0.3.8
 Requires-Dist: cloudpickle
 Requires-Dist: orjson >=3.10.5
 Requires-Dist: pydantic <3,>=2
```
{datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/RECORD
CHANGED
```diff
@@ -1,12 +1,12 @@
-datachain/__init__.py,sha256=
+datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=Lg3Ck1PQLjQziMx9KU4atzbEnJXTE0924WMYkhgWtGU,8247
 datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
-datachain/cli.py,sha256=
+datachain/cli.py,sha256=TQ1OKMulAcsJndKLCyxJpfNqbMWQgOa4Aeihnu36cR8,30095
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
-datachain/dataset.py,sha256=
-datachain/error.py,sha256=
+datachain/dataset.py,sha256=2NCQU9ZSgNGhA01SP5ON18VhMohXif-btOB4Lz-Uvds,14911
+datachain/error.py,sha256=vbIbamnFMIojh1UpmxWoA6Omup7WFAFNJnf8xAkGWwI,1146
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
 datachain/listing.py,sha256=TkMmBzCiru26x4RaZiagWJTmTGbiy6yGrAsSJMr8cFE,8213
 datachain/node.py,sha256=ThE6Ue4BqpaBvrkFFJW_ljLxchixUX2aWz3l_nbwY54,5195
@@ -18,7 +18,7 @@ datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=KeFSRHsiYthnTu4a6bH-rw04mX1m8krTX0f2NqfQGFI,12114
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=poTu_B5va35MTCV60ntsn4jvAFXepqa2peCjYCXWeU0,64982
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
@@ -39,12 +39,12 @@ datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2kru
 datachain/data_storage/sqlite.py,sha256=EBKJncuzcyQfcKFm2mUjvHjHRTODsteM-k_zndunBrw,28834
 datachain/data_storage/warehouse.py,sha256=Vwhu_OfcNAoTtg1BHui80VCzlPeTUjZQL0QWziu8awY,32186
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=
+datachain/lib/arrow.py,sha256=aUsoQmxDmuSnB8Ik9p57Y66gc_dgx6NBqkDDIfLsvno,7630
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
 datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
-datachain/lib/dc.py,sha256=
-datachain/lib/file.py,sha256=
+datachain/lib/dc.py,sha256=kabEHnqbcoat7gd-yl0PvmuC6SyKbRa8r7NWKcN6GEQ,68978
+datachain/lib/file.py,sha256=LjTW_-PDAnoUhvyB4bJ8Y8n__XGqrxvmd9mDOF0Gir8,14875
 datachain/lib/hf.py,sha256=cPnmLuprr0pYABH7KqA5FARQ1JGlywdDwD3yDzVAm4k,5920
 datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=cHPN5-Fq8yb0gP6DARImhmZWxykDDNqhhJujDxEp53A,4104
@@ -70,7 +70,7 @@ datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xd
 datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
 datachain/query/__init__.py,sha256=0NBOZVgIDpCcj1Ci883dQ9A0iiwe03xzmotkOCFbxYc,293
 datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=F9WEVhDuFm6NQT6l-Vi3PMU-mQVpqwKHMgZIA4eWB18,53602
 datachain/query/dispatch.py,sha256=CFAc09O6UllcyUSSEY1GUlEMPzeO8RYhXinNN4HBl9M,12405
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -97,9 +97,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
-datachain-0.3.
+datachain-0.3.19.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.19.dist-info/METADATA,sha256=yMBpXwOmeoWOmpS0m_hp8GFiMs3Zu_ixMzkG6GF_Z2U,17157
+datachain-0.3.19.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+datachain-0.3.19.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.19.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.19.dist-info/RECORD,,
```
{datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/LICENSE
File without changes
{datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/WHEEL
File without changes
{datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/entry_points.txt
File without changes
{datachain-0.3.18.dist-info → datachain-0.3.19.dist-info}/top_level.txt
File without changes