datachain 0.8.10__py3-none-any.whl → 0.8.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/cache.py +4 -4
- datachain/catalog/__init__.py +0 -2
- datachain/catalog/catalog.py +103 -158
- datachain/cli/__init__.py +7 -14
- datachain/cli/commands/__init__.py +0 -2
- datachain/cli/commands/datasets.py +0 -19
- datachain/cli/parser/__init__.py +27 -41
- datachain/cli/parser/studio.py +7 -6
- datachain/cli/parser/utils.py +18 -0
- datachain/client/fsspec.py +11 -8
- datachain/client/local.py +4 -4
- datachain/data_storage/schema.py +1 -1
- datachain/dataset.py +1 -7
- datachain/error.py +12 -0
- datachain/func/__init__.py +2 -1
- datachain/func/conditional.py +77 -26
- datachain/func/func.py +17 -6
- datachain/lib/dc.py +24 -4
- datachain/lib/file.py +16 -0
- datachain/lib/listing.py +30 -12
- datachain/lib/pytorch.py +1 -1
- datachain/lib/udf.py +1 -1
- datachain/listing.py +1 -13
- datachain/node.py +0 -15
- datachain/nodes_fetcher.py +2 -2
- datachain/remote/studio.py +2 -14
- datachain/studio.py +1 -1
- {datachain-0.8.10.dist-info → datachain-0.8.12.dist-info}/METADATA +3 -7
- {datachain-0.8.10.dist-info → datachain-0.8.12.dist-info}/RECORD +33 -33
- {datachain-0.8.10.dist-info → datachain-0.8.12.dist-info}/LICENSE +0 -0
- {datachain-0.8.10.dist-info → datachain-0.8.12.dist-info}/WHEEL +0 -0
- {datachain-0.8.10.dist-info → datachain-0.8.12.dist-info}/entry_points.txt +0 -0
- {datachain-0.8.10.dist-info → datachain-0.8.12.dist-info}/top_level.txt +0 -0
datachain/lib/dc.py
CHANGED
|
@@ -25,6 +25,7 @@ from sqlalchemy.sql.functions import GenericFunction
|
|
|
25
25
|
from sqlalchemy.sql.sqltypes import NullType
|
|
26
26
|
|
|
27
27
|
from datachain.dataset import DatasetRecord
|
|
28
|
+
from datachain.func import literal
|
|
28
29
|
from datachain.func.base import Function
|
|
29
30
|
from datachain.func.func import Func
|
|
30
31
|
from datachain.lib.convert.python_to_sql import python_to_sql
|
|
@@ -1129,8 +1130,12 @@ class DataChain:
|
|
|
1129
1130
|
)
|
|
1130
1131
|
```
|
|
1131
1132
|
"""
|
|
1133
|
+
primitives = (bool, str, int, float)
|
|
1134
|
+
|
|
1132
1135
|
for col_name, expr in kwargs.items():
|
|
1133
|
-
if not isinstance(expr, (Column, Func)) and isinstance(
|
|
1136
|
+
if not isinstance(expr, (*primitives, Column, Func)) and isinstance(
|
|
1137
|
+
expr.type, NullType
|
|
1138
|
+
):
|
|
1134
1139
|
raise DataChainColumnError(
|
|
1135
1140
|
col_name, f"Cannot infer type with expression {expr}"
|
|
1136
1141
|
)
|
|
@@ -1145,6 +1150,11 @@ class DataChain:
|
|
|
1145
1150
|
elif isinstance(value, Func):
|
|
1146
1151
|
# adding new signal
|
|
1147
1152
|
mutated[name] = value.get_column(schema)
|
|
1153
|
+
elif isinstance(value, primitives):
|
|
1154
|
+
# adding simple python constant primitives like str, int, float, bool
|
|
1155
|
+
val = literal(value)
|
|
1156
|
+
val.type = python_to_sql(type(value))()
|
|
1157
|
+
mutated[name] = val # type: ignore[assignment]
|
|
1148
1158
|
else:
|
|
1149
1159
|
# adding new signal
|
|
1150
1160
|
mutated[name] = value
|
|
@@ -1942,7 +1952,7 @@ class DataChain:
|
|
|
1942
1952
|
def from_csv(
|
|
1943
1953
|
cls,
|
|
1944
1954
|
path,
|
|
1945
|
-
delimiter: str = ",",
|
|
1955
|
+
delimiter: Optional[str] = None,
|
|
1946
1956
|
header: bool = True,
|
|
1947
1957
|
output: OutputType = None,
|
|
1948
1958
|
object_name: str = "",
|
|
@@ -1952,6 +1962,7 @@ class DataChain:
|
|
|
1952
1962
|
session: Optional[Session] = None,
|
|
1953
1963
|
settings: Optional[dict] = None,
|
|
1954
1964
|
column_types: Optional[dict[str, "Union[str, ArrowDataType]"]] = None,
|
|
1965
|
+
parse_options: Optional[dict[str, "Union[str, Union[bool, Callable]]"]] = None,
|
|
1955
1966
|
**kwargs,
|
|
1956
1967
|
) -> "DataChain":
|
|
1957
1968
|
"""Generate chain from csv files.
|
|
@@ -1959,7 +1970,8 @@ class DataChain:
|
|
|
1959
1970
|
Parameters:
|
|
1960
1971
|
path : Storage URI with directory. URI must start with storage prefix such
|
|
1961
1972
|
as `s3://`, `gs://`, `az://` or "file:///".
|
|
1962
|
-
delimiter : Character for delimiting columns.
|
|
1973
|
+
delimiter : Character for delimiting columns. Takes precedence if also
|
|
1974
|
+
specified in `parse_options`. Defaults to ",".
|
|
1963
1975
|
header : Whether the files include a header row.
|
|
1964
1976
|
output : Dictionary or feature class defining column names and their
|
|
1965
1977
|
corresponding types. List of column names is also accepted, in which
|
|
@@ -1973,6 +1985,8 @@ class DataChain:
|
|
|
1973
1985
|
column_types : Dictionary of column names and their corresponding types.
|
|
1974
1986
|
It is passed to CSV reader and for each column specified type auto
|
|
1975
1987
|
inference is disabled.
|
|
1988
|
+
parse_options: Tells the parser how to process lines.
|
|
1989
|
+
See https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html
|
|
1976
1990
|
|
|
1977
1991
|
Example:
|
|
1978
1992
|
Reading a csv file:
|
|
@@ -1990,6 +2004,12 @@ class DataChain:
|
|
|
1990
2004
|
from pyarrow.dataset import CsvFileFormat
|
|
1991
2005
|
from pyarrow.lib import type_for_alias
|
|
1992
2006
|
|
|
2007
|
+
parse_options = parse_options or {}
|
|
2008
|
+
if "delimiter" not in parse_options:
|
|
2009
|
+
parse_options["delimiter"] = ","
|
|
2010
|
+
if delimiter:
|
|
2011
|
+
parse_options["delimiter"] = delimiter
|
|
2012
|
+
|
|
1993
2013
|
if column_types:
|
|
1994
2014
|
column_types = {
|
|
1995
2015
|
name: type_for_alias(typ) if isinstance(typ, str) else typ
|
|
@@ -2017,7 +2037,7 @@ class DataChain:
|
|
|
2017
2037
|
msg = f"error parsing csv - incompatible output type {type(output)}"
|
|
2018
2038
|
raise DatasetPrepareError(chain.name, msg)
|
|
2019
2039
|
|
|
2020
|
-
parse_options = ParseOptions(delimiter=delimiter)
|
|
2040
|
+
parse_options = ParseOptions(**parse_options)
|
|
2021
2041
|
read_options = ReadOptions(column_names=column_names)
|
|
2022
2042
|
convert_options = ConvertOptions(
|
|
2023
2043
|
strings_can_be_null=True,
|
datachain/lib/file.py
CHANGED
|
@@ -190,6 +190,22 @@ class File(DataModel):
|
|
|
190
190
|
self._catalog = None
|
|
191
191
|
self._caching_enabled: bool = False
|
|
192
192
|
|
|
193
|
+
@classmethod
|
|
194
|
+
def upload(
|
|
195
|
+
cls, data: bytes, path: str, catalog: Optional["Catalog"] = None
|
|
196
|
+
) -> "File":
|
|
197
|
+
if catalog is None:
|
|
198
|
+
from datachain.catalog.loader import get_catalog
|
|
199
|
+
|
|
200
|
+
catalog = get_catalog()
|
|
201
|
+
|
|
202
|
+
parent, name = posixpath.split(path)
|
|
203
|
+
|
|
204
|
+
client = catalog.get_client(parent)
|
|
205
|
+
file = client.upload(data, name)
|
|
206
|
+
file._set_stream(catalog)
|
|
207
|
+
return file
|
|
208
|
+
|
|
193
209
|
@classmethod
|
|
194
210
|
def _from_row(cls, row: "RowDict") -> "Self":
|
|
195
211
|
return cls(**{key: row[key] for key in cls._datachain_column_types})
|
datachain/lib/listing.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
1
3
|
import posixpath
|
|
2
4
|
from collections.abc import Iterator
|
|
3
5
|
from typing import TYPE_CHECKING, Callable, Optional, TypeVar
|
|
@@ -7,6 +9,7 @@ from sqlalchemy.sql.expression import true
|
|
|
7
9
|
|
|
8
10
|
from datachain.asyn import iter_over_async
|
|
9
11
|
from datachain.client import Client
|
|
12
|
+
from datachain.error import REMOTE_ERRORS, ClientError
|
|
10
13
|
from datachain.lib.file import File
|
|
11
14
|
from datachain.query.schema import Column
|
|
12
15
|
from datachain.sql.functions import path as pathfunc
|
|
@@ -22,6 +25,10 @@ LISTING_PREFIX = "lst__" # listing datasets start with this name
|
|
|
22
25
|
|
|
23
26
|
D = TypeVar("D", bound="DataChain")
|
|
24
27
|
|
|
28
|
+
# Disable warnings for remote errors in clients
|
|
29
|
+
logging.getLogger("aiobotocore.credentials").setLevel(logging.CRITICAL)
|
|
30
|
+
logging.getLogger("gcsfs").setLevel(logging.CRITICAL)
|
|
31
|
+
|
|
25
32
|
|
|
26
33
|
def list_bucket(uri: str, cache, client_config=None) -> Callable:
|
|
27
34
|
"""
|
|
@@ -90,6 +97,15 @@ def _isfile(client: "Client", path: str) -> bool:
|
|
|
90
97
|
Returns True if uri points to a file
|
|
91
98
|
"""
|
|
92
99
|
try:
|
|
100
|
+
if "://" in path:
|
|
101
|
+
# This makes sure that the uppercase scheme is converted to lowercase
|
|
102
|
+
scheme, path = path.split("://", 1)
|
|
103
|
+
path = f"{scheme.lower()}://{path}"
|
|
104
|
+
|
|
105
|
+
if os.name == "nt" and "*" in path:
|
|
106
|
+
# On Windows, the glob pattern "*" is not supported
|
|
107
|
+
return False
|
|
108
|
+
|
|
93
109
|
info = client.fs.info(path)
|
|
94
110
|
name = info.get("name")
|
|
95
111
|
# case for special simulated directories on some clouds
|
|
@@ -99,21 +115,21 @@ def _isfile(client: "Client", path: str) -> bool:
|
|
|
99
115
|
return False
|
|
100
116
|
|
|
101
117
|
return info["type"] == "file"
|
|
102
|
-
except
|
|
118
|
+
except FileNotFoundError:
|
|
103
119
|
return False
|
|
120
|
+
except REMOTE_ERRORS as e:
|
|
121
|
+
raise ClientError(
|
|
122
|
+
message=str(e),
|
|
123
|
+
error_code=getattr(e, "code", None),
|
|
124
|
+
) from e
|
|
104
125
|
|
|
105
126
|
|
|
106
|
-
def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], str, str]:
|
|
127
|
+
def parse_listing_uri(uri: str, client_config) -> tuple[str, str, str]:
|
|
107
128
|
"""
|
|
108
129
|
Parsing uri and returns listing dataset name, listing uri and listing path
|
|
109
130
|
"""
|
|
110
131
|
client_config = client_config or {}
|
|
111
|
-
client = Client.get_client(uri, cache, **client_config)
|
|
112
132
|
storage_uri, path = Client.parse_url(uri)
|
|
113
|
-
telemetry.log_param("client", client.PREFIX)
|
|
114
|
-
|
|
115
|
-
if not uri.endswith("/") and _isfile(client, uri):
|
|
116
|
-
return None, f"{storage_uri}/{path.lstrip('/')}", path
|
|
117
133
|
if uses_glob(path):
|
|
118
134
|
lst_uri_path = posixpath.dirname(path)
|
|
119
135
|
else:
|
|
@@ -157,13 +173,15 @@ def get_listing(
|
|
|
157
173
|
client_config = catalog.client_config
|
|
158
174
|
|
|
159
175
|
client = Client.get_client(uri, cache, **client_config)
|
|
160
|
-
|
|
161
|
-
listing = None
|
|
176
|
+
telemetry.log_param("client", client.PREFIX)
|
|
162
177
|
|
|
163
|
-
#
|
|
164
|
-
if not
|
|
165
|
-
|
|
178
|
+
# we don't want to use cached dataset (e.g. for a single file listing)
|
|
179
|
+
if not uri.endswith("/") and _isfile(client, uri):
|
|
180
|
+
storage_uri, path = Client.parse_url(uri)
|
|
181
|
+
return None, f"{storage_uri}/{path.lstrip('/')}", path, False
|
|
166
182
|
|
|
183
|
+
ds_name, list_uri, list_path = parse_listing_uri(uri, client_config)
|
|
184
|
+
listing = None
|
|
167
185
|
listings = [
|
|
168
186
|
ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
|
|
169
187
|
]
|
datachain/lib/pytorch.py
CHANGED
|
@@ -23,7 +23,7 @@ from datachain.query.dataset import get_download_callback
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
24
24
|
from torchvision.transforms.v2 import Transform
|
|
25
25
|
|
|
26
|
-
from datachain.cache import
|
|
26
|
+
from datachain.cache import Cache
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
logger = logging.getLogger("datachain")
|
datachain/lib/udf.py
CHANGED
|
@@ -32,7 +32,7 @@ if TYPE_CHECKING:
|
|
|
32
32
|
|
|
33
33
|
from typing_extensions import Self
|
|
34
34
|
|
|
35
|
-
from datachain.cache import
|
|
35
|
+
from datachain.cache import Cache
|
|
36
36
|
from datachain.catalog import Catalog
|
|
37
37
|
from datachain.lib.signal_schema import SignalSchema
|
|
38
38
|
from datachain.lib.udf_signature import UdfSignature
|
datachain/listing.py
CHANGED
|
@@ -2,7 +2,6 @@ import glob
|
|
|
2
2
|
import os
|
|
3
3
|
from collections.abc import Iterable, Iterator
|
|
4
4
|
from functools import cached_property
|
|
5
|
-
from itertools import zip_longest
|
|
6
5
|
from typing import TYPE_CHECKING, Optional
|
|
7
6
|
|
|
8
7
|
from sqlalchemy import Column
|
|
@@ -101,11 +100,8 @@ class Listing:
|
|
|
101
100
|
copy_to_filename: Optional[str],
|
|
102
101
|
recursive=False,
|
|
103
102
|
copy_dir_contents=False,
|
|
104
|
-
relative_path=None,
|
|
105
|
-
from_edatachain=False,
|
|
106
103
|
from_dataset=False,
|
|
107
104
|
) -> list[NodeWithPath]:
|
|
108
|
-
rel_path_elements = relative_path.split("/") if relative_path else []
|
|
109
105
|
all_nodes: list[NodeWithPath] = []
|
|
110
106
|
for src in sources:
|
|
111
107
|
node = src.node
|
|
@@ -119,15 +115,7 @@ class Listing:
|
|
|
119
115
|
)
|
|
120
116
|
else:
|
|
121
117
|
node_path = []
|
|
122
|
-
if from_edatachain:
|
|
123
|
-
for rpe, npe in zip_longest(
|
|
124
|
-
rel_path_elements, node.path.split("/")
|
|
125
|
-
):
|
|
126
|
-
if rpe == npe:
|
|
127
|
-
continue
|
|
128
|
-
if npe:
|
|
129
|
-
node_path.append(npe)
|
|
130
|
-
elif copy_to_filename:
|
|
118
|
+
if copy_to_filename:
|
|
131
119
|
node_path = [os.path.basename(copy_to_filename)]
|
|
132
120
|
elif from_dataset:
|
|
133
121
|
node_path = [
|
datachain/node.py
CHANGED
|
@@ -84,18 +84,6 @@ class Node:
|
|
|
84
84
|
fd.write(f" size: {self.size}\n")
|
|
85
85
|
return size
|
|
86
86
|
|
|
87
|
-
def get_metafile_data(self, path: str):
|
|
88
|
-
data: dict[str, Any] = {
|
|
89
|
-
"name": path,
|
|
90
|
-
"etag": self.etag,
|
|
91
|
-
}
|
|
92
|
-
version = self.version
|
|
93
|
-
if version:
|
|
94
|
-
data["version"] = version
|
|
95
|
-
data["last_modified"] = time_to_str(self.last_modified)
|
|
96
|
-
data["size"] = self.size
|
|
97
|
-
return data
|
|
98
|
-
|
|
99
87
|
@property
|
|
100
88
|
def full_path(self) -> str:
|
|
101
89
|
if self.is_dir and self.path:
|
|
@@ -181,9 +169,6 @@ class NodeWithPath:
|
|
|
181
169
|
def append_to_file(self, fd):
|
|
182
170
|
return self.n.append_to_file(fd, "/".join(self.path))
|
|
183
171
|
|
|
184
|
-
def get_metafile_data(self):
|
|
185
|
-
return self.n.get_metafile_data("/".join(self.path))
|
|
186
|
-
|
|
187
172
|
@property
|
|
188
173
|
def full_path(self) -> str:
|
|
189
174
|
path = "/".join(self.path)
|
datachain/nodes_fetcher.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING
|
|
|
5
5
|
from datachain.nodes_thread_pool import NodesThreadPool
|
|
6
6
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
|
-
from datachain.cache import
|
|
8
|
+
from datachain.cache import Cache
|
|
9
9
|
from datachain.client.fsspec import Client
|
|
10
10
|
from datachain.node import Node
|
|
11
11
|
|
|
@@ -13,7 +13,7 @@ logger = logging.getLogger("datachain")
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class NodesFetcher(NodesThreadPool):
|
|
16
|
-
def __init__(self, client: "Client", max_threads: int, cache: "
|
|
16
|
+
def __init__(self, client: "Client", max_threads: int, cache: "Cache"):
|
|
17
17
|
super().__init__(max_threads)
|
|
18
18
|
self.client = client
|
|
19
19
|
self.cache = cache
|
datachain/remote/studio.py
CHANGED
|
@@ -16,14 +16,12 @@ from urllib.parse import urlparse, urlunparse
|
|
|
16
16
|
import websockets
|
|
17
17
|
|
|
18
18
|
from datachain.config import Config
|
|
19
|
-
from datachain.dataset import DatasetStats
|
|
20
19
|
from datachain.error import DataChainError
|
|
21
20
|
from datachain.utils import STUDIO_URL, retry_with_backoff
|
|
22
21
|
|
|
23
22
|
T = TypeVar("T")
|
|
24
23
|
LsData = Optional[list[dict[str, Any]]]
|
|
25
24
|
DatasetInfoData = Optional[dict[str, Any]]
|
|
26
|
-
DatasetStatsData = Optional[DatasetStats]
|
|
27
25
|
DatasetRowsData = Optional[Iterable[dict[str, Any]]]
|
|
28
26
|
DatasetJobVersionsData = Optional[dict[str, Any]]
|
|
29
27
|
DatasetExportStatus = Optional[dict[str, Any]]
|
|
@@ -309,7 +307,7 @@ class StudioClient:
|
|
|
309
307
|
"datachain/datasets",
|
|
310
308
|
{
|
|
311
309
|
"dataset_name": name,
|
|
312
|
-
"
|
|
310
|
+
"dataset_version": version,
|
|
313
311
|
"force": force,
|
|
314
312
|
},
|
|
315
313
|
method="DELETE",
|
|
@@ -347,16 +345,6 @@ class StudioClient:
|
|
|
347
345
|
method="GET",
|
|
348
346
|
)
|
|
349
347
|
|
|
350
|
-
def dataset_stats(self, name: str, version: int) -> Response[DatasetStatsData]:
|
|
351
|
-
response = self._send_request(
|
|
352
|
-
"datachain/datasets/stats",
|
|
353
|
-
{"dataset_name": name, "dataset_version": version},
|
|
354
|
-
method="GET",
|
|
355
|
-
)
|
|
356
|
-
if response.ok:
|
|
357
|
-
response.data = DatasetStats(**response.data)
|
|
358
|
-
return response
|
|
359
|
-
|
|
360
348
|
def export_dataset_table(
|
|
361
349
|
self, name: str, version: int
|
|
362
350
|
) -> Response[DatasetExportSignedUrls]:
|
|
@@ -375,7 +363,7 @@ class StudioClient:
|
|
|
375
363
|
method="GET",
|
|
376
364
|
)
|
|
377
365
|
|
|
378
|
-
def upload_file(self,
|
|
366
|
+
def upload_file(self, content: bytes, file_name: str) -> Response[FileUploadData]:
|
|
379
367
|
data = {
|
|
380
368
|
"file_content": base64.b64encode(content).decode("utf-8"),
|
|
381
369
|
"file_name": file_name,
|
datachain/studio.py
CHANGED
|
@@ -282,7 +282,7 @@ def upload_files(client: StudioClient, files: list[str]) -> list[str]:
|
|
|
282
282
|
file_name = os.path.basename(file)
|
|
283
283
|
with open(file, "rb") as f:
|
|
284
284
|
file_content = f.read()
|
|
285
|
-
response = client.upload_file(
|
|
285
|
+
response = client.upload_file(file_content, file_name)
|
|
286
286
|
if not response.ok:
|
|
287
287
|
raise_remote_error(response.message)
|
|
288
288
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.8.10
|
|
3
|
+
Version: 0.8.12
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -78,7 +78,6 @@ Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
|
78
78
|
Requires-Dist: virtualenv; extra == "tests"
|
|
79
79
|
Requires-Dist: dulwich; extra == "tests"
|
|
80
80
|
Requires-Dist: hypothesis; extra == "tests"
|
|
81
|
-
Requires-Dist: open_clip_torch; extra == "tests"
|
|
82
81
|
Requires-Dist: aiotools>=1.7.0; extra == "tests"
|
|
83
82
|
Requires-Dist: requests-mock; extra == "tests"
|
|
84
83
|
Requires-Dist: scipy; extra == "tests"
|
|
@@ -94,12 +93,9 @@ Provides-Extra: examples
|
|
|
94
93
|
Requires-Dist: datachain[tests]; extra == "examples"
|
|
95
94
|
Requires-Dist: defusedxml; extra == "examples"
|
|
96
95
|
Requires-Dist: accelerate; extra == "examples"
|
|
97
|
-
Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
|
|
98
|
-
Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
|
|
99
|
-
Requires-Dist: pdfplumber==0.11.5; extra == "examples"
|
|
100
96
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
101
|
-
Requires-Dist:
|
|
102
|
-
Requires-Dist:
|
|
97
|
+
Requires-Dist: ultralytics==8.3.68; extra == "examples"
|
|
98
|
+
Requires-Dist: open_clip_torch; extra == "examples"
|
|
103
99
|
|
|
104
100
|
================
|
|
105
101
|
|logo| DataChain
|
|
@@ -1,61 +1,61 @@
|
|
|
1
1
|
datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
|
|
4
|
-
datachain/cache.py,sha256=
|
|
4
|
+
datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
|
|
5
5
|
datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
|
|
6
|
-
datachain/dataset.py,sha256=
|
|
7
|
-
datachain/error.py,sha256=
|
|
6
|
+
datachain/dataset.py,sha256=uqP6gtVFcVMVUFyB9Twr6Uk2onx-aBurbli8_VZu4-s,18993
|
|
7
|
+
datachain/error.py,sha256=P1VI-etraA08ZrXHUEg1-xnOa2MkONd7vV0qA5uxBig,1314
|
|
8
8
|
datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
|
|
9
|
-
datachain/listing.py,sha256=
|
|
10
|
-
datachain/node.py,sha256=
|
|
11
|
-
datachain/nodes_fetcher.py,sha256=
|
|
9
|
+
datachain/listing.py,sha256=HNB-xeKA6aUA-HTWr--H22S6jVOxP2OVQ-3d07ISqAk,7109
|
|
10
|
+
datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
|
|
11
|
+
datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
|
|
12
12
|
datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
|
|
13
13
|
datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
|
|
14
14
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
datachain/studio.py,sha256=
|
|
15
|
+
datachain/studio.py,sha256=iMVDm9wfc86_G02N2p7qF4sdmKDGUkGz7kTOKc9m3Ao,9408
|
|
16
16
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
17
17
|
datachain/utils.py,sha256=LBeg-9n48saBTHSPk7u_j-kjJnPUAq5Oyps_peSaqlM,14128
|
|
18
|
-
datachain/catalog/__init__.py,sha256=
|
|
19
|
-
datachain/catalog/catalog.py,sha256=
|
|
18
|
+
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
19
|
+
datachain/catalog/catalog.py,sha256=Kg5JBfuf-e7QoiHx1wLKRq4h3KmWEMpCHpHLd-WBX9E,58611
|
|
20
20
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
21
21
|
datachain/catalog/loader.py,sha256=HA_mBC7q_My8j2WnSvIjUGuJpl6SIdg5vvy_lagxJlA,5733
|
|
22
|
-
datachain/cli/__init__.py,sha256=
|
|
22
|
+
datachain/cli/__init__.py,sha256=B6xw0qTcBgrICPqeWOhVXPaWJcxdKKg0Os6j2_IGAIc,8219
|
|
23
23
|
datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
|
|
24
|
-
datachain/cli/commands/__init__.py,sha256=
|
|
25
|
-
datachain/cli/commands/datasets.py,sha256=
|
|
24
|
+
datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
|
|
25
|
+
datachain/cli/commands/datasets.py,sha256=k_CwJ_wYX-Jcc_Z8t9a8vX5jFHVt7qDQ0dehaA94iKs,3140
|
|
26
26
|
datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
|
|
27
27
|
datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
|
|
28
28
|
datachain/cli/commands/ls.py,sha256=Wb8hXyBwyhb62Zk6ZhNFPFrj2lJhdbRcnBQQkgL_qyw,5174
|
|
29
29
|
datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
|
|
30
30
|
datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
|
|
31
31
|
datachain/cli/commands/show.py,sha256=RVb_7Kjd1kzqTxRKYFvmD04LaJHOtrCc4FYMyc-ZEYw,1149
|
|
32
|
-
datachain/cli/parser/__init__.py,sha256=
|
|
32
|
+
datachain/cli/parser/__init__.py,sha256=mfOf3tbN4xGr4WQND1B5qMQ4LXoqEU9OhYac7wI-WBc,14393
|
|
33
33
|
datachain/cli/parser/job.py,sha256=Zpi_bEsMp71YCr8xay0I93Taz8zS0_jHbxtvvTzXj6c,3197
|
|
34
|
-
datachain/cli/parser/studio.py,sha256=
|
|
35
|
-
datachain/cli/parser/utils.py,sha256=
|
|
34
|
+
datachain/cli/parser/studio.py,sha256=CwmfdnsDNvDTOEbhLmjun18s4yo8zCgrtGTpF67qf8Q,2968
|
|
35
|
+
datachain/cli/parser/utils.py,sha256=7ZtzGXfBAjVthnc-agz7R9rplnxqFan0LT6x2tq-6Fk,2007
|
|
36
36
|
datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
|
|
37
37
|
datachain/client/azure.py,sha256=ma6fJcnveG8wpNy1PSrN5hgvmRdCj8Sf3RKjfd3qCyM,3221
|
|
38
38
|
datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
|
|
39
|
-
datachain/client/fsspec.py,sha256=
|
|
39
|
+
datachain/client/fsspec.py,sha256=whQWKD0tGQUjc8FpA0irxc31wiJuQS4x2arHc98-Lv0,13966
|
|
40
40
|
datachain/client/gcs.py,sha256=TY5K5INORKknTnoWDYv0EUztVLmuY1hHmdf2wUB_9uE,5114
|
|
41
41
|
datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
|
|
42
|
-
datachain/client/local.py,sha256=
|
|
42
|
+
datachain/client/local.py,sha256=Pv67SYdkNkkNExBoKJF9AnNu0FSrt4JqLRkSVsUnveU,4672
|
|
43
43
|
datachain/client/s3.py,sha256=l2A4J086ZROKKHNVXnoBky0OgYYKB0EAr8Y3lObo8GY,7284
|
|
44
44
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
45
45
|
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
46
46
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
47
47
|
datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
|
|
48
|
-
datachain/data_storage/schema.py,sha256=
|
|
48
|
+
datachain/data_storage/schema.py,sha256=qSukry2kINhVw8aj5lQrpe7N90DFeatKIKmDh6jAzR8,9515
|
|
49
49
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
50
50
|
datachain/data_storage/sqlite.py,sha256=KJ8hI0Hrwv9eAA-nLUlw2AYCQxiAAZ12a-ftUBtroNQ,24545
|
|
51
51
|
datachain/data_storage/warehouse.py,sha256=ovdH9LmOWLfCrvf0UvXnrNC-CrdAjns3EmXEgFdz4KM,30824
|
|
52
52
|
datachain/diff/__init__.py,sha256=OapNRBsyGDOQHelefUEoXoFHRWCJuBnhvD0ibebKvBc,10486
|
|
53
|
-
datachain/func/__init__.py,sha256=
|
|
53
|
+
datachain/func/__init__.py,sha256=qaSjakSaTsRtnU7Hcb4lJk71tbwk7M0oWmjRqXExCLA,1099
|
|
54
54
|
datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
|
|
55
55
|
datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
|
|
56
56
|
datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
|
|
57
|
-
datachain/func/conditional.py,sha256=
|
|
58
|
-
datachain/func/func.py,sha256=
|
|
57
|
+
datachain/func/conditional.py,sha256=g46zwW-i87uA45zWJnPHtHaqr6qOXSg6xLb4p9W3Gtk,6400
|
|
58
|
+
datachain/func/func.py,sha256=PnwTRAiEJUus3e4NYdQ-hldqLzKS9hY0FjiyBMZhsSo,16183
|
|
59
59
|
datachain/func/numeric.py,sha256=gMe1Ks0dqQKHkjcpvj7I5S-neECzQ_gltPQLNoaWOyo,5632
|
|
60
60
|
datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
|
|
61
61
|
datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
|
|
@@ -66,20 +66,20 @@ datachain/lib/arrow.py,sha256=sU6cbjz2W1UuTfez6tCYPfVPJXlmfMDbnaVWPhMu0XU,9906
|
|
|
66
66
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
67
67
|
datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
|
|
68
68
|
datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
|
|
69
|
-
datachain/lib/dc.py,sha256=
|
|
70
|
-
datachain/lib/file.py,sha256=
|
|
69
|
+
datachain/lib/dc.py,sha256=hJR0rxP4mQozYubERiw35mkTpsm2J0nvB7dJIAWEQ30,93367
|
|
70
|
+
datachain/lib/file.py,sha256=SLgaE5sJJu0-c981Ux1OsR4-IbULD17wGO7fttbCcXU,16743
|
|
71
71
|
datachain/lib/hf.py,sha256=DvoI8fv-WkL3FDEuIT80T9WrRs6fXesjbU0bmIDDsNE,5882
|
|
72
72
|
datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
|
|
73
|
-
datachain/lib/listing.py,sha256=
|
|
73
|
+
datachain/lib/listing.py,sha256=auodM0HitYZsL0DybdgQUYhne_LgkVW-LKGYYOACP90,7272
|
|
74
74
|
datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
|
|
75
75
|
datachain/lib/meta_formats.py,sha256=hDPfEkcmiLZOjhBBXuareMdnq65Wj8vZvxjmum6cROM,6377
|
|
76
76
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
77
|
-
datachain/lib/pytorch.py,sha256=
|
|
77
|
+
datachain/lib/pytorch.py,sha256=QxXBhrn2-D0RiFA2rdxZ7wKMxyuQ0WWHKfiFEWAA760,7710
|
|
78
78
|
datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
|
|
79
79
|
datachain/lib/signal_schema.py,sha256=ps5od6zhWtdX3Khx2fwArl2xlGkK8SKi6vCQ6QmbaR0,27404
|
|
80
80
|
datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
81
81
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
82
|
-
datachain/lib/udf.py,sha256=
|
|
82
|
+
datachain/lib/udf.py,sha256=TlvikKTFvkIKaqqSkSriOyXhQ0rwRHV2ZRs1LHZOCmo,16107
|
|
83
83
|
datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
|
|
84
84
|
datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
|
|
85
85
|
datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -111,7 +111,7 @@ datachain/query/session.py,sha256=fQAtl5zRESRDfRS2d5J9KgrWauunCtrd96vP4Ns1KlE,59
|
|
|
111
111
|
datachain/query/udf.py,sha256=GY8E9pnzPE7ZKl_jvetZpn9R2rlUtMlhoYj4UmrzFzw,594
|
|
112
112
|
datachain/query/utils.py,sha256=u0A_BwG9PNs0DxoDcvSWgWLpj3ByTUv8CqH13CIuGag,1293
|
|
113
113
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
|
-
datachain/remote/studio.py,sha256=
|
|
114
|
+
datachain/remote/studio.py,sha256=3v4ZqP06BwBMLXQ4mbcTS95oUodYgBv9A5XisL6ffWo,12915
|
|
115
115
|
datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
|
|
116
116
|
datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
|
|
117
117
|
datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
|
|
@@ -133,9 +133,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
133
133
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
134
134
|
datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
|
|
135
135
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
136
|
-
datachain-0.8.
|
|
137
|
-
datachain-0.8.
|
|
138
|
-
datachain-0.8.
|
|
139
|
-
datachain-0.8.
|
|
140
|
-
datachain-0.8.
|
|
141
|
-
datachain-0.8.
|
|
136
|
+
datachain-0.8.12.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
137
|
+
datachain-0.8.12.dist-info/METADATA,sha256=C1vaFTVw44GIVe32CcfLthfCi5nbbqTgS7HL61iSFGg,10880
|
|
138
|
+
datachain-0.8.12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
139
|
+
datachain-0.8.12.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
140
|
+
datachain-0.8.12.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
141
|
+
datachain-0.8.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|