datachain 0.15.0__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +9 -9
- datachain/cli/__init__.py +1 -1
- datachain/cli/commands/datasets.py +3 -3
- datachain/cli/commands/show.py +2 -2
- datachain/cli/parser/__init__.py +2 -2
- datachain/data_storage/metastore.py +5 -5
- datachain/dataset.py +8 -8
- datachain/func/aggregate.py +3 -3
- datachain/lib/convert/values_to_tuples.py +6 -8
- datachain/lib/dataset_info.py +18 -0
- datachain/lib/dc/datachain.py +20 -13
- datachain/lib/dc/datasets.py +9 -0
- datachain/lib/dc/records.py +16 -10
- datachain/lib/dc/utils.py +2 -2
- datachain/lib/signal_schema.py +1 -10
- datachain/lib/udf.py +2 -1
- datachain/query/dataset.py +15 -8
- datachain/query/schema.py +1 -4
- datachain/remote/studio.py +2 -2
- datachain/studio.py +2 -2
- {datachain-0.15.0.dist-info → datachain-0.16.1.dist-info}/METADATA +1 -1
- {datachain-0.15.0.dist-info → datachain-0.16.1.dist-info}/RECORD +26 -26
- {datachain-0.15.0.dist-info → datachain-0.16.1.dist-info}/WHEEL +1 -1
- {datachain-0.15.0.dist-info → datachain-0.16.1.dist-info}/entry_points.txt +0 -0
- {datachain-0.15.0.dist-info → datachain-0.16.1.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.15.0.dist-info → datachain-0.16.1.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -776,7 +776,7 @@ class Catalog:
|
|
|
776
776
|
listing: Optional[bool] = False,
|
|
777
777
|
uuid: Optional[str] = None,
|
|
778
778
|
description: Optional[str] = None,
|
|
779
|
-
|
|
779
|
+
attrs: Optional[list[str]] = None,
|
|
780
780
|
) -> "DatasetRecord":
|
|
781
781
|
"""
|
|
782
782
|
Creates new dataset of a specific version.
|
|
@@ -794,16 +794,16 @@ class Catalog:
|
|
|
794
794
|
dataset = self.get_dataset(name)
|
|
795
795
|
default_version = dataset.next_version
|
|
796
796
|
|
|
797
|
-
if (description or
|
|
798
|
-
dataset.description != description or dataset.
|
|
797
|
+
if (description or attrs) and (
|
|
798
|
+
dataset.description != description or dataset.attrs != attrs
|
|
799
799
|
):
|
|
800
800
|
description = description or dataset.description
|
|
801
|
-
|
|
801
|
+
attrs = attrs or dataset.attrs
|
|
802
802
|
|
|
803
803
|
self.update_dataset(
|
|
804
804
|
dataset,
|
|
805
805
|
description=description,
|
|
806
|
-
|
|
806
|
+
attrs=attrs,
|
|
807
807
|
)
|
|
808
808
|
|
|
809
809
|
except DatasetNotFoundError:
|
|
@@ -817,7 +817,7 @@ class Catalog:
|
|
|
817
817
|
schema=schema,
|
|
818
818
|
ignore_if_exists=True,
|
|
819
819
|
description=description,
|
|
820
|
-
|
|
820
|
+
attrs=attrs,
|
|
821
821
|
)
|
|
822
822
|
|
|
823
823
|
version = version or default_version
|
|
@@ -1334,15 +1334,15 @@ class Catalog:
|
|
|
1334
1334
|
name: str,
|
|
1335
1335
|
new_name: Optional[str] = None,
|
|
1336
1336
|
description: Optional[str] = None,
|
|
1337
|
-
|
|
1337
|
+
attrs: Optional[list[str]] = None,
|
|
1338
1338
|
) -> DatasetRecord:
|
|
1339
1339
|
update_data = {}
|
|
1340
1340
|
if new_name:
|
|
1341
1341
|
update_data["name"] = new_name
|
|
1342
1342
|
if description is not None:
|
|
1343
1343
|
update_data["description"] = description
|
|
1344
|
-
if
|
|
1345
|
-
update_data["
|
|
1344
|
+
if attrs is not None:
|
|
1345
|
+
update_data["attrs"] = attrs # type: ignore[assignment]
|
|
1346
1346
|
|
|
1347
1347
|
dataset = self.get_dataset(name)
|
|
1348
1348
|
return self.update_dataset(dataset, **update_data)
|
datachain/cli/__init__.py
CHANGED
|
@@ -154,7 +154,7 @@ def edit_dataset(
|
|
|
154
154
|
name: str,
|
|
155
155
|
new_name: Optional[str] = None,
|
|
156
156
|
description: Optional[str] = None,
|
|
157
|
-
|
|
157
|
+
attrs: Optional[list[str]] = None,
|
|
158
158
|
studio: bool = False,
|
|
159
159
|
local: bool = False,
|
|
160
160
|
all: bool = True,
|
|
@@ -167,9 +167,9 @@ def edit_dataset(
|
|
|
167
167
|
|
|
168
168
|
if all or local:
|
|
169
169
|
try:
|
|
170
|
-
catalog.edit_dataset(name, new_name, description,
|
|
170
|
+
catalog.edit_dataset(name, new_name, description, attrs)
|
|
171
171
|
except DatasetNotFoundError:
|
|
172
172
|
print("Dataset not found in local", file=sys.stderr)
|
|
173
173
|
|
|
174
174
|
if (all or studio) and token:
|
|
175
|
-
edit_studio_dataset(team, name, new_name, description,
|
|
175
|
+
edit_studio_dataset(team, name, new_name, description, attrs)
|
datachain/cli/commands/show.py
CHANGED
|
@@ -42,8 +42,8 @@ def show(
|
|
|
42
42
|
print("Name: ", name)
|
|
43
43
|
if dataset.description:
|
|
44
44
|
print("Description: ", dataset.description)
|
|
45
|
-
if dataset.
|
|
46
|
-
print("
|
|
45
|
+
if dataset.attrs:
|
|
46
|
+
print("Attributes: ", ",".join(dataset.attrs))
|
|
47
47
|
print("\n")
|
|
48
48
|
|
|
49
49
|
show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
|
datachain/cli/parser/__init__.py
CHANGED
|
@@ -217,9 +217,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
217
217
|
help="Dataset description",
|
|
218
218
|
)
|
|
219
219
|
parse_edit_dataset.add_argument(
|
|
220
|
-
"--
|
|
220
|
+
"--attrs",
|
|
221
221
|
nargs="+",
|
|
222
|
-
help="Dataset
|
|
222
|
+
help="Dataset attributes",
|
|
223
223
|
)
|
|
224
224
|
parse_edit_dataset.add_argument(
|
|
225
225
|
"--studio",
|
|
@@ -120,7 +120,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
120
120
|
schema: Optional[dict[str, Any]] = None,
|
|
121
121
|
ignore_if_exists: bool = False,
|
|
122
122
|
description: Optional[str] = None,
|
|
123
|
-
|
|
123
|
+
attrs: Optional[list[str]] = None,
|
|
124
124
|
) -> DatasetRecord:
|
|
125
125
|
"""Creates new dataset."""
|
|
126
126
|
|
|
@@ -326,7 +326,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
326
326
|
Column("id", Integer, primary_key=True),
|
|
327
327
|
Column("name", Text, nullable=False),
|
|
328
328
|
Column("description", Text),
|
|
329
|
-
Column("
|
|
329
|
+
Column("attrs", JSON, nullable=True),
|
|
330
330
|
Column("status", Integer, nullable=False),
|
|
331
331
|
Column("feature_schema", JSON, nullable=True),
|
|
332
332
|
Column("created_at", DateTime(timezone=True)),
|
|
@@ -521,7 +521,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
521
521
|
schema: Optional[dict[str, Any]] = None,
|
|
522
522
|
ignore_if_exists: bool = False,
|
|
523
523
|
description: Optional[str] = None,
|
|
524
|
-
|
|
524
|
+
attrs: Optional[list[str]] = None,
|
|
525
525
|
**kwargs, # TODO registered = True / False
|
|
526
526
|
) -> DatasetRecord:
|
|
527
527
|
"""Creates new dataset."""
|
|
@@ -538,7 +538,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
538
538
|
query_script=query_script,
|
|
539
539
|
schema=json.dumps(schema or {}),
|
|
540
540
|
description=description,
|
|
541
|
-
|
|
541
|
+
attrs=json.dumps(attrs or []),
|
|
542
542
|
)
|
|
543
543
|
if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
|
|
544
544
|
# SQLite and PostgreSQL both support 'on_conflict_do_nothing',
|
|
@@ -621,7 +621,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
621
621
|
dataset_values = {}
|
|
622
622
|
for field, value in kwargs.items():
|
|
623
623
|
if field in self._dataset_fields[1:]:
|
|
624
|
-
if field in ["
|
|
624
|
+
if field in ["attrs", "schema"]:
|
|
625
625
|
values[field] = json.dumps(value) if value else None
|
|
626
626
|
else:
|
|
627
627
|
values[field] = value
|
datachain/dataset.py
CHANGED
|
@@ -329,7 +329,7 @@ class DatasetRecord:
|
|
|
329
329
|
id: int
|
|
330
330
|
name: str
|
|
331
331
|
description: Optional[str]
|
|
332
|
-
|
|
332
|
+
attrs: list[str]
|
|
333
333
|
schema: dict[str, Union[SQLType, type[SQLType]]]
|
|
334
334
|
feature_schema: dict
|
|
335
335
|
versions: list[DatasetVersion]
|
|
@@ -357,7 +357,7 @@ class DatasetRecord:
|
|
|
357
357
|
id: int,
|
|
358
358
|
name: str,
|
|
359
359
|
description: Optional[str],
|
|
360
|
-
|
|
360
|
+
attrs: str,
|
|
361
361
|
status: int,
|
|
362
362
|
feature_schema: Optional[str],
|
|
363
363
|
created_at: datetime,
|
|
@@ -387,7 +387,7 @@ class DatasetRecord:
|
|
|
387
387
|
version_schema: str,
|
|
388
388
|
version_job_id: Optional[str] = None,
|
|
389
389
|
) -> "DatasetRecord":
|
|
390
|
-
|
|
390
|
+
attrs_lst: list[str] = json.loads(attrs) if attrs else []
|
|
391
391
|
schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
|
|
392
392
|
version_schema_dct: dict[str, str] = (
|
|
393
393
|
json.loads(version_schema) if version_schema else {}
|
|
@@ -418,7 +418,7 @@ class DatasetRecord:
|
|
|
418
418
|
id,
|
|
419
419
|
name,
|
|
420
420
|
description,
|
|
421
|
-
|
|
421
|
+
attrs_lst,
|
|
422
422
|
cls.parse_schema(schema_dct), # type: ignore[arg-type]
|
|
423
423
|
json.loads(feature_schema) if feature_schema else {},
|
|
424
424
|
[dataset_version],
|
|
@@ -562,7 +562,7 @@ class DatasetListRecord:
|
|
|
562
562
|
id: int
|
|
563
563
|
name: str
|
|
564
564
|
description: Optional[str]
|
|
565
|
-
|
|
565
|
+
attrs: list[str]
|
|
566
566
|
versions: list[DatasetListVersion]
|
|
567
567
|
created_at: Optional[datetime] = None
|
|
568
568
|
|
|
@@ -572,7 +572,7 @@ class DatasetListRecord:
|
|
|
572
572
|
id: int,
|
|
573
573
|
name: str,
|
|
574
574
|
description: Optional[str],
|
|
575
|
-
|
|
575
|
+
attrs: str,
|
|
576
576
|
created_at: datetime,
|
|
577
577
|
version_id: int,
|
|
578
578
|
version_uuid: str,
|
|
@@ -588,7 +588,7 @@ class DatasetListRecord:
|
|
|
588
588
|
version_query_script: Optional[str],
|
|
589
589
|
version_job_id: Optional[str] = None,
|
|
590
590
|
) -> "DatasetListRecord":
|
|
591
|
-
|
|
591
|
+
attrs_lst: list[str] = json.loads(attrs) if attrs else []
|
|
592
592
|
|
|
593
593
|
dataset_version = DatasetListVersion.parse(
|
|
594
594
|
version_id,
|
|
@@ -610,7 +610,7 @@ class DatasetListRecord:
|
|
|
610
610
|
id,
|
|
611
611
|
name,
|
|
612
612
|
description,
|
|
613
|
-
|
|
613
|
+
attrs_lst,
|
|
614
614
|
[dataset_version],
|
|
615
615
|
created_at,
|
|
616
616
|
)
|
datachain/func/aggregate.py
CHANGED
|
@@ -165,7 +165,7 @@ def any_value(col: str) -> Func:
|
|
|
165
165
|
Example:
|
|
166
166
|
```py
|
|
167
167
|
dc.group_by(
|
|
168
|
-
file_example=func.any_value("file.
|
|
168
|
+
file_example=func.any_value("file.path"),
|
|
169
169
|
partition_by="signal.category",
|
|
170
170
|
)
|
|
171
171
|
```
|
|
@@ -227,7 +227,7 @@ def concat(col: str, separator="") -> Func:
|
|
|
227
227
|
Example:
|
|
228
228
|
```py
|
|
229
229
|
dc.group_by(
|
|
230
|
-
files=func.concat("file.
|
|
230
|
+
files=func.concat("file.path", separator=", "),
|
|
231
231
|
partition_by="signal.category",
|
|
232
232
|
)
|
|
233
233
|
```
|
|
@@ -343,7 +343,7 @@ def first(col: str) -> Func:
|
|
|
343
343
|
```py
|
|
344
344
|
window = func.window(partition_by="signal.category", order_by="created_at")
|
|
345
345
|
dc.mutate(
|
|
346
|
-
first_file=func.first("file.
|
|
346
|
+
first_file=func.first("file.path").over(window),
|
|
347
347
|
)
|
|
348
348
|
```
|
|
349
349
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import itertools
|
|
2
2
|
from collections.abc import Sequence
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Union
|
|
4
4
|
|
|
5
5
|
from datachain.lib.data_model import (
|
|
6
6
|
DataType,
|
|
@@ -71,14 +71,13 @@ def values_to_tuples( # noqa: C901, PLR0912
|
|
|
71
71
|
# If a non-None value appears early, it won't check the remaining items for
|
|
72
72
|
# `None` values.
|
|
73
73
|
try:
|
|
74
|
-
|
|
75
|
-
itertools.dropwhile(lambda
|
|
74
|
+
first_not_none_element = next(
|
|
75
|
+
itertools.dropwhile(lambda i: i is None, v)
|
|
76
76
|
)
|
|
77
77
|
except StopIteration:
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
# set default type to `str` if column is empty or all values are `None`
|
|
79
|
+
typ = str
|
|
80
80
|
else:
|
|
81
|
-
nullable = pos > 0
|
|
82
81
|
typ = type(first_not_none_element) # type: ignore[assignment]
|
|
83
82
|
if not is_chain_type(typ):
|
|
84
83
|
raise ValuesToTupleError(
|
|
@@ -88,8 +87,7 @@ def values_to_tuples( # noqa: C901, PLR0912
|
|
|
88
87
|
)
|
|
89
88
|
if isinstance(first_not_none_element, list):
|
|
90
89
|
typ = list[type(first_not_none_element[0])] # type: ignore[assignment, misc]
|
|
91
|
-
|
|
92
|
-
types_map[k] = Optional[typ] if nullable else typ # type: ignore[assignment]
|
|
90
|
+
types_map[k] = typ
|
|
93
91
|
|
|
94
92
|
if length < 0:
|
|
95
93
|
length = len_
|
datachain/lib/dataset_info.py
CHANGED
|
@@ -32,11 +32,28 @@ class DatasetInfo(DataModel):
|
|
|
32
32
|
metrics: dict[str, Any] = Field(default={})
|
|
33
33
|
error_message: str = Field(default="")
|
|
34
34
|
error_stack: str = Field(default="")
|
|
35
|
+
attrs: list[str] = Field(default=[])
|
|
35
36
|
|
|
36
37
|
@property
|
|
37
38
|
def is_temp(self) -> bool:
|
|
38
39
|
return Session.is_temp_dataset(self.name)
|
|
39
40
|
|
|
41
|
+
def has_attr(self, attr: str) -> bool:
|
|
42
|
+
s = attr.split("=")
|
|
43
|
+
if len(s) == 1:
|
|
44
|
+
return attr in self.attrs
|
|
45
|
+
|
|
46
|
+
name = s[0]
|
|
47
|
+
value = s[1]
|
|
48
|
+
for a in self.attrs:
|
|
49
|
+
s = a.split("=")
|
|
50
|
+
if value == "*" and s[0] == name:
|
|
51
|
+
return True
|
|
52
|
+
if len(s) == 2 and s[0] == name and s[1] == value:
|
|
53
|
+
return True
|
|
54
|
+
|
|
55
|
+
return False
|
|
56
|
+
|
|
40
57
|
@staticmethod
|
|
41
58
|
def _validate_dict(
|
|
42
59
|
v: Optional[Union[str, dict]],
|
|
@@ -83,4 +100,5 @@ class DatasetInfo(DataModel):
|
|
|
83
100
|
metrics=job.metrics if job else {},
|
|
84
101
|
error_message=version.error_message,
|
|
85
102
|
error_stack=version.error_stack,
|
|
103
|
+
attrs=dataset.attrs,
|
|
86
104
|
)
|
datachain/lib/dc/datachain.py
CHANGED
|
@@ -459,7 +459,7 @@ class DataChain:
|
|
|
459
459
|
name: str,
|
|
460
460
|
version: Optional[int] = None,
|
|
461
461
|
description: Optional[str] = None,
|
|
462
|
-
|
|
462
|
+
attrs: Optional[list[str]] = None,
|
|
463
463
|
**kwargs,
|
|
464
464
|
) -> "Self":
|
|
465
465
|
"""Save to a Dataset. It returns the chain itself.
|
|
@@ -468,7 +468,8 @@ class DataChain:
|
|
|
468
468
|
name : dataset name.
|
|
469
469
|
version : version of a dataset. Default - the last version that exists.
|
|
470
470
|
description : description of a dataset.
|
|
471
|
-
|
|
471
|
+
attrs : attributes of a dataset. They can be without value, e.g "NLP",
|
|
472
|
+
or with a value, e.g "location=US".
|
|
472
473
|
"""
|
|
473
474
|
schema = self.signals_schema.clone_without_sys_signals().serialize()
|
|
474
475
|
return self._evolve(
|
|
@@ -476,7 +477,7 @@ class DataChain:
|
|
|
476
477
|
name=name,
|
|
477
478
|
version=version,
|
|
478
479
|
description=description,
|
|
479
|
-
|
|
480
|
+
attrs=attrs,
|
|
480
481
|
feature_schema=schema,
|
|
481
482
|
**kwargs,
|
|
482
483
|
)
|
|
@@ -755,7 +756,7 @@ class DataChain:
|
|
|
755
756
|
|
|
756
757
|
Example:
|
|
757
758
|
```py
|
|
758
|
-
dc.distinct("file.
|
|
759
|
+
dc.distinct("file.path")
|
|
759
760
|
```
|
|
760
761
|
"""
|
|
761
762
|
return self._evolve(
|
|
@@ -881,7 +882,7 @@ class DataChain:
|
|
|
881
882
|
```py
|
|
882
883
|
dc.mutate(
|
|
883
884
|
area=Column("image.height") * Column("image.width"),
|
|
884
|
-
extension=file_ext(Column("file.
|
|
885
|
+
extension=file_ext(Column("file.path")),
|
|
885
886
|
dist=cosine_distance(embedding_text, embedding_image)
|
|
886
887
|
)
|
|
887
888
|
```
|
|
@@ -1070,13 +1071,13 @@ class DataChain:
|
|
|
1070
1071
|
|
|
1071
1072
|
Iterating over all rows with selected columns:
|
|
1072
1073
|
```py
|
|
1073
|
-
for name, size in dc.collect("file.
|
|
1074
|
+
for name, size in dc.collect("file.path", "file.size"):
|
|
1074
1075
|
print(name, size)
|
|
1075
1076
|
```
|
|
1076
1077
|
|
|
1077
1078
|
Iterating over a single column:
|
|
1078
1079
|
```py
|
|
1079
|
-
for file in dc.collect("file.
|
|
1080
|
+
for file in dc.collect("file.path"):
|
|
1080
1081
|
print(file)
|
|
1081
1082
|
```
|
|
1082
1083
|
"""
|
|
@@ -1629,7 +1630,7 @@ class DataChain:
|
|
|
1629
1630
|
import datachain as dc
|
|
1630
1631
|
|
|
1631
1632
|
chain = dc.read_storage("s3://mybucket")
|
|
1632
|
-
chain = chain.filter(dc.C("file.
|
|
1633
|
+
chain = chain.filter(dc.C("file.path").glob("*.jsonl"))
|
|
1633
1634
|
chain = chain.parse_tabular(format="json")
|
|
1634
1635
|
```
|
|
1635
1636
|
"""
|
|
@@ -2088,25 +2089,31 @@ class DataChain:
|
|
|
2088
2089
|
|
|
2089
2090
|
Using glob to match patterns
|
|
2090
2091
|
```py
|
|
2091
|
-
dc.filter(C("file.
|
|
2092
|
+
dc.filter(C("file.path").glob("*.jpg"))
|
|
2093
|
+
```
|
|
2094
|
+
|
|
2095
|
+
Using in to match lists
|
|
2096
|
+
```py
|
|
2097
|
+
ids = [1,2,3]
|
|
2098
|
+
dc.filter(C("experiment_id").in_(ids))
|
|
2092
2099
|
```
|
|
2093
2100
|
|
|
2094
2101
|
Using `datachain.func`
|
|
2095
2102
|
```py
|
|
2096
2103
|
from datachain.func import string
|
|
2097
|
-
dc.filter(string.length(C("file.
|
|
2104
|
+
dc.filter(string.length(C("file.path")) > 5)
|
|
2098
2105
|
```
|
|
2099
2106
|
|
|
2100
2107
|
Combining filters with "or"
|
|
2101
2108
|
```py
|
|
2102
|
-
dc.filter(C("file.
|
|
2109
|
+
dc.filter(C("file.path").glob("cat*") | C("file.path").glob("dog*"))
|
|
2103
2110
|
```
|
|
2104
2111
|
|
|
2105
2112
|
Combining filters with "and"
|
|
2106
2113
|
```py
|
|
2107
2114
|
dc.filter(
|
|
2108
|
-
C("file.
|
|
2109
|
-
(string.length(C("file.
|
|
2115
|
+
C("file.path").glob("*.jpg") &
|
|
2116
|
+
(string.length(C("file.path")) > 5)
|
|
2110
2117
|
)
|
|
2111
2118
|
```
|
|
2112
2119
|
"""
|
datachain/lib/dc/datasets.py
CHANGED
|
@@ -102,6 +102,7 @@ def datasets(
|
|
|
102
102
|
column: Optional[str] = None,
|
|
103
103
|
include_listing: bool = False,
|
|
104
104
|
studio: bool = False,
|
|
105
|
+
attrs: Optional[list[str]] = None,
|
|
105
106
|
) -> "DataChain":
|
|
106
107
|
"""Generate chain with list of registered datasets.
|
|
107
108
|
|
|
@@ -114,6 +115,10 @@ def datasets(
|
|
|
114
115
|
include_listing: If True, includes listing datasets. Defaults to False.
|
|
115
116
|
studio: If True, returns datasets from Studio only,
|
|
116
117
|
otherwise returns all local datasets. Defaults to False.
|
|
118
|
+
attrs: Optional list of attributes to filter datasets on. It can be just
|
|
119
|
+
attribute without value e.g "NLP", or attribute with value
|
|
120
|
+
e.g "location=US". Attribute with value can also accept "*" to target
|
|
121
|
+
all that have specific name e.g "location=*"
|
|
117
122
|
|
|
118
123
|
Returns:
|
|
119
124
|
DataChain: A new DataChain instance containing dataset information.
|
|
@@ -139,6 +144,10 @@ def datasets(
|
|
|
139
144
|
]
|
|
140
145
|
datasets_values = [d for d in datasets_values if not d.is_temp]
|
|
141
146
|
|
|
147
|
+
if attrs:
|
|
148
|
+
for attr in attrs:
|
|
149
|
+
datasets_values = [d for d in datasets_values if d.has_attr(attr)]
|
|
150
|
+
|
|
142
151
|
if not column:
|
|
143
152
|
# flattening dataset fields
|
|
144
153
|
schema = {
|
datachain/lib/dc/records.py
CHANGED
|
@@ -4,12 +4,9 @@ from typing import TYPE_CHECKING, Optional, Union
|
|
|
4
4
|
import sqlalchemy
|
|
5
5
|
|
|
6
6
|
from datachain.lib.data_model import DataType
|
|
7
|
-
from datachain.lib.file import
|
|
8
|
-
File,
|
|
9
|
-
)
|
|
7
|
+
from datachain.lib.file import File
|
|
10
8
|
from datachain.lib.signal_schema import SignalSchema
|
|
11
9
|
from datachain.query import Session
|
|
12
|
-
from datachain.query.schema import Column
|
|
13
10
|
|
|
14
11
|
if TYPE_CHECKING:
|
|
15
12
|
from typing_extensions import ParamSpec
|
|
@@ -41,6 +38,9 @@ def read_records(
|
|
|
41
38
|
single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
|
|
42
39
|
```
|
|
43
40
|
"""
|
|
41
|
+
from datachain.query.dataset import adjust_outputs, get_col_types
|
|
42
|
+
from datachain.sql.types import SQLType
|
|
43
|
+
|
|
44
44
|
from .datasets import read_dataset
|
|
45
45
|
|
|
46
46
|
session = Session.get(session, in_memory=in_memory)
|
|
@@ -52,11 +52,10 @@ def read_records(
|
|
|
52
52
|
|
|
53
53
|
if schema:
|
|
54
54
|
signal_schema = SignalSchema(schema)
|
|
55
|
-
columns = [
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
columns.append(sqlalchemy.Column(c.name, c.type, **kw))
|
|
55
|
+
columns = [
|
|
56
|
+
sqlalchemy.Column(c.name, c.type) # type: ignore[union-attr]
|
|
57
|
+
for c in signal_schema.db_signals(as_columns=True)
|
|
58
|
+
]
|
|
60
59
|
else:
|
|
61
60
|
columns = [
|
|
62
61
|
sqlalchemy.Column(name, typ)
|
|
@@ -83,6 +82,13 @@ def read_records(
|
|
|
83
82
|
warehouse = catalog.warehouse
|
|
84
83
|
dr = warehouse.dataset_rows(dsr)
|
|
85
84
|
table = dr.get_table()
|
|
86
|
-
|
|
85
|
+
|
|
86
|
+
# Optimization: Compute row types once, rather than for every row.
|
|
87
|
+
col_types = get_col_types(
|
|
88
|
+
warehouse,
|
|
89
|
+
{c.name: c.type for c in columns if isinstance(c.type, SQLType)},
|
|
90
|
+
)
|
|
91
|
+
records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
|
|
92
|
+
warehouse.insert_rows(table, records)
|
|
87
93
|
warehouse.insert_rows_done(table)
|
|
88
94
|
return read_dataset(name=dsr.name, session=session, settings=settings)
|
datachain/lib/dc/utils.py
CHANGED
|
@@ -31,8 +31,8 @@ def resolve_columns(
|
|
|
31
31
|
) -> "Callable[Concatenate[D, P], D]":
|
|
32
32
|
"""Decorator that resolvs input column names to their actual DB names. This is
|
|
33
33
|
especially important for nested columns as user works with them by using dot
|
|
34
|
-
notation e.g (file.
|
|
35
|
-
in DB, e.g
|
|
34
|
+
notation e.g (file.path) but are actually defined with default delimiter
|
|
35
|
+
in DB, e.g file__path.
|
|
36
36
|
If there are any sql functions in arguments, they will just be transferred as is
|
|
37
37
|
to a method.
|
|
38
38
|
"""
|
datachain/lib/signal_schema.py
CHANGED
|
@@ -581,11 +581,7 @@ class SignalSchema:
|
|
|
581
581
|
signals = [
|
|
582
582
|
DEFAULT_DELIMITER.join(path)
|
|
583
583
|
if not as_columns
|
|
584
|
-
else Column(
|
|
585
|
-
DEFAULT_DELIMITER.join(path),
|
|
586
|
-
python_to_sql(_type),
|
|
587
|
-
nullable=is_optional(_type),
|
|
588
|
-
)
|
|
584
|
+
else Column(DEFAULT_DELIMITER.join(path), python_to_sql(_type))
|
|
589
585
|
for path, _type, has_subtree, _ in self.get_flat_tree(
|
|
590
586
|
include_hidden=include_hidden
|
|
591
587
|
)
|
|
@@ -994,8 +990,3 @@ class SignalSchema:
|
|
|
994
990
|
}
|
|
995
991
|
|
|
996
992
|
return SignalSchema.deserialize(schema)
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
def is_optional(type_: Any) -> bool:
|
|
1000
|
-
"""Check if a type is Optional."""
|
|
1001
|
-
return get_origin(type_) is Union and type(None) in get_args(type_)
|
datachain/lib/udf.py
CHANGED
|
@@ -474,8 +474,9 @@ class Generator(UDFBase):
|
|
|
474
474
|
remove_prefetched=bool(self.prefetch) and not cache,
|
|
475
475
|
)
|
|
476
476
|
with closing(prepared_inputs):
|
|
477
|
-
for row in
|
|
477
|
+
for row in prepared_inputs:
|
|
478
478
|
yield _process_row(row)
|
|
479
|
+
processed_cb.relative_update(1)
|
|
479
480
|
|
|
480
481
|
self.teardown()
|
|
481
482
|
|
datachain/query/dataset.py
CHANGED
|
@@ -57,6 +57,7 @@ from datachain.query.schema import C, UDFParamSpec, normalize_param
|
|
|
57
57
|
from datachain.query.session import Session
|
|
58
58
|
from datachain.query.udf import UdfInfo
|
|
59
59
|
from datachain.sql.functions.random import rand
|
|
60
|
+
from datachain.sql.types import SQLType
|
|
60
61
|
from datachain.utils import (
|
|
61
62
|
batched,
|
|
62
63
|
determine_processes,
|
|
@@ -67,6 +68,8 @@ from datachain.utils import (
|
|
|
67
68
|
)
|
|
68
69
|
|
|
69
70
|
if TYPE_CHECKING:
|
|
71
|
+
from collections.abc import Mapping
|
|
72
|
+
|
|
70
73
|
from sqlalchemy.sql.elements import ClauseElement
|
|
71
74
|
from sqlalchemy.sql.schema import Table
|
|
72
75
|
from sqlalchemy.sql.selectable import GenerativeSelect
|
|
@@ -273,7 +276,9 @@ class Subtract(DatasetDiffOperation):
|
|
|
273
276
|
|
|
274
277
|
|
|
275
278
|
def adjust_outputs(
|
|
276
|
-
warehouse: "AbstractWarehouse",
|
|
279
|
+
warehouse: "AbstractWarehouse",
|
|
280
|
+
row: dict[str, Any],
|
|
281
|
+
col_types: list[tuple[str, SQLType, type, str, Any]],
|
|
277
282
|
) -> dict[str, Any]:
|
|
278
283
|
"""
|
|
279
284
|
This function does a couple of things to prepare a row for inserting into the db:
|
|
@@ -289,7 +294,7 @@ def adjust_outputs(
|
|
|
289
294
|
col_python_type,
|
|
290
295
|
col_type_name,
|
|
291
296
|
default_value,
|
|
292
|
-
) in
|
|
297
|
+
) in col_types:
|
|
293
298
|
row_val = row.get(col_name)
|
|
294
299
|
|
|
295
300
|
# Fill None or missing values with defaults (get returns None if not in the row)
|
|
@@ -304,8 +309,10 @@ def adjust_outputs(
|
|
|
304
309
|
return row
|
|
305
310
|
|
|
306
311
|
|
|
307
|
-
def
|
|
308
|
-
""
|
|
312
|
+
def get_col_types(
|
|
313
|
+
warehouse: "AbstractWarehouse", output: "Mapping[str, Any]"
|
|
314
|
+
) -> list[tuple]:
|
|
315
|
+
"""Optimization: Precompute column types so these don't have to be computed
|
|
309
316
|
in the convert_type function for each row in a loop."""
|
|
310
317
|
dialect = warehouse.db.dialect
|
|
311
318
|
return [
|
|
@@ -317,7 +324,7 @@ def get_udf_col_types(warehouse: "AbstractWarehouse", udf: "UDFAdapter") -> list
|
|
|
317
324
|
type(col_type_inst).__name__,
|
|
318
325
|
col_type.default_value(dialect),
|
|
319
326
|
)
|
|
320
|
-
for col_name, col_type in
|
|
327
|
+
for col_name, col_type in output.items()
|
|
321
328
|
]
|
|
322
329
|
|
|
323
330
|
|
|
@@ -333,7 +340,7 @@ def process_udf_outputs(
|
|
|
333
340
|
|
|
334
341
|
rows: list[UDFResult] = []
|
|
335
342
|
# Optimization: Compute row types once, rather than for every row.
|
|
336
|
-
udf_col_types =
|
|
343
|
+
udf_col_types = get_col_types(warehouse, udf.output)
|
|
337
344
|
|
|
338
345
|
for udf_output in udf_results:
|
|
339
346
|
if not udf_output:
|
|
@@ -1680,7 +1687,7 @@ class DatasetQuery:
|
|
|
1680
1687
|
version: Optional[int] = None,
|
|
1681
1688
|
feature_schema: Optional[dict] = None,
|
|
1682
1689
|
description: Optional[str] = None,
|
|
1683
|
-
|
|
1690
|
+
attrs: Optional[list[str]] = None,
|
|
1684
1691
|
**kwargs,
|
|
1685
1692
|
) -> "Self":
|
|
1686
1693
|
"""Save the query as a dataset."""
|
|
@@ -1714,7 +1721,7 @@ class DatasetQuery:
|
|
|
1714
1721
|
feature_schema=feature_schema,
|
|
1715
1722
|
columns=columns,
|
|
1716
1723
|
description=description,
|
|
1717
|
-
|
|
1724
|
+
attrs=attrs,
|
|
1718
1725
|
**kwargs,
|
|
1719
1726
|
)
|
|
1720
1727
|
version = version or dataset.latest_version
|
datachain/query/schema.py
CHANGED
|
@@ -40,15 +40,12 @@ class ColumnMeta(type):
|
|
|
40
40
|
class Column(sa.ColumnClause, metaclass=ColumnMeta):
|
|
41
41
|
inherit_cache: Optional[bool] = True
|
|
42
42
|
|
|
43
|
-
def __init__(
|
|
44
|
-
self, text, type_=None, is_literal=False, nullable=None, _selectable=None
|
|
45
|
-
):
|
|
43
|
+
def __init__(self, text, type_=None, is_literal=False, _selectable=None):
|
|
46
44
|
"""Dataset column."""
|
|
47
45
|
self.name = ColumnMeta.to_db_name(text)
|
|
48
46
|
super().__init__(
|
|
49
47
|
self.name, type_=type_, is_literal=is_literal, _selectable=_selectable
|
|
50
48
|
)
|
|
51
|
-
self.nullable = nullable
|
|
52
49
|
|
|
53
50
|
def __getattr__(self, name: str):
|
|
54
51
|
return Column(self.name + DEFAULT_DELIMITER + name)
|
datachain/remote/studio.py
CHANGED
|
@@ -290,13 +290,13 @@ class StudioClient:
|
|
|
290
290
|
name: str,
|
|
291
291
|
new_name: Optional[str] = None,
|
|
292
292
|
description: Optional[str] = None,
|
|
293
|
-
|
|
293
|
+
attrs: Optional[list[str]] = None,
|
|
294
294
|
) -> Response[DatasetInfoData]:
|
|
295
295
|
body = {
|
|
296
296
|
"new_name": new_name,
|
|
297
297
|
"dataset_name": name,
|
|
298
298
|
"description": description,
|
|
299
|
-
"
|
|
299
|
+
"attrs": attrs,
|
|
300
300
|
}
|
|
301
301
|
|
|
302
302
|
return self._send_request(
|
datachain/studio.py
CHANGED
|
@@ -187,10 +187,10 @@ def edit_studio_dataset(
|
|
|
187
187
|
name: str,
|
|
188
188
|
new_name: Optional[str] = None,
|
|
189
189
|
description: Optional[str] = None,
|
|
190
|
-
|
|
190
|
+
attrs: Optional[list[str]] = None,
|
|
191
191
|
):
|
|
192
192
|
client = StudioClient(team=team_name)
|
|
193
|
-
response = client.edit_dataset(name, new_name, description,
|
|
193
|
+
response = client.edit_dataset(name, new_name, description, attrs)
|
|
194
194
|
if not response.ok:
|
|
195
195
|
raise DataChainError(response.message)
|
|
196
196
|
|
|
@@ -3,7 +3,7 @@ datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
|
3
3
|
datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
|
|
4
4
|
datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
|
|
5
5
|
datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
|
|
6
|
-
datachain/dataset.py,sha256=
|
|
6
|
+
datachain/dataset.py,sha256=msBC62M_HAv3hT4tKFEGOlH3sMCMg5DVd5lhmqkDGB4,19379
|
|
7
7
|
datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
|
|
8
8
|
datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
|
|
9
9
|
datachain/listing.py,sha256=kNSCFYWo2iM1wWg1trwq4WpYZxYqz4RKxkTtsppEzAw,7079
|
|
@@ -13,24 +13,24 @@ datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2Sm
|
|
|
13
13
|
datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
|
|
14
14
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
|
|
16
|
-
datachain/studio.py,sha256=
|
|
16
|
+
datachain/studio.py,sha256=CwXrZ3PXJFIoilelIHblDV05kzcWj9vbV3KanMPVrRQ,10015
|
|
17
17
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
18
18
|
datachain/utils.py,sha256=8Qz8lRrX0bUTGvwYd-OR-l6ElVRsQBdBO5QMvwt56T4,15190
|
|
19
19
|
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
20
|
-
datachain/catalog/catalog.py,sha256=
|
|
20
|
+
datachain/catalog/catalog.py,sha256=drCemStFXk2MZgexbUsSIBJuUvn0YwL1tJO69KrWeeg,61004
|
|
21
21
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
22
22
|
datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
|
|
23
|
-
datachain/cli/__init__.py,sha256=
|
|
23
|
+
datachain/cli/__init__.py,sha256=i40xHzVZP3iZFBw3UixQ2OU-s_GQq6OyvQ-_6opwIYc,8333
|
|
24
24
|
datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
|
|
25
25
|
datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
|
|
26
|
-
datachain/cli/commands/datasets.py,sha256=
|
|
26
|
+
datachain/cli/commands/datasets.py,sha256=sQ83zxHLuP04cXqBYD3iVcsr49LHA3lnjYxdL142HMk,5793
|
|
27
27
|
datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
|
|
28
28
|
datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
|
|
29
29
|
datachain/cli/commands/ls.py,sha256=dSD2_MHng4t9HRFJZWMOCjPL4XU3qaBV3piNl8UXP08,5275
|
|
30
30
|
datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
|
|
31
31
|
datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
|
|
32
|
-
datachain/cli/commands/show.py,sha256=
|
|
33
|
-
datachain/cli/parser/__init__.py,sha256=
|
|
32
|
+
datachain/cli/commands/show.py,sha256=K__cCLDJLTRt-sBTMxDID0A_4dFgRRMvjDrrVWcbMUQ,1606
|
|
33
|
+
datachain/cli/parser/__init__.py,sha256=SKB94ZS9kRHV7UOrQcIXsSQ7BOFlp4U2To4wseXXcaI,15724
|
|
34
34
|
datachain/cli/parser/job.py,sha256=kvQkSfieyUmvJpOK8p78UgS8sygHhQXztRlOtVcgtaU,3449
|
|
35
35
|
datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
|
|
36
36
|
datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
|
|
@@ -45,7 +45,7 @@ datachain/client/s3.py,sha256=YCtDhKVO_jGsMPeyqe3xk5QsF5lqMabqkt0tPFWUHOM,7286
|
|
|
45
45
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
46
46
|
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
47
47
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
48
|
-
datachain/data_storage/metastore.py,sha256=
|
|
48
|
+
datachain/data_storage/metastore.py,sha256=bhfAaijM7p_D5ltMWg-CVEv9lTflL3bGUWqAmJ8qFbc,37774
|
|
49
49
|
datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
|
|
50
50
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
51
51
|
datachain/data_storage/sqlite.py,sha256=f4tvq0gzYQP7aYGnfL3j4IBUNvctpBxI_ioFU-B1LFc,24540
|
|
@@ -55,7 +55,7 @@ datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
55
55
|
datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
|
|
56
56
|
datachain/fs/utils.py,sha256=s-FkTOCGBk-b6TT3toQH51s9608pofoFjUSTc1yy7oE,825
|
|
57
57
|
datachain/func/__init__.py,sha256=CjNLHfJkepdXdRZ6HjJBjNSIjOeFMuMkwPDaPUrM75g,1270
|
|
58
|
-
datachain/func/aggregate.py,sha256=
|
|
58
|
+
datachain/func/aggregate.py,sha256=UfxENlw56Qv3UEkj2sZ-JZHmr9q8Rnic9io9_63gF-E,10942
|
|
59
59
|
datachain/func/array.py,sha256=O784_uwmaP5CjZX4VSF4RmS8cmpaForQc8zASxHJB6A,6717
|
|
60
60
|
datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
|
|
61
61
|
datachain/func/conditional.py,sha256=HkNamQr9dLyIMDEbIeO6CZR0emQoDqeaWrZ1fECod4M,8062
|
|
@@ -69,7 +69,7 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
69
69
|
datachain/lib/arrow.py,sha256=9UBCF-lftQaz0yxdsjbLKbyzVSmrF_QSWdhp2oBDPqs,9486
|
|
70
70
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
71
71
|
datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
|
|
72
|
-
datachain/lib/dataset_info.py,sha256=
|
|
72
|
+
datachain/lib/dataset_info.py,sha256=Mmo3r_MWRb-47H4QueSaUqgeENJiJZmjkTYBMpRuKM8,3128
|
|
73
73
|
datachain/lib/file.py,sha256=HLQXS_WULm7Y-fkHMy0WpibVAcrkLPRS6CrZy6rwFe0,30450
|
|
74
74
|
datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
|
|
75
75
|
datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
|
|
@@ -79,10 +79,10 @@ datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A
|
|
|
79
79
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
80
80
|
datachain/lib/pytorch.py,sha256=YS6yR13iVlrAXo5wzJswFFUHwWOql9KTdWIa86DXB-k,7712
|
|
81
81
|
datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
|
|
82
|
-
datachain/lib/signal_schema.py,sha256=
|
|
82
|
+
datachain/lib/signal_schema.py,sha256=uIBHYXtu_XpLbOUVC-kq-GduEOCfz9hQORi9ZG3JFqo,35820
|
|
83
83
|
datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
84
84
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
85
|
-
datachain/lib/udf.py,sha256=
|
|
85
|
+
datachain/lib/udf.py,sha256=zCdO5__gLMCgrdHmOvIa0eoWKCDAU1uO-MMAu_EU13o,16228
|
|
86
86
|
datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
|
|
87
87
|
datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
|
|
88
88
|
datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
|
|
@@ -93,20 +93,20 @@ datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1
|
|
|
93
93
|
datachain/lib/convert/python_to_sql.py,sha256=wg-O5FRKX3x3Wh8ZL1b9ntMlgf1zRO4djMP3t8CHJLo,3188
|
|
94
94
|
datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
|
|
95
95
|
datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
|
|
96
|
-
datachain/lib/convert/values_to_tuples.py,sha256=
|
|
96
|
+
datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUOzHUGPoyZXAB0,4360
|
|
97
97
|
datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
|
|
98
98
|
datachain/lib/dc/csv.py,sha256=asWPAxhMgIoLAdD2dObDlnGL8CTSD3TAuFuM4ci89bQ,4374
|
|
99
99
|
datachain/lib/dc/database.py,sha256=gYKh1iO5hOWMPFTU1vZC5kOXkJzVse14TYTWE4_1iEA,5940
|
|
100
|
-
datachain/lib/dc/datachain.py,sha256=
|
|
101
|
-
datachain/lib/dc/datasets.py,sha256=
|
|
100
|
+
datachain/lib/dc/datachain.py,sha256=36J8QIB04hKKumQgLvHNTC94Pd7G2yE4slZ9RfwI9zw,76980
|
|
101
|
+
datachain/lib/dc/datasets.py,sha256=u6hlz0Eodh_s39TOW6kz0VIL3nGfadqu8FLoWqDxSJs,6890
|
|
102
102
|
datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
|
|
103
103
|
datachain/lib/dc/json.py,sha256=ZUThPDAaP2gBFIL5vsQTwKBcuN_dhvC_O44wdDv0jEc,2683
|
|
104
104
|
datachain/lib/dc/listings.py,sha256=2na9v63xO1vPUNaoBSzA-TSN49V7zQAb-4iS1wOPLFE,1029
|
|
105
105
|
datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
|
|
106
106
|
datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
|
|
107
|
-
datachain/lib/dc/records.py,sha256=
|
|
107
|
+
datachain/lib/dc/records.py,sha256=Z6EWy6c6hf87cWiDlQduvrDgOHMLwqF22g-XksOnXsU,2884
|
|
108
108
|
datachain/lib/dc/storage.py,sha256=QLf3-xMV2Gmy3AA8qF9WqAsb7R8Rk87l4s5hBoiCH98,5285
|
|
109
|
-
datachain/lib/dc/utils.py,sha256=
|
|
109
|
+
datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
|
|
110
110
|
datachain/lib/dc/values.py,sha256=cBQubhmPNEDMJldUXzGh-UKbdim4P6O2B91Gp39roKw,1389
|
|
111
111
|
datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
|
|
112
112
|
datachain/model/bbox.py,sha256=cQNHuQuVsh6bW3n3Hj40F2Cc20cExQ9Lg_q7R2jxUMI,9324
|
|
@@ -119,17 +119,17 @@ datachain/model/ultralytics/pose.py,sha256=gXAWfAk4OWZl93hKcQPKZvqJa3nIrECB4RM8K
|
|
|
119
119
|
datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F1ajQQVfh4,2911
|
|
120
120
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
121
121
|
datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
|
|
122
|
-
datachain/query/dataset.py,sha256
|
|
122
|
+
datachain/query/dataset.py,sha256=8O9TFOBLyh_ylqY4gZ7MRLziwAQaU6YdDR_SfBSK65c,58806
|
|
123
123
|
datachain/query/dispatch.py,sha256=5p_jXxKJVCfIA4jLSQ0tAY1IhZUS3oJvyQXUH0Dk3bc,13215
|
|
124
124
|
datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
|
|
125
125
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
126
126
|
datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
|
|
127
|
-
datachain/query/schema.py,sha256=
|
|
127
|
+
datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
|
|
128
128
|
datachain/query/session.py,sha256=wNdOHAi4HrsEihfzdcTlfB5i1xyj0dw6rlUz84StOoU,6512
|
|
129
129
|
datachain/query/udf.py,sha256=ljAYaF-J77t7iS4zc1-g1ssYd4c6Q-ccKGEc3VQQmeM,1322
|
|
130
130
|
datachain/query/utils.py,sha256=u0A_BwG9PNs0DxoDcvSWgWLpj3ByTUv8CqH13CIuGag,1293
|
|
131
131
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
|
-
datachain/remote/studio.py,sha256=
|
|
132
|
+
datachain/remote/studio.py,sha256=SCmsYURwqYTXfxQpizOoyxlPE2ECJv-sZWVitStRPgc,13107
|
|
133
133
|
datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
|
|
134
134
|
datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
|
|
135
135
|
datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
|
|
@@ -151,9 +151,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
151
151
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
152
152
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
153
153
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
154
|
-
datachain-0.
|
|
155
|
-
datachain-0.
|
|
156
|
-
datachain-0.
|
|
157
|
-
datachain-0.
|
|
158
|
-
datachain-0.
|
|
159
|
-
datachain-0.
|
|
154
|
+
datachain-0.16.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
155
|
+
datachain-0.16.1.dist-info/METADATA,sha256=9YPqP6Sthuf_fuxFX3miQyp9MEjRq8j2DqubLXvZg0k,11328
|
|
156
|
+
datachain-0.16.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
157
|
+
datachain-0.16.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
158
|
+
datachain-0.16.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
159
|
+
datachain-0.16.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|