datachain 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +17 -8
- datachain/catalog/catalog.py +5 -5
- datachain/cli.py +0 -2
- datachain/data_storage/schema.py +5 -5
- datachain/data_storage/sqlite.py +1 -1
- datachain/data_storage/warehouse.py +7 -7
- datachain/lib/arrow.py +25 -8
- datachain/lib/clip.py +6 -11
- datachain/lib/convert/__init__.py +0 -0
- datachain/lib/convert/flatten.py +67 -0
- datachain/lib/convert/type_converter.py +96 -0
- datachain/lib/convert/unflatten.py +69 -0
- datachain/lib/convert/values_to_tuples.py +85 -0
- datachain/lib/data_model.py +74 -0
- datachain/lib/dc.py +225 -168
- datachain/lib/file.py +41 -41
- datachain/lib/gpt4_vision.py +1 -9
- datachain/lib/hf_image_to_text.py +9 -17
- datachain/lib/hf_pipeline.py +4 -12
- datachain/lib/image.py +2 -18
- datachain/lib/image_transform.py +0 -1
- datachain/lib/iptc_exif_xmp.py +8 -15
- datachain/lib/meta_formats.py +1 -5
- datachain/lib/model_store.py +77 -0
- datachain/lib/pytorch.py +9 -21
- datachain/lib/signal_schema.py +139 -60
- datachain/lib/text.py +5 -16
- datachain/lib/udf.py +114 -30
- datachain/lib/udf_signature.py +5 -5
- datachain/lib/webdataset.py +3 -3
- datachain/lib/webdataset_laion.py +2 -3
- datachain/node.py +4 -4
- datachain/query/batch.py +1 -1
- datachain/query/dataset.py +51 -178
- datachain/query/dispatch.py +43 -30
- datachain/query/udf.py +46 -26
- datachain/remote/studio.py +1 -9
- datachain/torch/__init__.py +21 -0
- datachain/utils.py +39 -0
- {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/METADATA +14 -12
- {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/RECORD +45 -43
- {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/WHEEL +1 -1
- datachain/image/__init__.py +0 -3
- datachain/lib/cached_stream.py +0 -38
- datachain/lib/claude.py +0 -69
- datachain/lib/feature.py +0 -412
- datachain/lib/feature_registry.py +0 -51
- datachain/lib/feature_utils.py +0 -154
- {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/LICENSE +0 -0
- {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import TYPE_CHECKING, ClassVar, Union, get_args, get_origin
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from datachain.lib.model_store import ModelStore
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from datachain.catalog import Catalog
|
|
11
|
+
|
|
12
|
+
# Class objects of the built-in value types accepted next to pydantic models.
StandardType = Union[
    type[int], type[str], type[float], type[bool],
    type[list], type[dict], type[bytes], type[datetime],
]
# Any accepted type: a pydantic model class or one of the standard types above.
DataType = Union[type[BaseModel], StandardType]
# Comma-separated names of the accepted types, in the same order as above.
DataTypeNames = "BaseModel, int, str, float, bool, list, dict, bytes, datetime"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DataModel(BaseModel):
    """Pydantic base model whose subclasses are added to `ModelStore` when declared."""

    # Declared as a ClassVar so it is a class-level constant, not a model field.
    _version: ClassVar[int] = 1

    def get_value(self):
        """Return the raw value carried by this model.

        Used together with methods that operate on raw data, such as
        to_pytorch(), in contrast to methods that operate on data structures
        such as pydantic models. This base implementation returns None.
        """
        return None

    @classmethod
    def __pydantic_init_subclass__(cls, **kwargs):
        """Automatically register every declared DataModel child class.

        Pydantic passes any class-definition keyword arguments it does not
        consume itself to this hook, so they are accepted here and forwarded
        up the MRO instead of raising TypeError.
        """
        super().__pydantic_init_subclass__(**kwargs)
        ModelStore.add(cls)

    @staticmethod
    def register(models: Union[DataType, Sequence[DataType]]):
        """Register classes manually.

        Accepts a single class or a sequence of classes; each one is passed
        to `ModelStore.add`.
        """
        if not isinstance(models, Sequence):
            # A single class was given: wrap it so the loop below handles both.
            models = [models]
        for val in models:
            ModelStore.add(val)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class FileBasic(DataModel):
    """Minimal file-like model: `read()` and `get_value()` are built on `open()`.

    `open()` is not implemented here (presumably subclasses override it).
    """

    def _set_stream(self, catalog: "Catalog", caching_enabled: bool = False) -> None:
        """Do nothing; this base implementation ignores both arguments."""

    def open(self):
        """Always raise NotImplementedError in this base class."""
        raise NotImplementedError

    def read(self):
        """Return the result of `read()` on the stream obtained from `open()`.

        The stream is used as a context manager around the read.
        """
        with self.open() as handle:
            return handle.read()

    def get_value(self):
        """Return the result of `read()` as this model's raw value."""
        return self.read()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def is_chain_type(t: type) -> bool:
    """Tell whether `t` is one of the supported types.

    Returns True when `ModelStore.is_pydantic(t)` is truthy, when `t` is a
    member of `StandardType` (or the class such a member wraps), or when `t`
    is a `list[...]` with exactly one type argument that itself qualifies.
    Returns False otherwise.
    """
    if ModelStore.is_pydantic(t):
        return True

    # Each member of StandardType is `type[X]`; match either the alias or X.
    for standard in get_args(StandardType):
        if t is standard or t is get_args(standard)[0]:
            return True

    if get_origin(t) is not list:
        return False

    item_types = get_args(t)
    if len(item_types) != 1:
        return False

    # Parameterized list: the decision rests on its single item type.
    return is_chain_type(item_types[0])
|