datachain 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (51) hide show
  1. datachain/__init__.py +17 -8
  2. datachain/catalog/catalog.py +5 -5
  3. datachain/cli.py +0 -2
  4. datachain/data_storage/schema.py +5 -5
  5. datachain/data_storage/sqlite.py +1 -1
  6. datachain/data_storage/warehouse.py +7 -7
  7. datachain/lib/arrow.py +25 -8
  8. datachain/lib/clip.py +6 -11
  9. datachain/lib/convert/__init__.py +0 -0
  10. datachain/lib/convert/flatten.py +67 -0
  11. datachain/lib/convert/type_converter.py +96 -0
  12. datachain/lib/convert/unflatten.py +69 -0
  13. datachain/lib/convert/values_to_tuples.py +85 -0
  14. datachain/lib/data_model.py +74 -0
  15. datachain/lib/dc.py +225 -168
  16. datachain/lib/file.py +41 -41
  17. datachain/lib/gpt4_vision.py +1 -9
  18. datachain/lib/hf_image_to_text.py +9 -17
  19. datachain/lib/hf_pipeline.py +4 -12
  20. datachain/lib/image.py +2 -18
  21. datachain/lib/image_transform.py +0 -1
  22. datachain/lib/iptc_exif_xmp.py +8 -15
  23. datachain/lib/meta_formats.py +1 -5
  24. datachain/lib/model_store.py +77 -0
  25. datachain/lib/pytorch.py +9 -21
  26. datachain/lib/signal_schema.py +139 -60
  27. datachain/lib/text.py +5 -16
  28. datachain/lib/udf.py +114 -30
  29. datachain/lib/udf_signature.py +5 -5
  30. datachain/lib/webdataset.py +3 -3
  31. datachain/lib/webdataset_laion.py +2 -3
  32. datachain/node.py +4 -4
  33. datachain/query/batch.py +1 -1
  34. datachain/query/dataset.py +51 -178
  35. datachain/query/dispatch.py +43 -30
  36. datachain/query/udf.py +46 -26
  37. datachain/remote/studio.py +1 -9
  38. datachain/torch/__init__.py +21 -0
  39. datachain/utils.py +39 -0
  40. {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/METADATA +14 -12
  41. {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/RECORD +45 -43
  42. {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/WHEEL +1 -1
  43. datachain/image/__init__.py +0 -3
  44. datachain/lib/cached_stream.py +0 -38
  45. datachain/lib/claude.py +0 -69
  46. datachain/lib/feature.py +0 -412
  47. datachain/lib/feature_registry.py +0 -51
  48. datachain/lib/feature_utils.py +0 -154
  49. {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/LICENSE +0 -0
  50. {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/entry_points.txt +0 -0
  51. {datachain-0.2.9.dist-info → datachain-0.2.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
1
+ from collections.abc import Sequence
2
+ from datetime import datetime
3
+ from typing import TYPE_CHECKING, ClassVar, Union, get_args, get_origin
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from datachain.lib.model_store import ModelStore
8
+
9
+ if TYPE_CHECKING:
10
+ from datachain.catalog import Catalog
11
+
12
# Built-in Python classes accepted as plain (non-model) value types.
# Member order is significant for code that walks get_args(StandardType).
StandardType = Union[
    type[int], type[str], type[float], type[bool],
    type[list], type[dict], type[bytes], type[datetime],
]

# Any supported type: a pydantic model class or one of the standard classes.
DataType = Union[type[BaseModel], StandardType]

# Human-readable list of the supported type names.
DataTypeNames = "BaseModel, int, str, float, bool, list, dict, bytes, datetime"
24
+
25
+
26
class DataModel(BaseModel):
    """Pydantic base class whose subclasses are added to ``ModelStore``.

    Declaring a subclass is enough to register it; arbitrary classes can be
    registered explicitly through :meth:`register`.
    """

    # Class-level constant; ``ClassVar`` keeps it out of the pydantic fields.
    # NOTE(review): presumably a schema version consumed by ModelStore - confirm.
    _version: ClassVar[int] = 1

    def get_value(self):
        """Return the raw value of this object.

        Used in conjunction with methods that operate on raw data, such as
        ``to_pytorch()``, in contrast to methods that operate on data
        structures such as pydantic models. This base implementation
        returns ``None``.
        """
        return None

    @classmethod
    def __pydantic_init_subclass__(cls, **kwargs):
        """Register every declared DataModel child class automatically.

        Pydantic forwards any class-definition keyword arguments it did not
        consume itself to this hook, so they are accepted here and passed up
        the MRO instead of raising ``TypeError``. Calling ``super()`` also
        keeps cooperative mixins that define the same hook working.
        """
        super().__pydantic_init_subclass__(**kwargs)
        ModelStore.add(cls)

    @staticmethod
    def register(models: Union[DataType, Sequence[DataType]]):
        """Register classes manually.

        Args:
            models: A single class or a sequence of classes; each one is
                passed to ``ModelStore.add``.
        """
        # A lone class is not a Sequence instance, so wrap it for uniform handling.
        if not isinstance(models, Sequence):
            models = [models]
        for val in models:
            ModelStore.add(val)
48
+
49
+
50
class FileBasic(DataModel):
    """Base model for file-like objects; subclasses must provide ``open()``."""

    def _set_stream(self, catalog: "Catalog", caching_enabled: bool = False) -> None:
        """Stream-setup hook; intentionally does nothing in this base class."""
        return None

    def open(self):
        """Open the underlying content. Not implemented at this level."""
        raise NotImplementedError

    def read(self):
        """Return whatever ``read()`` yields on the object opened by ``open()``."""
        with self.open() as handle:
            # Returning inside the ``with`` lets the context manager close it.
            return handle.read()

    def get_value(self):
        """Return the raw value of a file, i.e. the result of ``read()``."""
        return self.read()
63
+
64
+
65
def is_chain_type(t: type) -> bool:
    """Tell whether ``t`` is one of the supported data types.

    A type qualifies when ``ModelStore.is_pydantic`` accepts it, when it is
    one of the ``StandardType`` members (either the ``type[X]`` alias object
    itself or the plain class ``X``), or when it is ``list[T]`` with exactly
    one parameter ``T`` that itself qualifies.
    """
    if ModelStore.is_pydantic(t):
        return True

    for std in get_args(StandardType):
        # ``std`` is ``type[X]``; its single argument is the plain class ``X``.
        if t is std or t is get_args(std)[0]:
            return True

    params = get_args(t)
    if get_origin(t) is list and len(params) == 1:
        # Parameterised list: decide based on the element type.
        return is_chain_type(params[0])

    return False