datachain 0.18.7__py3-none-any.whl → 0.18.9__py3-none-any.whl
This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/lib/file.py +31 -11
- datachain/lib/image.py +1 -1
- datachain/lib/utils.py +2 -4
- datachain/lib/video.py +10 -4
- datachain/query/dataset.py +1 -1
- {datachain-0.18.7.dist-info → datachain-0.18.9.dist-info}/METADATA +2 -2
- {datachain-0.18.7.dist-info → datachain-0.18.9.dist-info}/RECORD +11 -11
- {datachain-0.18.7.dist-info → datachain-0.18.9.dist-info}/WHEEL +0 -0
- {datachain-0.18.7.dist-info → datachain-0.18.9.dist-info}/entry_points.txt +0 -0
- {datachain-0.18.7.dist-info → datachain-0.18.9.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.18.7.dist-info → datachain-0.18.9.dist-info}/top_level.txt +0 -0
datachain/lib/file.py
CHANGED
|
@@ -81,14 +81,28 @@ class FileExporter(NodesThreadPool):
|
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
class VFileError(DataChainError):
|
|
84
|
-
def __init__(self,
|
|
84
|
+
def __init__(self, message: str, source: str, path: str, vtype: str = ""):
|
|
85
|
+
self.message = message
|
|
86
|
+
self.source = source
|
|
87
|
+
self.path = path
|
|
88
|
+
self.vtype = vtype
|
|
89
|
+
|
|
85
90
|
type_ = f" of vtype '{vtype}'" if vtype else ""
|
|
86
|
-
super().__init__(f"Error in v-file '{
|
|
91
|
+
super().__init__(f"Error in v-file '{source}/{path}'{type_}: {message}")
|
|
92
|
+
|
|
93
|
+
def __reduce__(self):
|
|
94
|
+
return self.__class__, (self.message, self.source, self.path, self.vtype)
|
|
87
95
|
|
|
88
96
|
|
|
89
97
|
class FileError(DataChainError):
|
|
90
|
-
def __init__(self,
|
|
91
|
-
|
|
98
|
+
def __init__(self, message: str, source: str, path: str):
|
|
99
|
+
self.message = message
|
|
100
|
+
self.source = source
|
|
101
|
+
self.path = path
|
|
102
|
+
super().__init__(f"Error in file '{source}/{path}': {message}")
|
|
103
|
+
|
|
104
|
+
def __reduce__(self):
|
|
105
|
+
return self.__class__, (self.message, self.source, self.path)
|
|
92
106
|
|
|
93
107
|
|
|
94
108
|
class VFile(ABC):
|
|
@@ -114,18 +128,20 @@ class TarVFile(VFile):
|
|
|
114
128
|
def open(cls, file: "File", location: list[dict]):
|
|
115
129
|
"""Stream file from tar archive based on location in archive."""
|
|
116
130
|
if len(location) > 1:
|
|
117
|
-
raise VFileError(
|
|
131
|
+
raise VFileError(
|
|
132
|
+
"multiple 'location's are not supported yet", file.source, file.path
|
|
133
|
+
)
|
|
118
134
|
|
|
119
135
|
loc = location[0]
|
|
120
136
|
|
|
121
137
|
if (offset := loc.get("offset", None)) is None:
|
|
122
|
-
raise VFileError(
|
|
138
|
+
raise VFileError("'offset' is not specified", file.source, file.path)
|
|
123
139
|
|
|
124
140
|
if (size := loc.get("size", None)) is None:
|
|
125
|
-
raise VFileError(
|
|
141
|
+
raise VFileError("'size' is not specified", file.source, file.path)
|
|
126
142
|
|
|
127
143
|
if (parent := loc.get("parent", None)) is None:
|
|
128
|
-
raise VFileError(
|
|
144
|
+
raise VFileError("'parent' is not specified", file.source, file.path)
|
|
129
145
|
|
|
130
146
|
tar_file = File(**parent)
|
|
131
147
|
tar_file._set_stream(file._catalog)
|
|
@@ -145,14 +161,18 @@ class VFileRegistry:
|
|
|
145
161
|
@classmethod
|
|
146
162
|
def resolve(cls, file: "File", location: list[dict]):
|
|
147
163
|
if len(location) == 0:
|
|
148
|
-
raise VFileError(
|
|
164
|
+
raise VFileError(
|
|
165
|
+
"'location' must not be list of JSONs", file.source, file.path
|
|
166
|
+
)
|
|
149
167
|
|
|
150
168
|
if not (vtype := location[0].get("vtype", "")):
|
|
151
|
-
raise VFileError(
|
|
169
|
+
raise VFileError("vtype is not specified", file.source, file.path)
|
|
152
170
|
|
|
153
171
|
reader = cls._vtype_readers.get(vtype, None)
|
|
154
172
|
if not reader:
|
|
155
|
-
raise VFileError(
|
|
173
|
+
raise VFileError(
|
|
174
|
+
"reader not registered", file.source, file.path, vtype=vtype
|
|
175
|
+
)
|
|
156
176
|
|
|
157
177
|
return reader.open(file, location)
|
|
158
178
|
|
datachain/lib/image.py
CHANGED
|
@@ -19,7 +19,7 @@ def image_info(file: Union[File, ImageFile]) -> Image:
|
|
|
19
19
|
try:
|
|
20
20
|
img = file.as_image_file().read()
|
|
21
21
|
except Exception as exc:
|
|
22
|
-
raise FileError(
|
|
22
|
+
raise FileError("unable to open image file", file.source, file.path) from exc
|
|
23
23
|
|
|
24
24
|
return Image(
|
|
25
25
|
width=img.width,
|
datachain/lib/utils.py
CHANGED
|
@@ -18,13 +18,11 @@ class AbstractUDF(ABC):
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class DataChainError(Exception):
|
|
21
|
-
|
|
22
|
-
super().__init__(message)
|
|
21
|
+
pass
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
class DataChainParamsError(DataChainError):
|
|
26
|
-
|
|
27
|
-
super().__init__(message)
|
|
25
|
+
pass
|
|
28
26
|
|
|
29
27
|
|
|
30
28
|
class DataChainColumnError(DataChainParamsError):
|
datachain/lib/video.py
CHANGED
|
@@ -34,21 +34,27 @@ def video_info(file: Union[File, VideoFile]) -> Video:
|
|
|
34
34
|
file.ensure_cached()
|
|
35
35
|
file_path = file.get_local_path()
|
|
36
36
|
if not file_path:
|
|
37
|
-
raise FileError(
|
|
37
|
+
raise FileError("unable to download video file", file.source, file.path)
|
|
38
38
|
|
|
39
39
|
try:
|
|
40
40
|
probe = ffmpeg.probe(file_path)
|
|
41
41
|
except Exception as exc:
|
|
42
|
-
raise FileError(
|
|
42
|
+
raise FileError(
|
|
43
|
+
"unable to extract metadata from video file", file.source, file.path
|
|
44
|
+
) from exc
|
|
43
45
|
|
|
44
46
|
all_streams = probe.get("streams")
|
|
45
47
|
video_format = probe.get("format")
|
|
46
48
|
if not all_streams or not video_format:
|
|
47
|
-
raise FileError(
|
|
49
|
+
raise FileError(
|
|
50
|
+
"unable to extract metadata from video file", file.source, file.path
|
|
51
|
+
)
|
|
48
52
|
|
|
49
53
|
video_streams = [s for s in all_streams if s["codec_type"] == "video"]
|
|
50
54
|
if len(video_streams) == 0:
|
|
51
|
-
raise FileError(
|
|
55
|
+
raise FileError(
|
|
56
|
+
"unable to extract metadata from video file", file.source, file.path
|
|
57
|
+
)
|
|
52
58
|
|
|
53
59
|
video_stream = video_streams[0]
|
|
54
60
|
|
datachain/query/dataset.py
CHANGED
|
@@ -1348,7 +1348,7 @@ class DatasetQuery:
|
|
|
1348
1348
|
|
|
1349
1349
|
async def get_params(row: Sequence) -> tuple:
|
|
1350
1350
|
row_dict = RowDict(zip(query_fields, row))
|
|
1351
|
-
return tuple(
|
|
1351
|
+
return tuple( # noqa: C409
|
|
1352
1352
|
[
|
|
1353
1353
|
await p.get_value_async(
|
|
1354
1354
|
self.catalog, row_dict, mapper, **kwargs
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.18.7
|
|
3
|
+
Version: 0.18.9
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -94,7 +94,7 @@ Requires-Dist: scipy; extra == "tests"
|
|
|
94
94
|
Requires-Dist: ultralytics; extra == "tests"
|
|
95
95
|
Provides-Extra: dev
|
|
96
96
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
97
|
-
Requires-Dist: mypy==1.
|
|
97
|
+
Requires-Dist: mypy==1.16.0; extra == "dev"
|
|
98
98
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
99
99
|
Requires-Dist: types-pytz; extra == "dev"
|
|
100
100
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
@@ -72,9 +72,9 @@ datachain/lib/arrow.py,sha256=mFO_6wRqzpEzBhXf7Xn1aeLUvaiHcC6XQ-8as9sbcgY,10253
|
|
|
72
72
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
73
73
|
datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
|
|
74
74
|
datachain/lib/dataset_info.py,sha256=d-jz6zeDU5DEgYtyeSF5nK0MU-40FV5km_iOCh4pXzo,3179
|
|
75
|
-
datachain/lib/file.py,sha256=
|
|
75
|
+
datachain/lib/file.py,sha256=mzc7_fpHAkVhs4z3jBUhFQzPEbODdXJpzjVfby2IkC4,31117
|
|
76
76
|
datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
|
|
77
|
-
datachain/lib/image.py,sha256=
|
|
77
|
+
datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
|
|
78
78
|
datachain/lib/listing.py,sha256=5_GoATtIwCtd1JMqlorPB_vQDxndOQZpiWjNOG3NMw4,7007
|
|
79
79
|
datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
|
|
80
80
|
datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
|
|
@@ -86,8 +86,8 @@ datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
|
86
86
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
87
87
|
datachain/lib/udf.py,sha256=FWqA476ygdk4MU-0qehYKxvnt8Tekh21Cyf3RgddD1k,16674
|
|
88
88
|
datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
|
|
89
|
-
datachain/lib/utils.py,sha256=
|
|
90
|
-
datachain/lib/video.py,sha256=
|
|
89
|
+
datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
|
|
90
|
+
datachain/lib/video.py,sha256=u6fLJWj5G6QqsVkpfHnKGklBNpG3BRRg6v3izngnNcU,6767
|
|
91
91
|
datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
|
|
92
92
|
datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
|
|
93
93
|
datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -121,7 +121,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
|
|
|
121
121
|
datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
|
|
122
122
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
123
123
|
datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
|
|
124
|
-
datachain/query/dataset.py,sha256=
|
|
124
|
+
datachain/query/dataset.py,sha256=dI51zOU1Drev65f6SPn4mvRdwRXs4SOW5STMm3WYd7A,60601
|
|
125
125
|
datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
|
|
126
126
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
127
127
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
153
153
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
154
154
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
155
155
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
156
|
-
datachain-0.18.
|
|
157
|
-
datachain-0.18.
|
|
158
|
-
datachain-0.18.
|
|
159
|
-
datachain-0.18.
|
|
160
|
-
datachain-0.18.
|
|
161
|
-
datachain-0.18.
|
|
156
|
+
datachain-0.18.9.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
157
|
+
datachain-0.18.9.dist-info/METADATA,sha256=0BhJEeQiYf41Rg7DLgJ-WtiUu9cpwwUtVwo__lPaMAw,11319
|
|
158
|
+
datachain-0.18.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
159
|
+
datachain-0.18.9.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
160
|
+
datachain-0.18.9.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
161
|
+
datachain-0.18.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|