datachain 0.18.7__py3-none-any.whl → 0.18.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/lib/file.py CHANGED
@@ -81,14 +81,28 @@ class FileExporter(NodesThreadPool):
81
81
 
82
82
 
83
83
  class VFileError(DataChainError):
84
- def __init__(self, file: "File", message: str, vtype: str = ""):
84
+ def __init__(self, message: str, source: str, path: str, vtype: str = ""):
85
+ self.message = message
86
+ self.source = source
87
+ self.path = path
88
+ self.vtype = vtype
89
+
85
90
  type_ = f" of vtype '{vtype}'" if vtype else ""
86
- super().__init__(f"Error in v-file '{file.path}'{type_}: {message}")
91
+ super().__init__(f"Error in v-file '{source}/{path}'{type_}: {message}")
92
+
93
+ def __reduce__(self):
94
+ return self.__class__, (self.message, self.source, self.path, self.vtype)
87
95
 
88
96
 
89
97
  class FileError(DataChainError):
90
- def __init__(self, file: "File", message: str):
91
- super().__init__(f"Error in file {file.get_uri()}: {message}")
98
+ def __init__(self, message: str, source: str, path: str):
99
+ self.message = message
100
+ self.source = source
101
+ self.path = path
102
+ super().__init__(f"Error in file '{source}/{path}': {message}")
103
+
104
+ def __reduce__(self):
105
+ return self.__class__, (self.message, self.source, self.path)
92
106
 
93
107
 
94
108
  class VFile(ABC):
@@ -114,18 +128,20 @@ class TarVFile(VFile):
114
128
  def open(cls, file: "File", location: list[dict]):
115
129
  """Stream file from tar archive based on location in archive."""
116
130
  if len(location) > 1:
117
- raise VFileError(file, "multiple 'location's are not supported yet")
131
+ raise VFileError(
132
+ "multiple 'location's are not supported yet", file.source, file.path
133
+ )
118
134
 
119
135
  loc = location[0]
120
136
 
121
137
  if (offset := loc.get("offset", None)) is None:
122
- raise VFileError(file, "'offset' is not specified")
138
+ raise VFileError("'offset' is not specified", file.source, file.path)
123
139
 
124
140
  if (size := loc.get("size", None)) is None:
125
- raise VFileError(file, "'size' is not specified")
141
+ raise VFileError("'size' is not specified", file.source, file.path)
126
142
 
127
143
  if (parent := loc.get("parent", None)) is None:
128
- raise VFileError(file, "'parent' is not specified")
144
+ raise VFileError("'parent' is not specified", file.source, file.path)
129
145
 
130
146
  tar_file = File(**parent)
131
147
  tar_file._set_stream(file._catalog)
@@ -145,14 +161,18 @@ class VFileRegistry:
145
161
  @classmethod
146
162
  def resolve(cls, file: "File", location: list[dict]):
147
163
  if len(location) == 0:
148
- raise VFileError(file, "'location' must not be list of JSONs")
164
+ raise VFileError(
165
+ "'location' must not be list of JSONs", file.source, file.path
166
+ )
149
167
 
150
168
  if not (vtype := location[0].get("vtype", "")):
151
- raise VFileError(file, "vtype is not specified")
169
+ raise VFileError("vtype is not specified", file.source, file.path)
152
170
 
153
171
  reader = cls._vtype_readers.get(vtype, None)
154
172
  if not reader:
155
- raise VFileError(file, "reader not registered", vtype)
173
+ raise VFileError(
174
+ "reader not registered", file.source, file.path, vtype=vtype
175
+ )
156
176
 
157
177
  return reader.open(file, location)
158
178
 
datachain/lib/image.py CHANGED
@@ -19,7 +19,7 @@ def image_info(file: Union[File, ImageFile]) -> Image:
19
19
  try:
20
20
  img = file.as_image_file().read()
21
21
  except Exception as exc:
22
- raise FileError(file, "unable to open image file") from exc
22
+ raise FileError("unable to open image file", file.source, file.path) from exc
23
23
 
24
24
  return Image(
25
25
  width=img.width,
datachain/lib/utils.py CHANGED
@@ -18,13 +18,11 @@ class AbstractUDF(ABC):
18
18
 
19
19
 
20
20
  class DataChainError(Exception):
21
- def __init__(self, message):
22
- super().__init__(message)
21
+ pass
23
22
 
24
23
 
25
24
  class DataChainParamsError(DataChainError):
26
- def __init__(self, message):
27
- super().__init__(message)
25
+ pass
28
26
 
29
27
 
30
28
  class DataChainColumnError(DataChainParamsError):
datachain/lib/video.py CHANGED
@@ -34,21 +34,27 @@ def video_info(file: Union[File, VideoFile]) -> Video:
34
34
  file.ensure_cached()
35
35
  file_path = file.get_local_path()
36
36
  if not file_path:
37
- raise FileError(file, "unable to download video file")
37
+ raise FileError("unable to download video file", file.source, file.path)
38
38
 
39
39
  try:
40
40
  probe = ffmpeg.probe(file_path)
41
41
  except Exception as exc:
42
- raise FileError(file, "unable to extract metadata from video file") from exc
42
+ raise FileError(
43
+ "unable to extract metadata from video file", file.source, file.path
44
+ ) from exc
43
45
 
44
46
  all_streams = probe.get("streams")
45
47
  video_format = probe.get("format")
46
48
  if not all_streams or not video_format:
47
- raise FileError(file, "unable to extract metadata from video file")
49
+ raise FileError(
50
+ "unable to extract metadata from video file", file.source, file.path
51
+ )
48
52
 
49
53
  video_streams = [s for s in all_streams if s["codec_type"] == "video"]
50
54
  if len(video_streams) == 0:
51
- raise FileError(file, "unable to extract metadata from video file")
55
+ raise FileError(
56
+ "unable to extract metadata from video file", file.source, file.path
57
+ )
52
58
 
53
59
  video_stream = video_streams[0]
54
60
 
@@ -1348,7 +1348,7 @@ class DatasetQuery:
1348
1348
 
1349
1349
  async def get_params(row: Sequence) -> tuple:
1350
1350
  row_dict = RowDict(zip(query_fields, row))
1351
- return tuple(
1351
+ return tuple( # noqa: C409
1352
1352
  [
1353
1353
  await p.get_value_async(
1354
1354
  self.catalog, row_dict, mapper, **kwargs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.7
3
+ Version: 0.18.9
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -94,7 +94,7 @@ Requires-Dist: scipy; extra == "tests"
94
94
  Requires-Dist: ultralytics; extra == "tests"
95
95
  Provides-Extra: dev
96
96
  Requires-Dist: datachain[docs,tests]; extra == "dev"
97
- Requires-Dist: mypy==1.15.0; extra == "dev"
97
+ Requires-Dist: mypy==1.16.0; extra == "dev"
98
98
  Requires-Dist: types-python-dateutil; extra == "dev"
99
99
  Requires-Dist: types-pytz; extra == "dev"
100
100
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -72,9 +72,9 @@ datachain/lib/arrow.py,sha256=mFO_6wRqzpEzBhXf7Xn1aeLUvaiHcC6XQ-8as9sbcgY,10253
72
72
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
73
73
  datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
74
74
  datachain/lib/dataset_info.py,sha256=d-jz6zeDU5DEgYtyeSF5nK0MU-40FV5km_iOCh4pXzo,3179
75
- datachain/lib/file.py,sha256=0oFm1MWU7AatXplxRj-6Xbjjb6A_AvM_awwk9mYb0hc,30466
75
+ datachain/lib/file.py,sha256=mzc7_fpHAkVhs4z3jBUhFQzPEbODdXJpzjVfby2IkC4,31117
76
76
  datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
77
- datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
77
+ datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
78
78
  datachain/lib/listing.py,sha256=5_GoATtIwCtd1JMqlorPB_vQDxndOQZpiWjNOG3NMw4,7007
79
79
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
80
80
  datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
@@ -86,8 +86,8 @@ datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
86
86
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
87
87
  datachain/lib/udf.py,sha256=FWqA476ygdk4MU-0qehYKxvnt8Tekh21Cyf3RgddD1k,16674
88
88
  datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
89
- datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
90
- datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
89
+ datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
90
+ datachain/lib/video.py,sha256=u6fLJWj5G6QqsVkpfHnKGklBNpG3BRRg6v3izngnNcU,6767
91
91
  datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
92
92
  datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
93
93
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -121,7 +121,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
121
121
  datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
122
122
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
123
123
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
124
- datachain/query/dataset.py,sha256=3c3MAiIl7ZnCii_0dZA-Om73ornNMSKkna32JX3H05E,60587
124
+ datachain/query/dataset.py,sha256=dI51zOU1Drev65f6SPn4mvRdwRXs4SOW5STMm3WYd7A,60601
125
125
  datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
126
126
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
127
127
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
153
153
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
154
154
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
155
155
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
156
- datachain-0.18.7.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
- datachain-0.18.7.dist-info/METADATA,sha256=OXGuP0EbV6ZC57NPhtyse2-6OP2pDKbhJkmcDfHp1mU,11319
158
- datachain-0.18.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- datachain-0.18.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
- datachain-0.18.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
- datachain-0.18.7.dist-info/RECORD,,
156
+ datachain-0.18.9.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
+ datachain-0.18.9.dist-info/METADATA,sha256=0BhJEeQiYf41Rg7DLgJ-WtiUu9cpwwUtVwo__lPaMAw,11319
158
+ datachain-0.18.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
+ datachain-0.18.9.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
+ datachain-0.18.9.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
+ datachain-0.18.9.dist-info/RECORD,,