datachain 0.11.11__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (44) hide show
  1. datachain/catalog/catalog.py +39 -7
  2. datachain/catalog/loader.py +19 -13
  3. datachain/cli/__init__.py +2 -1
  4. datachain/cli/commands/ls.py +8 -6
  5. datachain/cli/commands/show.py +7 -0
  6. datachain/cli/parser/studio.py +13 -1
  7. datachain/client/fsspec.py +12 -16
  8. datachain/client/gcs.py +1 -1
  9. datachain/client/hf.py +36 -14
  10. datachain/client/local.py +1 -4
  11. datachain/client/s3.py +1 -1
  12. datachain/data_storage/metastore.py +6 -0
  13. datachain/data_storage/warehouse.py +3 -8
  14. datachain/dataset.py +8 -0
  15. datachain/error.py +0 -12
  16. datachain/fs/utils.py +30 -0
  17. datachain/func/__init__.py +5 -0
  18. datachain/func/func.py +2 -1
  19. datachain/lib/dc.py +59 -15
  20. datachain/lib/file.py +63 -18
  21. datachain/lib/image.py +30 -6
  22. datachain/lib/listing.py +21 -39
  23. datachain/lib/meta_formats.py +2 -2
  24. datachain/lib/signal_schema.py +65 -18
  25. datachain/lib/udf.py +3 -0
  26. datachain/lib/udf_signature.py +17 -9
  27. datachain/lib/video.py +7 -5
  28. datachain/model/bbox.py +209 -58
  29. datachain/model/pose.py +49 -37
  30. datachain/model/segment.py +22 -18
  31. datachain/model/ultralytics/bbox.py +9 -9
  32. datachain/model/ultralytics/pose.py +7 -7
  33. datachain/model/ultralytics/segment.py +7 -7
  34. datachain/model/utils.py +191 -0
  35. datachain/query/dataset.py +8 -2
  36. datachain/sql/sqlite/base.py +2 -2
  37. datachain/studio.py +8 -6
  38. datachain/utils.py +0 -16
  39. {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/METADATA +4 -2
  40. {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/RECORD +44 -42
  41. {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/WHEEL +1 -1
  42. {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/LICENSE +0 -0
  43. {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/entry_points.txt +0 -0
  44. {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/top_level.txt +0 -0
datachain/lib/udf.py CHANGED
@@ -159,6 +159,7 @@ class UDFBase(AbstractUDF):
159
159
  ```
160
160
  """
161
161
 
162
+ is_input_batched = False
162
163
  is_output_batched = False
163
164
  prefetch: int = 0
164
165
 
@@ -395,6 +396,7 @@ class Mapper(UDFBase):
395
396
  class BatchMapper(UDFBase):
396
397
  """Inherit from this class to pass to `DataChain.batch_map()`."""
397
398
 
399
+ is_input_batched = True
398
400
  is_output_batched = True
399
401
 
400
402
  def run(
@@ -481,6 +483,7 @@ class Generator(UDFBase):
481
483
  class Aggregator(UDFBase):
482
484
  """Inherit from this class to pass to `DataChain.agg()`."""
483
485
 
486
+ is_input_batched = True
484
487
  is_output_batched = True
485
488
 
486
489
  def run(
datachain/lib/udf_signature.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import inspect
2
2
  from collections.abc import Generator, Iterator, Sequence
3
3
  from dataclasses import dataclass
4
- from typing import Callable, Union, get_args, get_origin
4
+ from typing import Any, Callable, Union, get_args, get_origin
5
5
 
6
6
  from datachain.lib.data_model import DataType, DataTypeNames, is_chain_type
7
7
  from datachain.lib.signal_schema import SignalSchema
@@ -18,7 +18,7 @@ class UdfSignatureError(DataChainParamsError):
18
18
  @dataclass
19
19
  class UdfSignature:
20
20
  func: Union[Callable, UDFBase]
21
- params: Sequence[str]
21
+ params: dict[str, Union[DataType, Any]]
22
22
  output_schema: SignalSchema
23
23
 
24
24
  DEFAULT_RETURN_TYPE = str
@@ -58,15 +58,23 @@ class UdfSignature:
58
58
  if not isinstance(udf_func, UDFBase) and not callable(udf_func):
59
59
  raise UdfSignatureError(chain, f"UDF '{udf_func}' is not callable")
60
60
 
61
- func_params_map_sign, func_outs_sign, is_iterator = (
62
- UdfSignature._func_signature(chain, udf_func)
61
+ func_params_map_sign, func_outs_sign, is_iterator = cls._func_signature(
62
+ chain, udf_func
63
63
  )
64
+
65
+ udf_params: dict[str, Union[DataType, Any]] = {}
64
66
  if params:
65
- udf_params = [params] if isinstance(params, str) else params
66
- elif not func_params_map_sign:
67
- udf_params = []
68
- else:
69
- udf_params = list(func_params_map_sign.keys())
67
+ udf_params = (
68
+ {params: Any} if isinstance(params, str) else dict.fromkeys(params, Any)
69
+ )
70
+ elif func_params_map_sign:
71
+ udf_params = {
72
+ param: (
73
+ param_type if param_type is not inspect.Parameter.empty else Any
74
+ )
75
+ for param, param_type in func_params_map_sign.items()
76
+ }
77
+
70
78
  if output:
71
79
  udf_output_map = UdfSignature._validate_output(
72
80
  chain, signal_name, func, func_outs_sign, output
datachain/lib/video.py CHANGED
@@ -1,11 +1,11 @@
1
1
  import posixpath
2
2
  import shutil
3
3
  import tempfile
4
- from typing import Optional
4
+ from typing import Optional, Union
5
5
 
6
6
  from numpy import ndarray
7
7
 
8
- from datachain.lib.file import FileError, ImageFile, Video, VideoFile
8
+ from datachain.lib.file import File, FileError, ImageFile, Video, VideoFile
9
9
 
10
10
  try:
11
11
  import ffmpeg
@@ -18,7 +18,7 @@ except ImportError as exc:
18
18
  ) from exc
19
19
 
20
20
 
21
- def video_info(file: VideoFile) -> Video:
21
+ def video_info(file: Union[File, VideoFile]) -> Video:
22
22
  """
23
23
  Returns video file information.
24
24
 
@@ -28,6 +28,8 @@ def video_info(file: VideoFile) -> Video:
28
28
  Returns:
29
29
  Video: Video file information.
30
30
  """
31
+ file = file.as_video_file()
32
+
31
33
  if not (file_path := file.get_local_path()):
32
34
  file.ensure_cached()
33
35
  file_path = file.get_local_path()
@@ -170,7 +172,7 @@ def save_video_frame(
170
172
  output_file = posixpath.join(
171
173
  output, f"{video.get_file_stem()}_{frame:04d}.{format}"
172
174
  )
173
- return ImageFile.upload(img, output_file)
175
+ return ImageFile.upload(img, output_file, catalog=video._catalog)
174
176
 
175
177
 
176
178
  def save_video_fragment(
@@ -218,6 +220,6 @@ def save_video_fragment(
218
220
  ).output(output_file_tmp).run(quiet=True)
219
221
 
220
222
  with open(output_file_tmp, "rb") as f:
221
- return VideoFile.upload(f.read(), output_file)
223
+ return VideoFile.upload(f.read(), output_file, catalog=video._catalog)
222
224
  finally:
223
225
  shutil.rmtree(temp_dir)
datachain/model/bbox.py CHANGED
@@ -1,47 +1,216 @@
1
+ from collections.abc import Sequence
2
+ from typing import TYPE_CHECKING, Union
3
+
1
4
  from pydantic import Field
2
5
 
3
6
  from datachain.lib.data_model import DataModel
4
7
 
8
+ from .utils import convert_bbox, validate_bbox
9
+
10
+ if TYPE_CHECKING:
11
+ from .pose import Pose, Pose3D
12
+
5
13
 
6
14
  class BBox(DataModel):
7
15
  """
8
- A data model for representing bounding box.
16
+ A data model representing a bounding box.
9
17
 
10
18
  Attributes:
11
- title (str): The title of the bounding box.
12
- coords (list[int]): The coordinates of the bounding box.
19
+ title (str): The title or label associated with the bounding box.
20
+ coords (list[int]): A list of four bounding box coordinates.
13
21
 
14
- The bounding box is defined by two points:
15
- - (x1, y1): The top-left corner of the box.
16
- - (x2, y2): The bottom-right corner of the box.
22
+ The bounding box follows the PASCAL VOC format, where:
23
+ - (x1, y1) represents the pixel coordinates of the top-left corner.
24
+ - (x2, y2) represents the pixel coordinates of the bottom-right corner.
17
25
  """
18
26
 
19
27
  title: str = Field(default="")
20
28
  coords: list[int] = Field(default=[])
21
29
 
22
30
  @staticmethod
23
- def from_list(coords: list[float], title: str = "") -> "BBox":
24
- assert len(coords) == 4, "Bounding box must be a list of 4 coordinates."
25
- assert all(isinstance(value, (int, float)) for value in coords), (
26
- "Bounding box coordinates must be floats or integers."
27
- )
28
- return BBox(
29
- title=title,
30
- coords=[round(c) for c in coords],
31
+ def from_albumentations(
32
+ coords: Sequence[float],
33
+ img_size: Sequence[int],
34
+ title: str = "",
35
+ ) -> "BBox":
36
+ """
37
+ Create a bounding box from Albumentations format.
38
+
39
+ Albumentations represents bounding boxes as `[x_min, y_min, x_max, y_max]`
40
+ with normalized coordinates (values between 0 and 1) relative to the image size.
41
+
42
+ Args:
43
+ coords (Sequence[float]): The bounding box coordinates in
44
+ Albumentations format.
45
+ img_size (Sequence[int]): The reference image size as `[width, height]`.
46
+ title (str, optional): The title or label of the bounding box.
47
+ Defaults to an empty string.
48
+
49
+ Returns:
50
+ BBox: The bounding box data model.
51
+ """
52
+ validate_bbox(coords, float)
53
+ bbox_coords = convert_bbox(coords, img_size, "albumentations", "voc")
54
+ return BBox(title=title, coords=list(map(round, bbox_coords)))
55
+
56
+ def to_albumentations(self, img_size: Sequence[int]) -> list[float]:
57
+ """
58
+ Convert the bounding box coordinates to Albumentations format.
59
+
60
+ Albumentations represents bounding boxes as `[x_min, y_min, x_max, y_max]`
61
+ with normalized coordinates (values between 0 and 1) relative to the image size.
62
+
63
+ Args:
64
+ img_size (Sequence[int]): The reference image size as `[width, height]`.
65
+
66
+ Returns:
67
+ list[float]: The bounding box coordinates in Albumentations format.
68
+ """
69
+ return convert_bbox(self.coords, img_size, "voc", "albumentations")
70
+
71
+ @staticmethod
72
+ def from_coco(
73
+ coords: Sequence[float],
74
+ title: str = "",
75
+ ) -> "BBox":
76
+ """
77
+ Create a bounding box from COCO format.
78
+
79
+ COCO format represents bounding boxes as [x_min, y_min, width, height], where:
80
+ - (x_min, y_min) are the pixel coordinates of the top-left corner.
81
+ - width and height define the size of the bounding box in pixels.
82
+
83
+ Args:
84
+ coords (Sequence[float]): The bounding box coordinates in COCO format.
85
+ title (str): The title of the bounding box.
86
+
87
+ Returns:
88
+ BBox: The bounding box data model.
89
+ """
90
+ validate_bbox(coords, float, int)
91
+ bbox_coords = convert_bbox(coords, [], "coco", "voc")
92
+ return BBox(title=title, coords=list(map(round, bbox_coords)))
93
+
94
+ def to_coco(self) -> list[int]:
95
+ """
96
+ Return the bounding box coordinates in COCO format.
97
+
98
+ COCO format represents bounding boxes as [x_min, y_min, width, height], where:
99
+ - (x_min, y_min) are the pixel coordinates of the top-left corner.
100
+ - width and height define the size of the bounding box in pixels.
101
+
102
+ Returns:
103
+ list[int]: The bounding box coordinates in COCO format.
104
+ """
105
+ res = convert_bbox(self.coords, [], "voc", "coco")
106
+ return list(map(round, res))
107
+
108
+ @staticmethod
109
+ def from_voc(
110
+ coords: Sequence[float],
111
+ title: str = "",
112
+ ) -> "BBox":
113
+ """
114
+ Create a bounding box from PASCAL VOC format.
115
+
116
+ PASCAL VOC format represents bounding boxes as [x_min, y_min, x_max, y_max],
117
+ where:
118
+ - (x_min, y_min) are the pixel coordinates of the top-left corner.
119
+ - (x_max, y_max) are the pixel coordinates of the bottom-right corner.
120
+
121
+ Args:
122
+ coords (Sequence[float]): The bounding box coordinates in VOC format.
123
+ title (str): The title of the bounding box.
124
+
125
+ Returns:
126
+ BBox: The bounding box data model.
127
+ """
128
+ validate_bbox(coords, float, int)
129
+ return BBox(title=title, coords=list(map(round, coords)))
130
+
131
+ def to_voc(self) -> list[int]:
132
+ """
133
+ Return the bounding box coordinates in PASCAL VOC format.
134
+
135
+ PASCAL VOC format represents bounding boxes as [x_min, y_min, x_max, y_max],
136
+ where:
137
+ - (x_min, y_min) are the pixel coordinates of the top-left corner.
138
+ - (x_max, y_max) are the pixel coordinates of the bottom-right corner.
139
+
140
+ Returns:
141
+ list[int]: The bounding box coordinates in VOC format.
142
+ """
143
+ return self.coords
144
+
145
+ @staticmethod
146
+ def from_yolo(
147
+ coords: Sequence[float],
148
+ img_size: Sequence[int],
149
+ title: str = "",
150
+ ) -> "BBox":
151
+ """
152
+ Create a bounding box from YOLO format.
153
+
154
+ YOLO format represents bounding boxes as [x_center, y_center, width, height],
155
+ where:
156
+ - (x_center, y_center) are the normalized coordinates of the box center.
157
+ - width and height normalized values define the size of the bounding box.
158
+
159
+ Args:
160
+ coords (Sequence[float]): The bounding box coordinates in YOLO format.
161
+ img_size (Sequence[int]): The reference image size as `[width, height]`.
162
+ title (str): The title of the bounding box.
163
+
164
+ Returns:
165
+ BBox: The bounding box data model.
166
+ """
167
+ validate_bbox(coords, float)
168
+ bbox_coords = convert_bbox(coords, img_size, "yolo", "voc")
169
+ return BBox(title=title, coords=list(map(round, bbox_coords)))
170
+
171
+ def to_yolo(self, img_size: Sequence[int]) -> list[float]:
172
+ """
173
+ Return the bounding box coordinates in YOLO format.
174
+
175
+ YOLO format represents bounding boxes as [x_center, y_center, width, height],
176
+ where:
177
+ - (x_center, y_center) are the normalized coordinates of the box center.
178
+ - width and height normalized values define the size of the bounding box.
179
+
180
+ Args:
181
+ img_size (Sequence[int]): The reference image size as `[width, height]`.
182
+
183
+ Returns:
184
+ list[float]: The bounding box coordinates in YOLO format.
185
+ """
186
+ return convert_bbox(self.coords, img_size, "voc", "yolo")
187
+
188
+ def point_inside(self, x: int, y: int) -> bool:
189
+ """
190
+ Return True if the point is inside the bounding box.
191
+
192
+ Assumes that if the point is on the edge of the bounding box,
193
+ it is considered inside.
194
+ """
195
+ x1, y1, x2, y2 = self.coords
196
+ return x1 <= x <= x2 and y1 <= y <= y2
197
+
198
+ def pose_inside(self, pose: Union["Pose", "Pose3D"]) -> bool:
199
+ """Return True if the pose is inside the bounding box."""
200
+ return all(
201
+ self.point_inside(x, y) for x, y in zip(pose.x, pose.y) if x > 0 or y > 0
31
202
  )
32
203
 
204
+ @staticmethod
205
+ def from_list(coords: Sequence[float], title: str = "") -> "BBox":
206
+ return BBox.from_voc(coords, title=title)
207
+
33
208
  @staticmethod
34
209
  def from_dict(coords: dict[str, float], title: str = "") -> "BBox":
35
- assert isinstance(coords, dict) and set(coords) == {
36
- "x1",
37
- "y1",
38
- "x2",
39
- "y2",
40
- }, "Bounding box must be a dictionary with keys 'x1', 'y1', 'x2' and 'y2'."
41
- return BBox.from_list(
42
- [coords["x1"], coords["y1"], coords["x2"], coords["y2"]],
43
- title=title,
44
- )
210
+ keys = ("x1", "y1", "x2", "y2")
211
+ if not isinstance(coords, dict) or set(coords) != set(keys):
212
+ raise ValueError("Bounding box must be a dictionary with coordinates.")
213
+ return BBox.from_voc([coords[k] for k in keys], title=title)
45
214
 
46
215
 
47
216
  class OBBox(DataModel):
@@ -63,40 +232,22 @@ class OBBox(DataModel):
63
232
  coords: list[int] = Field(default=[])
64
233
 
65
234
  @staticmethod
66
- def from_list(coords: list[float], title: str = "") -> "OBBox":
67
- assert len(coords) == 8, (
68
- "Oriented bounding box must be a list of 8 coordinates."
69
- )
70
- assert all(isinstance(value, (int, float)) for value in coords), (
71
- "Oriented bounding box coordinates must be floats or integers."
72
- )
73
- return OBBox(
74
- title=title,
75
- coords=[round(c) for c in coords],
76
- )
235
+ def from_list(coords: Sequence[float], title: str = "") -> "OBBox":
236
+ if not isinstance(coords, (list, tuple)):
237
+ raise TypeError("Oriented bounding box must be a list of coordinates.")
238
+ if len(coords) != 8:
239
+ raise ValueError("Oriented bounding box must have 8 coordinates.")
240
+ if not all(isinstance(value, (int, float)) for value in coords):
241
+ raise ValueError(
242
+ "Oriented bounding box coordinates must be floats or integers."
243
+ )
244
+ return OBBox(title=title, coords=list(map(round, coords)))
77
245
 
78
246
  @staticmethod
79
247
  def from_dict(coords: dict[str, float], title: str = "") -> "OBBox":
80
- assert isinstance(coords, dict) and set(coords) == {
81
- "x1",
82
- "y1",
83
- "x2",
84
- "y2",
85
- "x3",
86
- "y3",
87
- "x4",
88
- "y4",
89
- }, "Oriented bounding box must be a dictionary with coordinates."
90
- return OBBox.from_list(
91
- [
92
- coords["x1"],
93
- coords["y1"],
94
- coords["x2"],
95
- coords["y2"],
96
- coords["x3"],
97
- coords["y3"],
98
- coords["x4"],
99
- coords["y4"],
100
- ],
101
- title=title,
102
- )
248
+ keys = ("x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4")
249
+ if not isinstance(coords, dict) or set(coords) != set(keys):
250
+ raise ValueError(
251
+ "Oriented bounding box must be a dictionary with coordinates."
252
+ )
253
+ return OBBox.from_list([coords[k] for k in keys], title=title)
datachain/model/pose.py CHANGED
@@ -1,3 +1,5 @@
1
+ from collections.abc import Sequence
2
+
1
3
  from pydantic import Field
2
4
 
3
5
  from datachain.lib.data_model import DataModel
@@ -19,27 +21,30 @@ class Pose(DataModel):
19
21
  y: list[int] = Field(default=[])
20
22
 
21
23
  @staticmethod
22
- def from_list(points: list[list[float]]) -> "Pose":
23
- assert len(points) == 2, "Pose must be a list of 2 lists: x and y coordinates."
24
+ def from_list(points: Sequence[Sequence[float]]) -> "Pose":
25
+ if not isinstance(points, (list, tuple)):
26
+ raise TypeError("Pose must be a list of coordinates.")
27
+ if len(points) != 2:
28
+ raise ValueError("Pose must be a list of 2 lists: x and y coordinates.")
24
29
  points_x, points_y = points
25
- assert len(points_x) == len(points_y) == 17, (
26
- "Pose x and y coordinates must have the same length of 17."
27
- )
28
- assert all(
29
- isinstance(value, (int, float)) for value in [*points_x, *points_y]
30
- ), "Pose coordinates must be floats or integers."
31
- return Pose(
32
- x=[round(coord) for coord in points_x],
33
- y=[round(coord) for coord in points_y],
34
- )
30
+ if not isinstance(points_x, (list, tuple)) or not isinstance(
31
+ points_y, (list, tuple)
32
+ ):
33
+ raise TypeError("Pose x and y coordinates must be lists.")
34
+ if len(points_x) != len(points_y) != 17:
35
+ raise ValueError(
36
+ "Pose x and y coordinates must have the same length of 17."
37
+ )
38
+ if not all(isinstance(value, (int, float)) for value in [*points_x, *points_y]):
39
+ raise ValueError("Pose coordinates must be floats or integers.")
40
+ return Pose(x=list(map(round, points_x)), y=list(map(round, points_y)))
35
41
 
36
42
  @staticmethod
37
- def from_dict(points: dict[str, list[float]]) -> "Pose":
38
- assert isinstance(points, dict) and set(points) == {
39
- "x",
40
- "y",
41
- }, "Pose must be a dict with keys 'x' and 'y'."
42
- return Pose.from_list([points["x"], points["y"]])
43
+ def from_dict(points: dict[str, Sequence[float]]) -> "Pose":
44
+ keys = ("x", "y")
45
+ if not isinstance(points, dict) or set(points) != set(keys):
46
+ raise ValueError("Pose must be a dictionary with coordinates.")
47
+ return Pose.from_list([points[k] for k in keys])
43
48
 
44
49
 
45
50
  class Pose3D(DataModel):
@@ -60,29 +65,36 @@ class Pose3D(DataModel):
60
65
  visible: list[float] = Field(default=[])
61
66
 
62
67
  @staticmethod
63
- def from_list(points: list[list[float]]) -> "Pose3D":
64
- assert len(points) == 3, (
65
- "Pose3D must be a list of 3 lists: x, y coordinates and visible."
66
- )
68
+ def from_list(points: Sequence[Sequence[float]]) -> "Pose3D":
69
+ if not isinstance(points, (list, tuple)):
70
+ raise TypeError("Pose3D must be a list of coordinates.")
71
+ if len(points) != 3:
72
+ raise ValueError(
73
+ "Pose3D must be a list of 3 lists: x, y coordinates and visible."
74
+ )
67
75
  points_x, points_y, points_v = points
68
- assert len(points_x) == len(points_y) == len(points_v) == 17, (
69
- "Pose3D x, y coordinates and visible must have the same length of 17."
70
- )
71
- assert all(
76
+ if (
77
+ not isinstance(points_x, (list, tuple))
78
+ or not isinstance(points_y, (list, tuple))
79
+ or not isinstance(points_v, (list, tuple))
80
+ ):
81
+ raise TypeError("Pose3D x, y and visible must be lists.")
82
+ if len(points_x) != len(points_y) != len(points_v) != 17:
83
+ raise ValueError("Pose3D x, y and visible must have the same length of 17.")
84
+ if not all(
72
85
  isinstance(value, (int, float))
73
86
  for value in [*points_x, *points_y, *points_v]
74
- ), "Pose3D coordinates must be floats or integers."
87
+ ):
88
+ raise ValueError("Pose3D coordinates must be floats or integers.")
75
89
  return Pose3D(
76
- x=[round(coord) for coord in points_x],
77
- y=[round(coord) for coord in points_y],
78
- visible=points_v,
90
+ x=list(map(round, points_x)),
91
+ y=list(map(round, points_y)),
92
+ visible=list(points_v),
79
93
  )
80
94
 
81
95
  @staticmethod
82
- def from_dict(points: dict[str, list[float]]) -> "Pose3D":
83
- assert isinstance(points, dict) and set(points) == {
84
- "x",
85
- "y",
86
- "visible",
87
- }, "Pose3D must be a dict with keys 'x', 'y' and 'visible'."
88
- return Pose3D.from_list([points["x"], points["y"], points["visible"]])
96
+ def from_dict(points: dict[str, Sequence[float]]) -> "Pose3D":
97
+ keys = ("x", "y", "visible")
98
+ if not isinstance(points, dict) or set(points) != set(keys):
99
+ raise ValueError("Pose3D must be a dictionary with coordinates.")
100
+ return Pose3D.from_list([points[k] for k in keys])
datachain/model/segment.py CHANGED
@@ -1,3 +1,5 @@
1
+ from collections.abc import Sequence
2
+
1
3
  from pydantic import Field
2
4
 
3
5
  from datachain.lib.data_model import DataModel
@@ -21,27 +23,29 @@ class Segment(DataModel):
21
23
  y: list[int] = Field(default=[])
22
24
 
23
25
  @staticmethod
24
- def from_list(points: list[list[float]], title: str = "") -> "Segment":
25
- assert len(points) == 2, (
26
- "Segment must be a list of 2 lists: x and y coordinates."
27
- )
26
+ def from_list(points: Sequence[Sequence[float]], title: str = "") -> "Segment":
27
+ if not isinstance(points, (list, tuple)):
28
+ raise TypeError("Segment must be a list of coordinates.")
29
+ if len(points) != 2:
30
+ raise ValueError("Segment must be a list of 2 lists: x and y coordinates.")
28
31
  points_x, points_y = points
29
- assert len(points_x) == len(points_y), (
30
- "Segment x and y coordinates must have the same length."
31
- )
32
- assert all(
33
- isinstance(value, (int, float)) for value in [*points_x, *points_y]
34
- ), "Segment coordinates must be floats or integers."
32
+ if not isinstance(points_x, (list, tuple)) or not isinstance(
33
+ points_y, (list, tuple)
34
+ ):
35
+ raise TypeError("Segment x and y coordinates must be lists.")
36
+ if len(points_x) != len(points_y):
37
+ raise ValueError("Segment x and y coordinates must have the same length.")
38
+ if not all(isinstance(value, (int, float)) for value in [*points_x, *points_y]):
39
+ raise ValueError("Segment coordinates must be floats or integers.")
35
40
  return Segment(
36
41
  title=title,
37
- x=[round(coord) for coord in points_x],
38
- y=[round(coord) for coord in points_y],
42
+ x=list(map(round, points_x)),
43
+ y=list(map(round, points_y)),
39
44
  )
40
45
 
41
46
  @staticmethod
42
- def from_dict(points: dict[str, list[float]], title: str = "") -> "Segment":
43
- assert isinstance(points, dict) and set(points) == {
44
- "x",
45
- "y",
46
- }, "Segment must be a dict with keys 'x' and 'y'."
47
- return Segment.from_list([points["x"], points["y"]], title=title)
47
+ def from_dict(points: dict[str, Sequence[float]], title: str = "") -> "Segment":
48
+ keys = ("x", "y")
49
+ if not isinstance(points, dict) or set(points) != set(keys):
50
+ raise ValueError("Segment must be a dictionary with coordinates.")
51
+ return Segment.from_list([points[k] for k in keys], title=title)
datachain/model/ultralytics/bbox.py CHANGED
@@ -23,7 +23,7 @@ class YoloBBox(DataModel):
23
23
  cls: int = Field(default=-1)
24
24
  name: str = Field(default="")
25
25
  confidence: float = Field(default=0)
26
- box: BBox
26
+ box: BBox = Field(default=BBox())
27
27
 
28
28
  @staticmethod
29
29
  def from_result(result: "Results") -> "YoloBBox":
@@ -55,10 +55,10 @@ class YoloBBoxes(DataModel):
55
55
  box: A list of bounding boxes of the detected objects
56
56
  """
57
57
 
58
- cls: list[int]
59
- name: list[str]
60
- confidence: list[float]
61
- box: list[BBox]
58
+ cls: list[int] = Field(default=[])
59
+ name: list[str] = Field(default=[])
60
+ confidence: list[float] = Field(default=[])
61
+ box: list[BBox] = Field(default=[])
62
62
 
63
63
  @staticmethod
64
64
  def from_results(results: list["Results"]) -> "YoloBBoxes":
@@ -124,10 +124,10 @@ class YoloOBBoxes(DataModel):
124
124
  box: A list of oriented bounding boxes of the detected objects.
125
125
  """
126
126
 
127
- cls: list[int]
128
- name: list[str]
129
- confidence: list[float]
130
- box: list[OBBox]
127
+ cls: list[int] = Field(default=[])
128
+ name: list[str] = Field(default=[])
129
+ confidence: list[float] = Field(default=[])
130
+ box: list[OBBox] = Field(default=[])
131
131
 
132
132
  @staticmethod
133
133
  def from_results(results: list["Results"]) -> "YoloOBBoxes":
datachain/model/ultralytics/pose.py CHANGED
@@ -47,8 +47,8 @@ class YoloPose(DataModel):
47
47
  cls: int = Field(default=-1)
48
48
  name: str = Field(default="")
49
49
  confidence: float = Field(default=0)
50
- box: BBox
51
- pose: Pose3D
50
+ box: BBox = Field(default=BBox())
51
+ pose: Pose3D = Field(default=Pose3D())
52
52
 
53
53
  @staticmethod
54
54
  def from_result(result: "Results") -> "YoloPose":
@@ -87,11 +87,11 @@ class YoloPoses(DataModel):
87
87
  pose: The 3D pose keypoints of the poses.
88
88
  """
89
89
 
90
- cls: list[int]
91
- name: list[str]
92
- confidence: list[float]
93
- box: list[BBox]
94
- pose: list[Pose3D]
90
+ cls: list[int] = Field(default=[])
91
+ name: list[str] = Field(default=[])
92
+ confidence: list[float] = Field(default=[])
93
+ box: list[BBox] = Field(default=[])
94
+ pose: list[Pose3D] = Field(default=[])
95
95
 
96
96
  @staticmethod
97
97
  def from_results(results: list["Results"]) -> "YoloPoses":