datachain 0.11.11__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +33 -5
- datachain/catalog/loader.py +19 -13
- datachain/cli/__init__.py +2 -1
- datachain/cli/parser/studio.py +13 -1
- datachain/client/fsspec.py +12 -16
- datachain/client/hf.py +36 -14
- datachain/client/local.py +1 -4
- datachain/data_storage/warehouse.py +3 -8
- datachain/dataset.py +8 -0
- datachain/error.py +0 -12
- datachain/fs/utils.py +30 -0
- datachain/func/__init__.py +5 -0
- datachain/func/func.py +2 -1
- datachain/lib/dc.py +23 -8
- datachain/lib/file.py +55 -17
- datachain/lib/image.py +30 -6
- datachain/lib/listing.py +21 -39
- datachain/lib/video.py +7 -5
- datachain/model/bbox.py +209 -58
- datachain/model/pose.py +49 -37
- datachain/model/segment.py +22 -18
- datachain/model/ultralytics/bbox.py +9 -9
- datachain/model/ultralytics/pose.py +7 -7
- datachain/model/ultralytics/segment.py +7 -7
- datachain/model/utils.py +191 -0
- datachain/query/dataset.py +4 -2
- datachain/studio.py +8 -6
- datachain/utils.py +0 -16
- {datachain-0.11.11.dist-info → datachain-0.12.0.dist-info}/METADATA +4 -2
- {datachain-0.11.11.dist-info → datachain-0.12.0.dist-info}/RECORD +34 -32
- {datachain-0.11.11.dist-info → datachain-0.12.0.dist-info}/WHEEL +1 -1
- {datachain-0.11.11.dist-info → datachain-0.12.0.dist-info}/LICENSE +0 -0
- {datachain-0.11.11.dist-info → datachain-0.12.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.11.11.dist-info → datachain-0.12.0.dist-info}/top_level.txt +0 -0
datachain/model/pose.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
|
|
1
3
|
from pydantic import Field
|
|
2
4
|
|
|
3
5
|
from datachain.lib.data_model import DataModel
|
|
@@ -19,27 +21,30 @@ class Pose(DataModel):
|
|
|
19
21
|
y: list[int] = Field(default=[])
|
|
20
22
|
|
|
21
23
|
@staticmethod
|
|
22
|
-
def from_list(points:
|
|
23
|
-
|
|
24
|
+
def from_list(points: Sequence[Sequence[float]]) -> "Pose":
|
|
25
|
+
if not isinstance(points, (list, tuple)):
|
|
26
|
+
raise TypeError("Pose must be a list of coordinates.")
|
|
27
|
+
if len(points) != 2:
|
|
28
|
+
raise ValueError("Pose must be a list of 2 lists: x and y coordinates.")
|
|
24
29
|
points_x, points_y = points
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
30
|
+
if not isinstance(points_x, (list, tuple)) or not isinstance(
|
|
31
|
+
points_y, (list, tuple)
|
|
32
|
+
):
|
|
33
|
+
raise TypeError("Pose x and y coordinates must be lists.")
|
|
34
|
+
if len(points_x) != len(points_y) != 17:
|
|
35
|
+
raise ValueError(
|
|
36
|
+
"Pose x and y coordinates must have the same length of 17."
|
|
37
|
+
)
|
|
38
|
+
if not all(isinstance(value, (int, float)) for value in [*points_x, *points_y]):
|
|
39
|
+
raise ValueError("Pose coordinates must be floats or integers.")
|
|
40
|
+
return Pose(x=list(map(round, points_x)), y=list(map(round, points_y)))
|
|
35
41
|
|
|
36
42
|
@staticmethod
|
|
37
|
-
def from_dict(points: dict[str,
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
"
|
|
41
|
-
|
|
42
|
-
return Pose.from_list([points["x"], points["y"]])
|
|
43
|
+
def from_dict(points: dict[str, Sequence[float]]) -> "Pose":
|
|
44
|
+
keys = ("x", "y")
|
|
45
|
+
if not isinstance(points, dict) or set(points) != set(keys):
|
|
46
|
+
raise ValueError("Pose must be a dictionary with coordinates.")
|
|
47
|
+
return Pose.from_list([points[k] for k in keys])
|
|
43
48
|
|
|
44
49
|
|
|
45
50
|
class Pose3D(DataModel):
|
|
@@ -60,29 +65,36 @@ class Pose3D(DataModel):
|
|
|
60
65
|
visible: list[float] = Field(default=[])
|
|
61
66
|
|
|
62
67
|
@staticmethod
|
|
63
|
-
def from_list(points:
|
|
64
|
-
|
|
65
|
-
"Pose3D must be a list of
|
|
66
|
-
)
|
|
68
|
+
def from_list(points: Sequence[Sequence[float]]) -> "Pose3D":
|
|
69
|
+
if not isinstance(points, (list, tuple)):
|
|
70
|
+
raise TypeError("Pose3D must be a list of coordinates.")
|
|
71
|
+
if len(points) != 3:
|
|
72
|
+
raise ValueError(
|
|
73
|
+
"Pose3D must be a list of 3 lists: x, y coordinates and visible."
|
|
74
|
+
)
|
|
67
75
|
points_x, points_y, points_v = points
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
76
|
+
if (
|
|
77
|
+
not isinstance(points_x, (list, tuple))
|
|
78
|
+
or not isinstance(points_y, (list, tuple))
|
|
79
|
+
or not isinstance(points_v, (list, tuple))
|
|
80
|
+
):
|
|
81
|
+
raise TypeError("Pose3D x, y and visible must be lists.")
|
|
82
|
+
if len(points_x) != len(points_y) != len(points_v) != 17:
|
|
83
|
+
raise ValueError("Pose3D x, y and visible must have the same length of 17.")
|
|
84
|
+
if not all(
|
|
72
85
|
isinstance(value, (int, float))
|
|
73
86
|
for value in [*points_x, *points_y, *points_v]
|
|
74
|
-
)
|
|
87
|
+
):
|
|
88
|
+
raise ValueError("Pose3D coordinates must be floats or integers.")
|
|
75
89
|
return Pose3D(
|
|
76
|
-
x=
|
|
77
|
-
y=
|
|
78
|
-
visible=points_v,
|
|
90
|
+
x=list(map(round, points_x)),
|
|
91
|
+
y=list(map(round, points_y)),
|
|
92
|
+
visible=list(points_v),
|
|
79
93
|
)
|
|
80
94
|
|
|
81
95
|
@staticmethod
|
|
82
|
-
def from_dict(points: dict[str,
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
"
|
|
86
|
-
|
|
87
|
-
}, "Pose3D must be a dict with keys 'x', 'y' and 'visible'."
|
|
88
|
-
return Pose3D.from_list([points["x"], points["y"], points["visible"]])
|
|
96
|
+
def from_dict(points: dict[str, Sequence[float]]) -> "Pose3D":
|
|
97
|
+
keys = ("x", "y", "visible")
|
|
98
|
+
if not isinstance(points, dict) or set(points) != set(keys):
|
|
99
|
+
raise ValueError("Pose3D must be a dictionary with coordinates.")
|
|
100
|
+
return Pose3D.from_list([points[k] for k in keys])
|
datachain/model/segment.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
|
|
1
3
|
from pydantic import Field
|
|
2
4
|
|
|
3
5
|
from datachain.lib.data_model import DataModel
|
|
@@ -21,27 +23,29 @@ class Segment(DataModel):
|
|
|
21
23
|
y: list[int] = Field(default=[])
|
|
22
24
|
|
|
23
25
|
@staticmethod
|
|
24
|
-
def from_list(points:
|
|
25
|
-
|
|
26
|
-
"Segment must be a list of
|
|
27
|
-
)
|
|
26
|
+
def from_list(points: Sequence[Sequence[float]], title: str = "") -> "Segment":
|
|
27
|
+
if not isinstance(points, (list, tuple)):
|
|
28
|
+
raise TypeError("Segment must be a list of coordinates.")
|
|
29
|
+
if len(points) != 2:
|
|
30
|
+
raise ValueError("Segment must be a list of 2 lists: x and y coordinates.")
|
|
28
31
|
points_x, points_y = points
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
if not isinstance(points_x, (list, tuple)) or not isinstance(
|
|
33
|
+
points_y, (list, tuple)
|
|
34
|
+
):
|
|
35
|
+
raise TypeError("Segment x and y coordinates must be lists.")
|
|
36
|
+
if len(points_x) != len(points_y):
|
|
37
|
+
raise ValueError("Segment x and y coordinates must have the same length.")
|
|
38
|
+
if not all(isinstance(value, (int, float)) for value in [*points_x, *points_y]):
|
|
39
|
+
raise ValueError("Segment coordinates must be floats or integers.")
|
|
35
40
|
return Segment(
|
|
36
41
|
title=title,
|
|
37
|
-
x=
|
|
38
|
-
y=
|
|
42
|
+
x=list(map(round, points_x)),
|
|
43
|
+
y=list(map(round, points_y)),
|
|
39
44
|
)
|
|
40
45
|
|
|
41
46
|
@staticmethod
|
|
42
|
-
def from_dict(points: dict[str,
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
"
|
|
46
|
-
|
|
47
|
-
return Segment.from_list([points["x"], points["y"]], title=title)
|
|
47
|
+
def from_dict(points: dict[str, Sequence[float]], title: str = "") -> "Segment":
|
|
48
|
+
keys = ("x", "y")
|
|
49
|
+
if not isinstance(points, dict) or set(points) != set(keys):
|
|
50
|
+
raise ValueError("Segment must be a dictionary with coordinates.")
|
|
51
|
+
return Segment.from_list([points[k] for k in keys], title=title)
|
|
@@ -23,7 +23,7 @@ class YoloBBox(DataModel):
|
|
|
23
23
|
cls: int = Field(default=-1)
|
|
24
24
|
name: str = Field(default="")
|
|
25
25
|
confidence: float = Field(default=0)
|
|
26
|
-
box: BBox
|
|
26
|
+
box: BBox = Field(default=BBox())
|
|
27
27
|
|
|
28
28
|
@staticmethod
|
|
29
29
|
def from_result(result: "Results") -> "YoloBBox":
|
|
@@ -55,10 +55,10 @@ class YoloBBoxes(DataModel):
|
|
|
55
55
|
box: A list of bounding boxes of the detected objects
|
|
56
56
|
"""
|
|
57
57
|
|
|
58
|
-
cls: list[int]
|
|
59
|
-
name: list[str]
|
|
60
|
-
confidence: list[float]
|
|
61
|
-
box: list[BBox]
|
|
58
|
+
cls: list[int] = Field(default=[])
|
|
59
|
+
name: list[str] = Field(default=[])
|
|
60
|
+
confidence: list[float] = Field(default=[])
|
|
61
|
+
box: list[BBox] = Field(default=[])
|
|
62
62
|
|
|
63
63
|
@staticmethod
|
|
64
64
|
def from_results(results: list["Results"]) -> "YoloBBoxes":
|
|
@@ -124,10 +124,10 @@ class YoloOBBoxes(DataModel):
|
|
|
124
124
|
box: A list of oriented bounding boxes of the detected objects.
|
|
125
125
|
"""
|
|
126
126
|
|
|
127
|
-
cls: list[int]
|
|
128
|
-
name: list[str]
|
|
129
|
-
confidence: list[float]
|
|
130
|
-
box: list[OBBox]
|
|
127
|
+
cls: list[int] = Field(default=[])
|
|
128
|
+
name: list[str] = Field(default=[])
|
|
129
|
+
confidence: list[float] = Field(default=[])
|
|
130
|
+
box: list[OBBox] = Field(default=[])
|
|
131
131
|
|
|
132
132
|
@staticmethod
|
|
133
133
|
def from_results(results: list["Results"]) -> "YoloOBBoxes":
|
|
@@ -47,8 +47,8 @@ class YoloPose(DataModel):
|
|
|
47
47
|
cls: int = Field(default=-1)
|
|
48
48
|
name: str = Field(default="")
|
|
49
49
|
confidence: float = Field(default=0)
|
|
50
|
-
box: BBox
|
|
51
|
-
pose: Pose3D
|
|
50
|
+
box: BBox = Field(default=BBox())
|
|
51
|
+
pose: Pose3D = Field(default=Pose3D())
|
|
52
52
|
|
|
53
53
|
@staticmethod
|
|
54
54
|
def from_result(result: "Results") -> "YoloPose":
|
|
@@ -87,11 +87,11 @@ class YoloPoses(DataModel):
|
|
|
87
87
|
pose: The 3D pose keypoints of the poses.
|
|
88
88
|
"""
|
|
89
89
|
|
|
90
|
-
cls: list[int]
|
|
91
|
-
name: list[str]
|
|
92
|
-
confidence: list[float]
|
|
93
|
-
box: list[BBox]
|
|
94
|
-
pose: list[Pose3D]
|
|
90
|
+
cls: list[int] = Field(default=[])
|
|
91
|
+
name: list[str] = Field(default=[])
|
|
92
|
+
confidence: list[float] = Field(default=[])
|
|
93
|
+
box: list[BBox] = Field(default=[])
|
|
94
|
+
pose: list[Pose3D] = Field(default=[])
|
|
95
95
|
|
|
96
96
|
@staticmethod
|
|
97
97
|
def from_results(results: list["Results"]) -> "YoloPoses":
|
|
@@ -25,8 +25,8 @@ class YoloSegment(DataModel):
|
|
|
25
25
|
cls: int = Field(default=-1)
|
|
26
26
|
name: str = Field(default="")
|
|
27
27
|
confidence: float = Field(default=0)
|
|
28
|
-
box: BBox
|
|
29
|
-
segment: Segment
|
|
28
|
+
box: BBox = Field(default=BBox())
|
|
29
|
+
segment: Segment = Field(default=Segment())
|
|
30
30
|
|
|
31
31
|
@staticmethod
|
|
32
32
|
def from_result(result: "Results") -> "YoloSegment":
|
|
@@ -65,11 +65,11 @@ class YoloSegments(DataModel):
|
|
|
65
65
|
segment (list[Segments]): The segments of the segments.
|
|
66
66
|
"""
|
|
67
67
|
|
|
68
|
-
cls: list[int]
|
|
69
|
-
name: list[str]
|
|
70
|
-
confidence: list[float]
|
|
71
|
-
box: list[BBox]
|
|
72
|
-
segment: list[Segment]
|
|
68
|
+
cls: list[int] = Field(default=[])
|
|
69
|
+
name: list[str] = Field(default=[])
|
|
70
|
+
confidence: list[float] = Field(default=[])
|
|
71
|
+
box: list[BBox] = Field(default=[])
|
|
72
|
+
segment: list[Segment] = Field(default=[])
|
|
73
73
|
|
|
74
74
|
@staticmethod
|
|
75
75
|
def from_results(results: list["Results"]) -> "YoloSegments":
|
datachain/model/utils.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
BBoxType = Literal["albumentations", "coco", "voc", "yolo"]
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def validate_bbox(coords: Sequence[float], *types: type) -> None:
|
|
8
|
+
"""Validate the bounding box coordinates."""
|
|
9
|
+
if not isinstance(coords, (list, tuple)):
|
|
10
|
+
raise TypeError(
|
|
11
|
+
f"Invalid bounding box coordinates: {coords}, should be a list or tuple"
|
|
12
|
+
)
|
|
13
|
+
if len(coords) != 4:
|
|
14
|
+
raise ValueError(
|
|
15
|
+
f"Invalid bounding box coordinates: {coords}, should have 4 values"
|
|
16
|
+
)
|
|
17
|
+
if any(not isinstance(c, types) for c in coords):
|
|
18
|
+
raise ValueError(
|
|
19
|
+
f"Invalid bounding box coordinates: {coords}, should be {types}"
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def convert_bbox(
|
|
24
|
+
coords: Sequence[float],
|
|
25
|
+
img_size: Sequence[int],
|
|
26
|
+
source: BBoxType,
|
|
27
|
+
target: BBoxType,
|
|
28
|
+
) -> list[float]:
|
|
29
|
+
"""
|
|
30
|
+
Convert the bounding box coordinates between different formats.
|
|
31
|
+
|
|
32
|
+
Supported formats: "albumentations", "coco", "voc", "yolo".
|
|
33
|
+
|
|
34
|
+
Albumentations format represents bounding boxes as [x_min, y_min, x_max, y_max],
|
|
35
|
+
where:
|
|
36
|
+
- (x_min, y_min) are the normalized coordinates of the top-left corner.
|
|
37
|
+
- (x_max, y_max) are the normalized coordinates of the bottom-right corner.
|
|
38
|
+
|
|
39
|
+
COCO format represents bounding boxes as [x_min, y_min, width, height], where:
|
|
40
|
+
- (x_min, y_min) are the pixel coordinates of the top-left corner.
|
|
41
|
+
- width and height define the size of the bounding box in pixels.
|
|
42
|
+
|
|
43
|
+
PASCAL VOC format represents bounding boxes as [x_min, y_min, x_max, y_max], where:
|
|
44
|
+
- (x_min, y_min) are the pixel coordinates of the top-left corner.
|
|
45
|
+
- (x_max, y_max) are the pixel coordinates of the bottom-right corner.
|
|
46
|
+
|
|
47
|
+
YOLO format represents bounding boxes as [x_center, y_center, width, height], where:
|
|
48
|
+
- (x_center, y_center) are the normalized coordinates of the box center.
|
|
49
|
+
- width and height normalized values define the size of the bounding box.
|
|
50
|
+
|
|
51
|
+
Normalized coordinates are floats between 0 and 1, representing the
|
|
52
|
+
relative position of the pixels in the image.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
coords (Sequence[float]): The bounding box coordinates to convert.
|
|
56
|
+
img_size (Sequence[int]): The reference image size (width, height).
|
|
57
|
+
source (str): The source bounding box format.
|
|
58
|
+
target (str): The target bounding box format.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
list[float]: The bounding box coordinates in the target format.
|
|
62
|
+
"""
|
|
63
|
+
if source == "albumentations":
|
|
64
|
+
return [
|
|
65
|
+
round(c, 4) for c in convert_albumentations_bbox(coords, img_size, target)
|
|
66
|
+
]
|
|
67
|
+
if source == "coco":
|
|
68
|
+
return [round(c, 4) for c in convert_coco_bbox(coords, img_size, target)]
|
|
69
|
+
if source == "voc":
|
|
70
|
+
return [round(c, 4) for c in convert_voc_bbox(coords, img_size, target)]
|
|
71
|
+
if source == "yolo":
|
|
72
|
+
return [round(c, 4) for c in convert_yolo_bbox(coords, img_size, target)]
|
|
73
|
+
raise ValueError(f"Unsupported source format: {source}")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def convert_albumentations_bbox(
|
|
77
|
+
coords: Sequence[float],
|
|
78
|
+
img_size: Sequence[int],
|
|
79
|
+
target: BBoxType,
|
|
80
|
+
) -> list[float]:
|
|
81
|
+
"""Convert the Albumentations bounding box coordinates to other formats."""
|
|
82
|
+
if target == "albumentations":
|
|
83
|
+
return list(coords)
|
|
84
|
+
if target == "coco":
|
|
85
|
+
return [
|
|
86
|
+
coords[0] * img_size[0],
|
|
87
|
+
coords[1] * img_size[1],
|
|
88
|
+
(coords[2] - coords[0]) * img_size[0],
|
|
89
|
+
(coords[3] - coords[1]) * img_size[1],
|
|
90
|
+
]
|
|
91
|
+
if target == "voc":
|
|
92
|
+
return [coords[i] * img_size[i % 2] for i in range(4)]
|
|
93
|
+
if target == "yolo":
|
|
94
|
+
return [
|
|
95
|
+
(coords[0] + coords[2]) / 2,
|
|
96
|
+
(coords[1] + coords[3]) / 2,
|
|
97
|
+
coords[2] - coords[0],
|
|
98
|
+
coords[3] - coords[1],
|
|
99
|
+
]
|
|
100
|
+
raise ValueError(f"Unsupported target format: {target}")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def convert_coco_bbox(
|
|
104
|
+
coords: Sequence[float],
|
|
105
|
+
img_size: Sequence[int],
|
|
106
|
+
target: BBoxType,
|
|
107
|
+
) -> list[float]:
|
|
108
|
+
"""Convert the COCO bounding box coordinates to other formats."""
|
|
109
|
+
if target == "albumentations":
|
|
110
|
+
return [
|
|
111
|
+
coords[0] / img_size[0],
|
|
112
|
+
coords[1] / img_size[1],
|
|
113
|
+
(coords[0] + coords[2]) / img_size[0],
|
|
114
|
+
(coords[1] + coords[3]) / img_size[1],
|
|
115
|
+
]
|
|
116
|
+
if target == "coco":
|
|
117
|
+
return list(coords)
|
|
118
|
+
if target == "voc":
|
|
119
|
+
return [coords[0], coords[1], coords[0] + coords[2], coords[1] + coords[3]]
|
|
120
|
+
if target == "yolo":
|
|
121
|
+
return [
|
|
122
|
+
(coords[0] + coords[2] / 2) / img_size[0],
|
|
123
|
+
(coords[1] + coords[3] / 2) / img_size[1],
|
|
124
|
+
coords[2] / img_size[0],
|
|
125
|
+
coords[3] / img_size[1],
|
|
126
|
+
]
|
|
127
|
+
raise ValueError(f"Unsupported target format: {target}")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def convert_voc_bbox(
|
|
131
|
+
coords: Sequence[float],
|
|
132
|
+
img_size: Sequence[int],
|
|
133
|
+
target: BBoxType,
|
|
134
|
+
) -> list[float]:
|
|
135
|
+
"""Convert the PASCAL VOC bounding box coordinates to other formats."""
|
|
136
|
+
if target == "albumentations":
|
|
137
|
+
return [
|
|
138
|
+
coords[0] / img_size[0],
|
|
139
|
+
coords[1] / img_size[1],
|
|
140
|
+
coords[2] / img_size[0],
|
|
141
|
+
coords[3] / img_size[1],
|
|
142
|
+
]
|
|
143
|
+
if target == "coco":
|
|
144
|
+
return [
|
|
145
|
+
coords[0],
|
|
146
|
+
coords[1],
|
|
147
|
+
coords[2] - coords[0],
|
|
148
|
+
coords[3] - coords[1],
|
|
149
|
+
]
|
|
150
|
+
if target == "voc":
|
|
151
|
+
return list(coords)
|
|
152
|
+
if target == "yolo":
|
|
153
|
+
return [
|
|
154
|
+
(coords[0] + coords[2]) / 2 / img_size[0],
|
|
155
|
+
(coords[1] + coords[3]) / 2 / img_size[1],
|
|
156
|
+
(coords[2] - coords[0]) / img_size[0],
|
|
157
|
+
(coords[3] - coords[1]) / img_size[1],
|
|
158
|
+
]
|
|
159
|
+
raise ValueError(f"Unsupported target format: {target}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def convert_yolo_bbox(
|
|
163
|
+
coords: Sequence[float],
|
|
164
|
+
img_size: Sequence[int],
|
|
165
|
+
target: BBoxType,
|
|
166
|
+
) -> list[float]:
|
|
167
|
+
"""Convert the YOLO bounding box coordinates to other formats."""
|
|
168
|
+
if target == "albumentations":
|
|
169
|
+
return [
|
|
170
|
+
coords[0] - coords[2] / 2,
|
|
171
|
+
coords[1] - coords[3] / 2,
|
|
172
|
+
coords[0] + coords[2] / 2,
|
|
173
|
+
coords[1] + coords[3] / 2,
|
|
174
|
+
]
|
|
175
|
+
if target == "coco":
|
|
176
|
+
return [
|
|
177
|
+
(coords[0] - coords[2] / 2) * img_size[0],
|
|
178
|
+
(coords[1] - coords[3] / 2) * img_size[1],
|
|
179
|
+
coords[2] * img_size[0],
|
|
180
|
+
coords[3] * img_size[1],
|
|
181
|
+
]
|
|
182
|
+
if target == "voc":
|
|
183
|
+
return [
|
|
184
|
+
(coords[0] - coords[2] / 2) * img_size[0],
|
|
185
|
+
(coords[1] - coords[3] / 2) * img_size[1],
|
|
186
|
+
(coords[0] + coords[2] / 2) * img_size[0],
|
|
187
|
+
(coords[1] + coords[3] / 2) * img_size[1],
|
|
188
|
+
]
|
|
189
|
+
if target == "yolo":
|
|
190
|
+
return list(coords)
|
|
191
|
+
raise ValueError(f"Unsupported target format: {target}")
|
datachain/query/dataset.py
CHANGED
|
@@ -22,7 +22,6 @@ from typing import (
|
|
|
22
22
|
)
|
|
23
23
|
|
|
24
24
|
import attrs
|
|
25
|
-
import psutil
|
|
26
25
|
import sqlalchemy
|
|
27
26
|
import sqlalchemy as sa
|
|
28
27
|
from attrs import frozen
|
|
@@ -52,7 +51,6 @@ from datachain.lib.udf import UDFAdapter, _get_cache
|
|
|
52
51
|
from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
|
|
53
52
|
from datachain.query.schema import C, UDFParamSpec, normalize_param
|
|
54
53
|
from datachain.query.session import Session
|
|
55
|
-
from datachain.remote.studio import is_token_set
|
|
56
54
|
from datachain.sql.functions.random import rand
|
|
57
55
|
from datachain.utils import (
|
|
58
56
|
batched,
|
|
@@ -333,6 +331,8 @@ def process_udf_outputs(
|
|
|
333
331
|
batch_size: int = INSERT_BATCH_SIZE,
|
|
334
332
|
cb: Callback = DEFAULT_CALLBACK,
|
|
335
333
|
) -> None:
|
|
334
|
+
import psutil
|
|
335
|
+
|
|
336
336
|
rows: list[UDFResult] = []
|
|
337
337
|
# Optimization: Compute row types once, rather than for every row.
|
|
338
338
|
udf_col_types = get_udf_col_types(warehouse, udf)
|
|
@@ -1087,6 +1087,8 @@ class DatasetQuery:
|
|
|
1087
1087
|
in_memory: bool = False,
|
|
1088
1088
|
fallback_to_studio: bool = True,
|
|
1089
1089
|
) -> None:
|
|
1090
|
+
from datachain.remote.studio import is_token_set
|
|
1091
|
+
|
|
1090
1092
|
self.session = Session.get(session, catalog=catalog, in_memory=in_memory)
|
|
1091
1093
|
self.catalog = catalog or self.session.catalog
|
|
1092
1094
|
self.steps: list[Step] = []
|
datachain/studio.py
CHANGED
|
@@ -57,7 +57,7 @@ def process_auth_cli_args(args: "Namespace"):
|
|
|
57
57
|
if args.cmd == "login":
|
|
58
58
|
return login(args)
|
|
59
59
|
if args.cmd == "logout":
|
|
60
|
-
return logout()
|
|
60
|
+
return logout(args.local)
|
|
61
61
|
if args.cmd == "token":
|
|
62
62
|
return token()
|
|
63
63
|
|
|
@@ -110,13 +110,15 @@ def login(args: "Namespace"):
|
|
|
110
110
|
except StudioAuthError as exc:
|
|
111
111
|
raise DataChainError(f"Failed to authenticate with Studio: {exc}") from exc
|
|
112
112
|
|
|
113
|
-
|
|
113
|
+
level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
|
|
114
|
+
config_path = save_config(hostname, access_token, level=level)
|
|
114
115
|
print(f"Authentication complete. Saved token to {config_path}.")
|
|
115
116
|
return 0
|
|
116
117
|
|
|
117
118
|
|
|
118
|
-
def logout():
|
|
119
|
-
|
|
119
|
+
def logout(local: bool = False):
|
|
120
|
+
level = ConfigLevel.LOCAL if local else ConfigLevel.GLOBAL
|
|
121
|
+
with Config(level).edit() as conf:
|
|
120
122
|
token = conf.get("studio", {}).get("token")
|
|
121
123
|
if not token:
|
|
122
124
|
raise DataChainError(
|
|
@@ -209,8 +211,8 @@ def remove_studio_dataset(
|
|
|
209
211
|
print(f"Dataset '{name}' removed from Studio")
|
|
210
212
|
|
|
211
213
|
|
|
212
|
-
def save_config(hostname, token):
|
|
213
|
-
config = Config(
|
|
214
|
+
def save_config(hostname, token, level=ConfigLevel.GLOBAL):
|
|
215
|
+
config = Config(level)
|
|
214
216
|
with config.edit() as conf:
|
|
215
217
|
studio_conf = conf.get("studio", {})
|
|
216
218
|
studio_conf["url"] = hostname
|
datachain/utils.py
CHANGED
|
@@ -6,7 +6,6 @@ import os
|
|
|
6
6
|
import os.path as osp
|
|
7
7
|
import random
|
|
8
8
|
import re
|
|
9
|
-
import stat
|
|
10
9
|
import sys
|
|
11
10
|
import time
|
|
12
11
|
from collections.abc import Iterable, Iterator, Sequence
|
|
@@ -193,14 +192,6 @@ def suffix_to_number(num_str: str) -> int:
|
|
|
193
192
|
raise ValueError(f"Invalid number/suffix for: {num_str}") from None
|
|
194
193
|
|
|
195
194
|
|
|
196
|
-
def force_create_dir(name):
|
|
197
|
-
if not os.path.exists(name):
|
|
198
|
-
os.mkdir(name)
|
|
199
|
-
elif not os.path.isdir(name):
|
|
200
|
-
os.remove(name)
|
|
201
|
-
os.mkdir(name)
|
|
202
|
-
|
|
203
|
-
|
|
204
195
|
def datachain_paths_join(source_path: str, file_paths: Iterable[str]) -> Iterable[str]:
|
|
205
196
|
source_parts = source_path.rstrip("/").split("/")
|
|
206
197
|
if glob.has_magic(source_parts[-1]):
|
|
@@ -210,13 +201,6 @@ def datachain_paths_join(source_path: str, file_paths: Iterable[str]) -> Iterabl
|
|
|
210
201
|
return (f"{source_stripped}/{path.lstrip('/')}" for path in file_paths)
|
|
211
202
|
|
|
212
203
|
|
|
213
|
-
# From: https://docs.python.org/3/library/shutil.html#rmtree-example
|
|
214
|
-
def remove_readonly(func, path, _):
|
|
215
|
-
"Clear the readonly bit and reattempt the removal"
|
|
216
|
-
os.chmod(path, stat.S_IWRITE)
|
|
217
|
-
func(path)
|
|
218
|
-
|
|
219
|
-
|
|
220
204
|
def sql_escape_like(search: str, escape: str = "\\") -> str:
|
|
221
205
|
return (
|
|
222
206
|
search.replace(escape, escape * 2)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.11.11
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -71,6 +71,7 @@ Requires-Dist: usearch; extra == "vector"
|
|
|
71
71
|
Provides-Extra: hf
|
|
72
72
|
Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
73
73
|
Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
|
|
74
|
+
Requires-Dist: fsspec>=2024.12.0; extra == "hf"
|
|
74
75
|
Provides-Extra: video
|
|
75
76
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
76
77
|
Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
|
|
@@ -90,6 +91,7 @@ Requires-Dist: hypothesis; extra == "tests"
|
|
|
90
91
|
Requires-Dist: aiotools>=1.7.0; extra == "tests"
|
|
91
92
|
Requires-Dist: requests-mock; extra == "tests"
|
|
92
93
|
Requires-Dist: scipy; extra == "tests"
|
|
94
|
+
Requires-Dist: ultralytics; extra == "tests"
|
|
93
95
|
Provides-Extra: dev
|
|
94
96
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
95
97
|
Requires-Dist: mypy==1.15.0; extra == "dev"
|
|
@@ -103,7 +105,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
|
|
|
103
105
|
Requires-Dist: defusedxml; extra == "examples"
|
|
104
106
|
Requires-Dist: accelerate; extra == "examples"
|
|
105
107
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
106
|
-
Requires-Dist: ultralytics
|
|
108
|
+
Requires-Dist: ultralytics; extra == "examples"
|
|
107
109
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
108
110
|
|
|
109
111
|
================
|