datachain 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from datachain.lib import func, models
1
+ from datachain.lib import func
2
2
  from datachain.lib.data_model import DataModel, DataType, is_chain_type
3
3
  from datachain.lib.dc import C, Column, DataChain, Sys
4
4
  from datachain.lib.file import (
@@ -38,6 +38,5 @@ __all__ = [
38
38
  "func",
39
39
  "is_chain_type",
40
40
  "metrics",
41
- "models",
42
41
  "param",
43
42
  ]
@@ -0,0 +1,6 @@
1
+ from . import ultralytics
2
+ from .bbox import BBox, OBBox
3
+ from .pose import Pose, Pose3D
4
+ from .segment import Segment
5
+
6
+ __all__ = ["BBox", "OBBox", "Pose", "Pose3D", "Segment", "ultralytics"]
@@ -0,0 +1,102 @@
1
+ from pydantic import Field
2
+
3
+ from datachain.lib.data_model import DataModel
4
+
5
+
6
+ class BBox(DataModel):
7
+ """
8
+ A data model for representing bounding box.
9
+
10
+ Attributes:
11
+ title (str): The title of the bounding box.
12
+ coords (list[int]): The coordinates of the bounding box.
13
+
14
+ The bounding box is defined by two points:
15
+ - (x1, y1): The top-left corner of the box.
16
+ - (x2, y2): The bottom-right corner of the box.
17
+ """
18
+
19
+ title: str = Field(default="")
20
+ coords: list[int] = Field(default=None)
21
+
22
+ @staticmethod
23
+ def from_list(coords: list[float], title: str = "") -> "BBox":
24
+ assert len(coords) == 4, "Bounding box must be a list of 4 coordinates."
25
+ assert all(
26
+ isinstance(value, (int, float)) for value in coords
27
+ ), "Bounding box coordinates must be floats or integers."
28
+ return BBox(
29
+ title=title,
30
+ coords=[round(c) for c in coords],
31
+ )
32
+
33
+ @staticmethod
34
+ def from_dict(coords: dict[str, float], title: str = "") -> "BBox":
35
+ assert isinstance(coords, dict) and set(coords) == {
36
+ "x1",
37
+ "y1",
38
+ "x2",
39
+ "y2",
40
+ }, "Bounding box must be a dictionary with keys 'x1', 'y1', 'x2' and 'y2'."
41
+ return BBox.from_list(
42
+ [coords["x1"], coords["y1"], coords["x2"], coords["y2"]],
43
+ title=title,
44
+ )
45
+
46
+
47
+ class OBBox(DataModel):
48
+ """
49
+ A data model for representing oriented bounding boxes.
50
+
51
+ Attributes:
52
+ title (str): The title of the oriented bounding box.
53
+ coords (list[int]): The coordinates of the oriented bounding box.
54
+
55
+ The oriented bounding box is defined by four points:
56
+ - (x1, y1): The first corner of the box.
57
+ - (x2, y2): The second corner of the box.
58
+ - (x3, y3): The third corner of the box.
59
+ - (x4, y4): The fourth corner of the box.
60
+ """
61
+
62
+ title: str = Field(default="")
63
+ coords: list[int] = Field(default=None)
64
+
65
+ @staticmethod
66
+ def from_list(coords: list[float], title: str = "") -> "OBBox":
67
+ assert (
68
+ len(coords) == 8
69
+ ), "Oriented bounding box must be a list of 8 coordinates."
70
+ assert all(
71
+ isinstance(value, (int, float)) for value in coords
72
+ ), "Oriented bounding box coordinates must be floats or integers."
73
+ return OBBox(
74
+ title=title,
75
+ coords=[round(c) for c in coords],
76
+ )
77
+
78
+ @staticmethod
79
+ def from_dict(coords: dict[str, float], title: str = "") -> "OBBox":
80
+ assert isinstance(coords, dict) and set(coords) == {
81
+ "x1",
82
+ "y1",
83
+ "x2",
84
+ "y2",
85
+ "x3",
86
+ "y3",
87
+ "x4",
88
+ "y4",
89
+ }, "Oriented bounding box must be a dictionary with coordinates."
90
+ return OBBox.from_list(
91
+ [
92
+ coords["x1"],
93
+ coords["y1"],
94
+ coords["x2"],
95
+ coords["y2"],
96
+ coords["x3"],
97
+ coords["y3"],
98
+ coords["x4"],
99
+ coords["y4"],
100
+ ],
101
+ title=title,
102
+ )
@@ -0,0 +1,88 @@
1
+ from pydantic import Field
2
+
3
+ from datachain.lib.data_model import DataModel
4
+
5
+
6
+ class Pose(DataModel):
7
+ """
8
+ A data model for representing pose keypoints.
9
+
10
+ Attributes:
11
+ x (list[int]): The x-coordinates of the keypoints.
12
+ y (list[int]): The y-coordinates of the keypoints.
13
+
14
+ The keypoints are represented as lists of x and y coordinates, where each index
15
+ corresponds to a specific body part.
16
+ """
17
+
18
+ x: list[int] = Field(default=None)
19
+ y: list[int] = Field(default=None)
20
+
21
+ @staticmethod
22
+ def from_list(points: list[list[float]]) -> "Pose":
23
+ assert len(points) == 2, "Pose must be a list of 2 lists: x and y coordinates."
24
+ points_x, points_y = points
25
+ assert (
26
+ len(points_x) == len(points_y) == 17
27
+ ), "Pose x and y coordinates must have the same length of 17."
28
+ assert all(
29
+ isinstance(value, (int, float)) for value in [*points_x, *points_y]
30
+ ), "Pose coordinates must be floats or integers."
31
+ return Pose(
32
+ x=[round(coord) for coord in points_x],
33
+ y=[round(coord) for coord in points_y],
34
+ )
35
+
36
+ @staticmethod
37
+ def from_dict(points: dict[str, list[float]]) -> "Pose":
38
+ assert isinstance(points, dict) and set(points) == {
39
+ "x",
40
+ "y",
41
+ }, "Pose must be a dict with keys 'x' and 'y'."
42
+ return Pose.from_list([points["x"], points["y"]])
43
+
44
+
45
+ class Pose3D(DataModel):
46
+ """
47
+ A data model for representing 3D pose keypoints.
48
+
49
+ Attributes:
50
+ x (list[int]): The x-coordinates of the keypoints.
51
+ y (list[int]): The y-coordinates of the keypoints.
52
+ visible (list[float]): The visibility of the keypoints.
53
+
54
+ The keypoints are represented as lists of x, y, and visibility values,
55
+ where each index corresponds to a specific body part.
56
+ """
57
+
58
+ x: list[int] = Field(default=None)
59
+ y: list[int] = Field(default=None)
60
+ visible: list[float] = Field(default=None)
61
+
62
+ @staticmethod
63
+ def from_list(points: list[list[float]]) -> "Pose3D":
64
+ assert (
65
+ len(points) == 3
66
+ ), "Pose3D must be a list of 3 lists: x, y coordinates and visible."
67
+ points_x, points_y, points_v = points
68
+ assert (
69
+ len(points_x) == len(points_y) == len(points_v) == 17
70
+ ), "Pose3D x, y coordinates and visible must have the same length of 17."
71
+ assert all(
72
+ isinstance(value, (int, float))
73
+ for value in [*points_x, *points_y, *points_v]
74
+ ), "Pose3D coordinates must be floats or integers."
75
+ return Pose3D(
76
+ x=[round(coord) for coord in points_x],
77
+ y=[round(coord) for coord in points_y],
78
+ visible=points_v,
79
+ )
80
+
81
+ @staticmethod
82
+ def from_dict(points: dict[str, list[float]]) -> "Pose3D":
83
+ assert isinstance(points, dict) and set(points) == {
84
+ "x",
85
+ "y",
86
+ "visible",
87
+ }, "Pose3D must be a dict with keys 'x', 'y' and 'visible'."
88
+ return Pose3D.from_list([points["x"], points["y"], points["visible"]])
@@ -0,0 +1,47 @@
1
+ from pydantic import Field
2
+
3
+ from datachain.lib.data_model import DataModel
4
+
5
+
6
+ class Segment(DataModel):
7
+ """
8
+ A data model for representing segment.
9
+
10
+ Attributes:
11
+ title (str): The title of the segment.
12
+ x (list[int]): The x-coordinates of the segment.
13
+ y (list[int]): The y-coordinates of the segment.
14
+
15
+ The segment is represented as lists of x and y coordinates, where each index
16
+ corresponds to a specific point.
17
+ """
18
+
19
+ title: str = Field(default="")
20
+ x: list[int] = Field(default=None)
21
+ y: list[int] = Field(default=None)
22
+
23
+ @staticmethod
24
+ def from_list(points: list[list[float]], title: str = "") -> "Segment":
25
+ assert (
26
+ len(points) == 2
27
+ ), "Segment must be a list of 2 lists: x and y coordinates."
28
+ points_x, points_y = points
29
+ assert len(points_x) == len(
30
+ points_y
31
+ ), "Segment x and y coordinates must have the same length."
32
+ assert all(
33
+ isinstance(value, (int, float)) for value in [*points_x, *points_y]
34
+ ), "Segment coordinates must be floats or integers."
35
+ return Segment(
36
+ title=title,
37
+ x=[round(coord) for coord in points_x],
38
+ y=[round(coord) for coord in points_y],
39
+ )
40
+
41
+ @staticmethod
42
+ def from_dict(points: dict[str, list[float]], title: str = "") -> "Segment":
43
+ assert isinstance(points, dict) and set(points) == {
44
+ "x",
45
+ "y",
46
+ }, "Segment must be a dict with keys 'x' and 'y'."
47
+ return Segment.from_list([points["x"], points["y"]], title=title)
@@ -0,0 +1,27 @@
1
+ """
2
+ This module contains the YOLO models.
3
+
4
+ YOLO stands for "You Only Look Once", a family of object detection models that
5
+ are designed to be fast and accurate. The models are trained to detect objects
6
+ in images by dividing the image into a grid and predicting the bounding boxes
7
+ and class probabilities for each grid cell.
8
+
9
+ More information about YOLO can be found here:
10
+ - https://pjreddie.com/darknet/yolo/
11
+ - https://docs.ultralytics.com/
12
+ """
13
+
14
+ from .bbox import YoloBBox, YoloBBoxes, YoloOBBox, YoloOBBoxes
15
+ from .pose import YoloPose, YoloPoses
16
+ from .segment import YoloSegment, YoloSegments
17
+
18
+ __all__ = [
19
+ "YoloBBox",
20
+ "YoloBBoxes",
21
+ "YoloOBBox",
22
+ "YoloOBBoxes",
23
+ "YoloPose",
24
+ "YoloPoses",
25
+ "YoloSegment",
26
+ "YoloSegments",
27
+ ]
@@ -0,0 +1,147 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from pydantic import Field
4
+
5
+ from datachain.lib.data_model import DataModel
6
+ from datachain.model.bbox import BBox, OBBox
7
+
8
+ if TYPE_CHECKING:
9
+ from ultralytics.engine.results import Results
10
+
11
+
12
+ class YoloBBox(DataModel):
13
+ """
14
+ A class representing a bounding box detected by a YOLO model.
15
+
16
+ Attributes:
17
+ cls: The class of the detected object.
18
+ name: The name of the detected object.
19
+ confidence: The confidence score of the detection.
20
+ box: The bounding box of the detected object
21
+ """
22
+
23
+ cls: int = Field(default=-1)
24
+ name: str = Field(default="")
25
+ confidence: float = Field(default=0)
26
+ box: BBox
27
+
28
+ @staticmethod
29
+ def from_result(result: "Results") -> "YoloBBox":
30
+ summary = result.summary()
31
+ if not summary:
32
+ return YoloBBox(box=BBox())
33
+ name = summary[0].get("name", "")
34
+ box = (
35
+ BBox.from_dict(summary[0]["box"], title=name)
36
+ if "box" in summary[0]
37
+ else BBox()
38
+ )
39
+ return YoloBBox(
40
+ cls=summary[0]["class"],
41
+ name=name,
42
+ confidence=summary[0]["confidence"],
43
+ box=box,
44
+ )
45
+
46
+
47
+ class YoloBBoxes(DataModel):
48
+ """
49
+ A class representing a list of bounding boxes detected by a YOLO model.
50
+
51
+ Attributes:
52
+ cls: A list of classes of the detected objects.
53
+ name: A list of names of the detected objects.
54
+ confidence: A list of confidence scores of the detections.
55
+ box: A list of bounding boxes of the detected objects
56
+ """
57
+
58
+ cls: list[int]
59
+ name: list[str]
60
+ confidence: list[float]
61
+ box: list[BBox]
62
+
63
+ @staticmethod
64
+ def from_results(results: list["Results"]) -> "YoloBBoxes":
65
+ cls, names, confidence, box = [], [], [], []
66
+ for r in results:
67
+ for s in r.summary():
68
+ name = s.get("name", "")
69
+ cls.append(s["class"])
70
+ names.append(name)
71
+ confidence.append(s["confidence"])
72
+ box.append(BBox.from_dict(s.get("box", {}), title=name))
73
+ return YoloBBoxes(
74
+ cls=cls,
75
+ name=names,
76
+ confidence=confidence,
77
+ box=box,
78
+ )
79
+
80
+
81
+ class YoloOBBox(DataModel):
82
+ """
83
+ A class representing an oriented bounding box detected by a YOLO model.
84
+
85
+ Attributes:
86
+ cls: The class of the detected object.
87
+ name: The name of the detected object.
88
+ confidence: The confidence score of the detection.
89
+ box: The oriented bounding box of the detected object.
90
+ """
91
+
92
+ cls: int = Field(default=-1)
93
+ name: str = Field(default="")
94
+ confidence: float = Field(default=0)
95
+ box: OBBox
96
+
97
+ @staticmethod
98
+ def from_result(result: "Results") -> "YoloOBBox":
99
+ summary = result.summary()
100
+ if not summary:
101
+ return YoloOBBox(box=OBBox())
102
+ name = summary[0].get("name", "")
103
+ box = (
104
+ OBBox.from_dict(summary[0]["box"], title=name)
105
+ if "box" in summary[0]
106
+ else OBBox()
107
+ )
108
+ return YoloOBBox(
109
+ cls=summary[0]["class"],
110
+ name=name,
111
+ confidence=summary[0]["confidence"],
112
+ box=box,
113
+ )
114
+
115
+
116
+ class YoloOBBoxes(DataModel):
117
+ """
118
+ A class representing a list of oriented bounding boxes detected by a YOLO model.
119
+
120
+ Attributes:
121
+ cls: A list of classes of the detected objects.
122
+ name: A list of names of the detected objects.
123
+ confidence: A list of confidence scores of the detections.
124
+ box: A list of oriented bounding boxes of the detected objects.
125
+ """
126
+
127
+ cls: list[int]
128
+ name: list[str]
129
+ confidence: list[float]
130
+ box: list[OBBox]
131
+
132
+ @staticmethod
133
+ def from_results(results: list["Results"]) -> "YoloOBBoxes":
134
+ cls, names, confidence, box = [], [], [], []
135
+ for r in results:
136
+ for s in r.summary():
137
+ name = s.get("name", "")
138
+ cls.append(s["class"])
139
+ names.append(name)
140
+ confidence.append(s["confidence"])
141
+ box.append(OBBox.from_dict(s.get("box", {}), title=name))
142
+ return YoloOBBoxes(
143
+ cls=cls,
144
+ name=names,
145
+ confidence=confidence,
146
+ box=box,
147
+ )
@@ -0,0 +1,113 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from pydantic import Field
4
+
5
+ from datachain.lib.data_model import DataModel
6
+ from datachain.model.bbox import BBox
7
+ from datachain.model.pose import Pose3D
8
+
9
+ if TYPE_CHECKING:
10
+ from ultralytics.engine.results import Results
11
+
12
+
13
+ class YoloPoseBodyPart:
14
+ """An enumeration of body parts for YOLO pose keypoints."""
15
+
16
+ nose = 0
17
+ left_eye = 1
18
+ right_eye = 2
19
+ left_ear = 3
20
+ right_ear = 4
21
+ left_shoulder = 5
22
+ right_shoulder = 6
23
+ left_elbow = 7
24
+ right_elbow = 8
25
+ left_wrist = 9
26
+ right_wrist = 10
27
+ left_hip = 11
28
+ right_hip = 12
29
+ left_knee = 13
30
+ right_knee = 14
31
+ left_ankle = 15
32
+ right_ankle = 16
33
+
34
+
35
+ class YoloPose(DataModel):
36
+ """
37
+ A data model for YOLO pose keypoints.
38
+
39
+ Attributes:
40
+ cls: The class of the pose.
41
+ name: The name of the pose.
42
+ confidence: The confidence score of the pose.
43
+ box: The bounding box of the pose.
44
+ pose: The 3D pose keypoints.
45
+ """
46
+
47
+ cls: int = Field(default=-1)
48
+ name: str = Field(default="")
49
+ confidence: float = Field(default=0)
50
+ box: BBox
51
+ pose: Pose3D
52
+
53
+ @staticmethod
54
+ def from_result(result: "Results") -> "YoloPose":
55
+ summary = result.summary()
56
+ if not summary:
57
+ return YoloPose(box=BBox(), pose=Pose3D())
58
+ name = summary[0].get("name", "")
59
+ box = (
60
+ BBox.from_dict(summary[0]["box"], title=name)
61
+ if "box" in summary[0]
62
+ else BBox()
63
+ )
64
+ pose = (
65
+ Pose3D.from_dict(summary[0]["keypoints"])
66
+ if "keypoints" in summary[0]
67
+ else Pose3D()
68
+ )
69
+ return YoloPose(
70
+ cls=summary[0]["class"],
71
+ name=name,
72
+ confidence=summary[0]["confidence"],
73
+ box=box,
74
+ pose=pose,
75
+ )
76
+
77
+
78
+ class YoloPoses(DataModel):
79
+ """
80
+ A data model for a list of YOLO pose keypoints.
81
+
82
+ Attributes:
83
+ cls: The classes of the poses.
84
+ name: The names of the poses.
85
+ confidence: The confidence scores of the poses.
86
+ box: The bounding boxes of the poses.
87
+ pose: The 3D pose keypoints of the poses.
88
+ """
89
+
90
+ cls: list[int]
91
+ name: list[str]
92
+ confidence: list[float]
93
+ box: list[BBox]
94
+ pose: list[Pose3D]
95
+
96
+ @staticmethod
97
+ def from_results(results: list["Results"]) -> "YoloPoses":
98
+ cls, names, confidence, box, pose = [], [], [], [], []
99
+ for r in results:
100
+ for s in r.summary():
101
+ name = s.get("name", "")
102
+ cls.append(s["class"])
103
+ names.append(name)
104
+ confidence.append(s["confidence"])
105
+ box.append(BBox.from_dict(s.get("box", {}), title=name))
106
+ pose.append(Pose3D.from_dict(s.get("keypoints", {})))
107
+ return YoloPoses(
108
+ cls=cls,
109
+ name=names,
110
+ confidence=confidence,
111
+ box=box,
112
+ pose=pose,
113
+ )
@@ -0,0 +1,91 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from pydantic import Field
4
+
5
+ from datachain.lib.data_model import DataModel
6
+ from datachain.model.bbox import BBox
7
+ from datachain.model.segment import Segment
8
+
9
+ if TYPE_CHECKING:
10
+ from ultralytics.engine.results import Results
11
+
12
+
13
+ class YoloSegment(DataModel):
14
+ """
15
+ A data model for a single YOLO segment.
16
+
17
+ Attributes:
18
+ cls (int): The class of the segment.
19
+ name (str): The name of the segment.
20
+ confidence (float): The confidence of the segment.
21
+ box (BBox): The bounding box of the segment.
22
+ segment (Segments): The segments of the segment.
23
+ """
24
+
25
+ cls: int = Field(default=-1)
26
+ name: str = Field(default="")
27
+ confidence: float = Field(default=0)
28
+ box: BBox
29
+ segment: Segment
30
+
31
+ @staticmethod
32
+ def from_result(result: "Results") -> "YoloSegment":
33
+ summary = result.summary()
34
+ if not summary:
35
+ return YoloSegment(box=BBox(), segment=Segment())
36
+ name = summary[0].get("name", "")
37
+ box = (
38
+ BBox.from_dict(summary[0]["box"], title=name)
39
+ if "box" in summary[0]
40
+ else BBox()
41
+ )
42
+ segment = (
43
+ Segment.from_dict(summary[0]["segments"], title=name)
44
+ if "segments" in summary[0]
45
+ else Segment()
46
+ )
47
+ return YoloSegment(
48
+ cls=summary[0]["class"],
49
+ name=summary[0]["name"],
50
+ confidence=summary[0]["confidence"],
51
+ box=box,
52
+ segment=segment,
53
+ )
54
+
55
+
56
+ class YoloSegments(DataModel):
57
+ """
58
+ A data model for a list of YOLO segments.
59
+
60
+ Attributes:
61
+ cls (list[int]): The classes of the segments.
62
+ name (list[str]): The names of the segments.
63
+ confidence (list[float]): The confidences of the segments.
64
+ box (list[BBox]): The bounding boxes of the segments.
65
+ segment (list[Segments]): The segments of the segments.
66
+ """
67
+
68
+ cls: list[int]
69
+ name: list[str]
70
+ confidence: list[float]
71
+ box: list[BBox]
72
+ segment: list[Segment]
73
+
74
+ @staticmethod
75
+ def from_results(results: list["Results"]) -> "YoloSegments":
76
+ cls, names, confidence, box, segment = [], [], [], [], []
77
+ for r in results:
78
+ for s in r.summary():
79
+ name = s.get("name", "")
80
+ cls.append(s["class"])
81
+ names.append(name)
82
+ confidence.append(s["confidence"])
83
+ box.append(BBox.from_dict(s.get("box", {}), title=name))
84
+ segment.append(Segment.from_dict(s.get("segments", {}), title=name))
85
+ return YoloSegments(
86
+ cls=cls,
87
+ name=names,
88
+ confidence=confidence,
89
+ box=box,
90
+ segment=segment,
91
+ )
@@ -58,10 +58,14 @@ def train_test_split(dc: DataChain, weights: list[float]) -> list[DataChain]:
58
58
 
59
59
  weights_normalized = [weight / sum(weights) for weight in weights]
60
60
 
61
+ resolution = 2**31 - 1 # Maximum positive value for a 32-bit signed integer.
62
+
61
63
  return [
62
64
  dc.filter(
63
- C("sys__rand") % 1000 >= round(sum(weights_normalized[:index]) * 1000),
64
- C("sys__rand") % 1000 < round(sum(weights_normalized[: index + 1]) * 1000),
65
+ C("sys__rand") % resolution
66
+ >= round(sum(weights_normalized[:index]) * resolution),
67
+ C("sys__rand") % resolution
68
+ < round(sum(weights_normalized[: index + 1]) * resolution),
65
69
  )
66
70
  for index, _ in enumerate(weights_normalized)
67
71
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -19,85 +19,86 @@ License-File: LICENSE
19
19
  Requires-Dist: pyyaml
20
20
  Requires-Dist: tomlkit
21
21
  Requires-Dist: tqdm
22
- Requires-Dist: numpy <3,>=1
23
- Requires-Dist: pandas >=2.0.0
22
+ Requires-Dist: numpy<3,>=1
23
+ Requires-Dist: pandas>=2.0.0
24
24
  Requires-Dist: pyarrow
25
25
  Requires-Dist: typing-extensions
26
- Requires-Dist: python-dateutil >=2
27
- Requires-Dist: attrs >=21.3.0
28
- Requires-Dist: s3fs >=2024.2.0
29
- Requires-Dist: gcsfs >=2024.2.0
30
- Requires-Dist: adlfs >=2024.2.0
31
- Requires-Dist: dvc-data <4,>=3.10
32
- Requires-Dist: dvc-objects <6,>=4
33
- Requires-Dist: shtab <2,>=1.3.4
34
- Requires-Dist: sqlalchemy >=2
35
- Requires-Dist: multiprocess ==0.70.16
26
+ Requires-Dist: python-dateutil>=2
27
+ Requires-Dist: attrs>=21.3.0
28
+ Requires-Dist: s3fs>=2024.2.0
29
+ Requires-Dist: gcsfs>=2024.2.0
30
+ Requires-Dist: adlfs>=2024.2.0
31
+ Requires-Dist: dvc-data<4,>=3.10
32
+ Requires-Dist: dvc-objects<6,>=4
33
+ Requires-Dist: shtab<2,>=1.3.4
34
+ Requires-Dist: sqlalchemy>=2
35
+ Requires-Dist: multiprocess==0.70.16
36
36
  Requires-Dist: cloudpickle
37
- Requires-Dist: orjson >=3.10.5
38
- Requires-Dist: pydantic <3,>=2
39
- Requires-Dist: jmespath >=1.0
40
- Requires-Dist: datamodel-code-generator >=0.25
41
- Requires-Dist: Pillow <12,>=10.0.0
42
- Requires-Dist: msgpack <2,>=1.0.4
37
+ Requires-Dist: orjson>=3.10.5
38
+ Requires-Dist: pydantic<3,>=2
39
+ Requires-Dist: jmespath>=1.0
40
+ Requires-Dist: datamodel-code-generator>=0.25
41
+ Requires-Dist: Pillow<12,>=10.0.0
42
+ Requires-Dist: msgpack<2,>=1.0.4
43
43
  Requires-Dist: psutil
44
- Requires-Dist: huggingface-hub
45
- Requires-Dist: iterative-telemetry >=0.0.9
44
+ Requires-Dist: huggingface_hub
45
+ Requires-Dist: iterative-telemetry>=0.0.9
46
46
  Requires-Dist: platformdirs
47
- Requires-Dist: dvc-studio-client <1,>=0.21
47
+ Requires-Dist: dvc-studio-client<1,>=0.21
48
48
  Requires-Dist: tabulate
49
- Provides-Extra: dev
50
- Requires-Dist: datachain[docs,tests] ; extra == 'dev'
51
- Requires-Dist: mypy ==1.13.0 ; extra == 'dev'
52
- Requires-Dist: types-python-dateutil ; extra == 'dev'
53
- Requires-Dist: types-pytz ; extra == 'dev'
54
- Requires-Dist: types-PyYAML ; extra == 'dev'
55
- Requires-Dist: types-requests ; extra == 'dev'
56
- Requires-Dist: types-tabulate ; extra == 'dev'
57
49
  Provides-Extra: docs
58
- Requires-Dist: mkdocs >=1.5.2 ; extra == 'docs'
59
- Requires-Dist: mkdocs-gen-files >=0.5.0 ; extra == 'docs'
60
- Requires-Dist: mkdocs-material >=9.3.1 ; extra == 'docs'
61
- Requires-Dist: mkdocs-section-index >=0.3.6 ; extra == 'docs'
62
- Requires-Dist: mkdocstrings-python >=1.6.3 ; extra == 'docs'
63
- Requires-Dist: mkdocs-literate-nav >=0.6.1 ; extra == 'docs'
64
- Provides-Extra: examples
65
- Requires-Dist: datachain[tests] ; extra == 'examples'
66
- Requires-Dist: numpy <2,>=1 ; extra == 'examples'
67
- Requires-Dist: defusedxml ; extra == 'examples'
68
- Requires-Dist: accelerate ; extra == 'examples'
69
- Requires-Dist: unstructured[embed-huggingface,pdf] <0.16.0 ; extra == 'examples'
70
- Requires-Dist: pdfplumber ==0.11.4 ; extra == 'examples'
71
- Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
72
- Requires-Dist: onnx ==1.16.1 ; extra == 'examples'
73
- Provides-Extra: hf
74
- Requires-Dist: numba >=0.60.0 ; extra == 'hf'
75
- Requires-Dist: datasets[audio,vision] >=2.21.0 ; extra == 'hf'
76
- Provides-Extra: remote
77
- Requires-Dist: lz4 ; extra == 'remote'
78
- Requires-Dist: requests >=2.22.0 ; extra == 'remote'
79
- Provides-Extra: tests
80
- Requires-Dist: datachain[hf,remote,torch,vector] ; extra == 'tests'
81
- Requires-Dist: pytest <9,>=8 ; extra == 'tests'
82
- Requires-Dist: pytest-sugar >=0.9.6 ; extra == 'tests'
83
- Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
84
- Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
85
- Requires-Dist: pytest-servers[all] >=0.5.8 ; extra == 'tests'
86
- Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
87
- Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
88
- Requires-Dist: virtualenv ; extra == 'tests'
89
- Requires-Dist: dulwich ; extra == 'tests'
90
- Requires-Dist: hypothesis ; extra == 'tests'
91
- Requires-Dist: open-clip-torch ; extra == 'tests'
92
- Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
93
- Requires-Dist: requests-mock ; extra == 'tests'
94
- Requires-Dist: scipy ; extra == 'tests'
50
+ Requires-Dist: mkdocs>=1.5.2; extra == "docs"
51
+ Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
52
+ Requires-Dist: mkdocs-material>=9.3.1; extra == "docs"
53
+ Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
54
+ Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
55
+ Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
95
56
  Provides-Extra: torch
96
- Requires-Dist: torch >=2.1.0 ; extra == 'torch'
97
- Requires-Dist: torchvision ; extra == 'torch'
98
- Requires-Dist: transformers >=4.36.0 ; extra == 'torch'
57
+ Requires-Dist: torch>=2.1.0; extra == "torch"
58
+ Requires-Dist: torchvision; extra == "torch"
59
+ Requires-Dist: transformers>=4.36.0; extra == "torch"
60
+ Provides-Extra: remote
61
+ Requires-Dist: lz4; extra == "remote"
62
+ Requires-Dist: requests>=2.22.0; extra == "remote"
99
63
  Provides-Extra: vector
100
- Requires-Dist: usearch ; extra == 'vector'
64
+ Requires-Dist: usearch; extra == "vector"
65
+ Provides-Extra: hf
66
+ Requires-Dist: numba>=0.60.0; extra == "hf"
67
+ Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
68
+ Provides-Extra: tests
69
+ Requires-Dist: datachain[hf,remote,torch,vector]; extra == "tests"
70
+ Requires-Dist: pytest<9,>=8; extra == "tests"
71
+ Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
72
+ Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
73
+ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
74
+ Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
75
+ Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
76
+ Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
77
+ Requires-Dist: virtualenv; extra == "tests"
78
+ Requires-Dist: dulwich; extra == "tests"
79
+ Requires-Dist: hypothesis; extra == "tests"
80
+ Requires-Dist: open_clip_torch; extra == "tests"
81
+ Requires-Dist: aiotools>=1.7.0; extra == "tests"
82
+ Requires-Dist: requests-mock; extra == "tests"
83
+ Requires-Dist: scipy; extra == "tests"
84
+ Provides-Extra: dev
85
+ Requires-Dist: datachain[docs,tests]; extra == "dev"
86
+ Requires-Dist: mypy==1.13.0; extra == "dev"
87
+ Requires-Dist: types-python-dateutil; extra == "dev"
88
+ Requires-Dist: types-pytz; extra == "dev"
89
+ Requires-Dist: types-PyYAML; extra == "dev"
90
+ Requires-Dist: types-requests; extra == "dev"
91
+ Requires-Dist: types-tabulate; extra == "dev"
92
+ Provides-Extra: examples
93
+ Requires-Dist: datachain[tests]; extra == "examples"
94
+ Requires-Dist: numpy<2,>=1; extra == "examples"
95
+ Requires-Dist: defusedxml; extra == "examples"
96
+ Requires-Dist: accelerate; extra == "examples"
97
+ Requires-Dist: unstructured[embed-huggingface,pdf]<0.16.0; extra == "examples"
98
+ Requires-Dist: pdfplumber==0.11.4; extra == "examples"
99
+ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
100
+ Requires-Dist: onnx==1.16.1; extra == "examples"
101
+ Requires-Dist: ultralytics==8.3.29; extra == "examples"
101
102
 
102
103
  ================
103
104
  |logo| DataChain
@@ -1,4 +1,4 @@
1
- datachain/__init__.py,sha256=nnTyB5MpCfBZ6D85JPz-5hUT7i-68Is-47Bxgew8lRw,930
1
+ datachain/__init__.py,sha256=OGzc8xZWtwqxiiutjU4AxCRPY0lrX_csgERiTrq4G0o,908
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
4
4
  datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
@@ -71,10 +71,14 @@ datachain/lib/convert/values_to_tuples.py,sha256=varRCnSMT_pZmHznrd2Yi05qXLLz_v9
71
71
  datachain/lib/func/__init__.py,sha256=wlAKhGV0QDg9y7reSwoUF8Vicfqh_YOUNIXLzxICGz4,403
72
72
  datachain/lib/func/aggregate.py,sha256=H1ziFQdaK9zvnxvttfnEzkkyGvEEmMAvmgCsBV6nfm8,10917
73
73
  datachain/lib/func/func.py,sha256=HAJZ_tpiRG2R-et7pr0WnoyNZYtpbPn3_HBuL3RQpbU,4800
74
- datachain/lib/models/__init__.py,sha256=AGvjPbUokJiir3uelTa4XGtNSECkMFc5Xmi_N3AtxPQ,119
75
- datachain/lib/models/bbox.py,sha256=aiYNhvEcRK3dEN4MBcptmkPKc9kMP16ZQdu7xPk6hek,1555
76
- datachain/lib/models/pose.py,sha256=peuJPNSiGuTXfCfGIABwv8PGYistvTTBmtf-8X8E_eA,1077
77
- datachain/lib/models/yolo.py,sha256=eftoJDUa8iOpFTF1EkKVAd5Q-3HRd6X4eCIZ9h5p4nI,972
74
+ datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
75
+ datachain/model/bbox.py,sha256=LLtzc8OiL-cxqqlPWXA4MFTo8HRS3GW2gOxA0Sf_cxI,3158
76
+ datachain/model/pose.py,sha256=0URrnS99Ugq0yspCXC2z-hgpybEA5tWLJXpxqVLnAlI,3088
77
+ datachain/model/segment.py,sha256=dMxtm-05fNseEoEKpZj9iDN7fwGK1udyAreN-V-cRks,1597
78
+ datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
79
+ datachain/model/ultralytics/bbox.py,sha256=OZ9XBdyMOYc401P-RhfSN9QaYvMpnx2Phu9ptaJgZBY,4316
80
+ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15pEPtX5A,2959
81
+ datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
78
82
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
79
83
  datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
80
84
  datachain/query/dataset.py,sha256=sQny-ZemB2HueC4mPg-7qSaqUD85MMO-DQyVVP8K1CA,53765
@@ -104,11 +108,11 @@ datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,
104
108
  datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
105
109
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
106
110
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
107
- datachain/toolkit/split.py,sha256=6FcEJgUsJsUcCqKW5aXuJy4DvbcQ7_dFbsfNPhn8EVg,2377
111
+ datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
108
112
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
109
- datachain-0.7.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
110
- datachain-0.7.0.dist-info/METADATA,sha256=Cm0v22C-aT14JBG4NjNQmM7kvhYZFAf6rh0yDAMRWFU,18037
111
- datachain-0.7.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
112
- datachain-0.7.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
113
- datachain-0.7.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
114
- datachain-0.7.0.dist-info/RECORD,,
113
+ datachain-0.7.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
114
+ datachain-0.7.1.dist-info/METADATA,sha256=9ICI9nDBKNq39JJR2q_RxuYBCFkUD4o81T2FEO8LKDU,18006
115
+ datachain-0.7.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
116
+ datachain-0.7.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
117
+ datachain-0.7.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
118
+ datachain-0.7.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,5 +0,0 @@
1
- from . import yolo
2
- from .bbox import BBox
3
- from .pose import Pose, Pose3D
4
-
5
- __all__ = ["BBox", "Pose", "Pose3D", "yolo"]
@@ -1,45 +0,0 @@
1
- from typing import Optional
2
-
3
- from pydantic import Field
4
-
5
- from datachain.lib.data_model import DataModel
6
-
7
-
8
- class BBox(DataModel):
9
- """
10
- A data model for representing bounding boxes.
11
-
12
- Attributes:
13
- title (str): The title of the bounding box.
14
- x1 (float): The x-coordinate of the top-left corner of the bounding box.
15
- y1 (float): The y-coordinate of the top-left corner of the bounding box.
16
- x2 (float): The x-coordinate of the bottom-right corner of the bounding box.
17
- y2 (float): The y-coordinate of the bottom-right corner of the bounding box.
18
-
19
- The bounding box is defined by two points:
20
- - (x1, y1): The top-left corner of the box.
21
- - (x2, y2): The bottom-right corner of the box.
22
- """
23
-
24
- title: str = Field(default="")
25
- x1: float = Field(default=0)
26
- y1: float = Field(default=0)
27
- x2: float = Field(default=0)
28
- y2: float = Field(default=0)
29
-
30
- @staticmethod
31
- def from_xywh(bbox: list[float], title: Optional[str] = None) -> "BBox":
32
- """
33
- Converts a bounding box in (x, y, width, height) format
34
- to a BBox data model instance.
35
-
36
- Args:
37
- bbox (list[float]): A bounding box, represented as a list
38
- of four floats [x, y, width, height].
39
-
40
- Returns:
41
- BBox2D: An instance of the BBox data model.
42
- """
43
- assert len(bbox) == 4, f"Bounding box must have 4 elements, got f{len(bbox)}"
44
- x, y, w, h = bbox
45
- return BBox(title=title or "", x1=x, y1=y, x2=x + w, y2=y + h)
@@ -1,37 +0,0 @@
1
- from pydantic import Field
2
-
3
- from datachain.lib.data_model import DataModel
4
-
5
-
6
- class Pose(DataModel):
7
- """
8
- A data model for representing pose keypoints.
9
-
10
- Attributes:
11
- x (list[float]): The x-coordinates of the keypoints.
12
- y (list[float]): The y-coordinates of the keypoints.
13
-
14
- The keypoints are represented as lists of x and y coordinates, where each index
15
- corresponds to a specific body part.
16
- """
17
-
18
- x: list[float] = Field(default=None)
19
- y: list[float] = Field(default=None)
20
-
21
-
22
- class Pose3D(DataModel):
23
- """
24
- A data model for representing 3D pose keypoints.
25
-
26
- Attributes:
27
- x (list[float]): The x-coordinates of the keypoints.
28
- y (list[float]): The y-coordinates of the keypoints.
29
- visible (list[float]): The visibility of the keypoints.
30
-
31
- The keypoints are represented as lists of x, y, and visibility values,
32
- where each index corresponds to a specific body part.
33
- """
34
-
35
- x: list[float] = Field(default=None)
36
- y: list[float] = Field(default=None)
37
- visible: list[float] = Field(default=None)
@@ -1,39 +0,0 @@
1
- """
2
- This module contains the YOLO models.
3
-
4
- YOLO stands for "You Only Look Once", a family of object detection models that
5
- are designed to be fast and accurate. The models are trained to detect objects
6
- in images by dividing the image into a grid and predicting the bounding boxes
7
- and class probabilities for each grid cell.
8
-
9
- More information about YOLO can be found here:
10
- - https://pjreddie.com/darknet/yolo/
11
- - https://docs.ultralytics.com/
12
- """
13
-
14
-
15
- class PoseBodyPart:
16
- """
17
- An enumeration of body parts for YOLO pose keypoints.
18
-
19
- More information about the body parts can be found here:
20
- https://docs.ultralytics.com/tasks/pose/
21
- """
22
-
23
- nose = 0
24
- left_eye = 1
25
- right_eye = 2
26
- left_ear = 3
27
- right_ear = 4
28
- left_shoulder = 5
29
- right_shoulder = 6
30
- left_elbow = 7
31
- right_elbow = 8
32
- left_wrist = 9
33
- right_wrist = 10
34
- left_hip = 11
35
- right_hip = 12
36
- left_knee = 13
37
- right_knee = 14
38
- left_ankle = 15
39
- right_ankle = 16