datachain 0.6.10__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/asyn.py +36 -4
- datachain/data_storage/warehouse.py +4 -1
- datachain/lib/dc.py +6 -1
- datachain/lib/file.py +5 -0
- datachain/lib/models/__init__.py +3 -4
- datachain/lib/models/bbox.py +25 -96
- datachain/lib/models/pose.py +8 -79
- datachain/lib/models/yolo.py +39 -0
- datachain/lib/settings.py +11 -1
- datachain/lib/udf.py +45 -18
- datachain/node.py +1 -1
- datachain/query/dataset.py +25 -27
- datachain/sql/sqlite/types.py +8 -1
- {datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/METADATA +1 -1
- {datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/RECORD +19 -23
- datachain/lib/models/segment.py +0 -53
- datachain/lib/models/ultralytics/__init__.py +0 -14
- datachain/lib/models/ultralytics/bbox.py +0 -189
- datachain/lib/models/ultralytics/pose.py +0 -126
- datachain/lib/models/ultralytics/segment.py +0 -121
- {datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/LICENSE +0 -0
- {datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/WHEEL +0 -0
- {datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/top_level.txt +0 -0
datachain/asyn.py
CHANGED
@@ -1,5 +1,13 @@
 import asyncio
-
+import threading
+from collections.abc import (
+    AsyncIterable,
+    Awaitable,
+    Coroutine,
+    Generator,
+    Iterable,
+    Iterator,
+)
 from concurrent.futures import ThreadPoolExecutor
 from heapq import heappop, heappush
 from typing import Any, Callable, Generic, Optional, TypeVar
@@ -47,6 +55,7 @@ class AsyncMapper(Generic[InputT, ResultT]):
         self.loop = get_loop() if loop is None else loop
         self.pool = ThreadPoolExecutor(workers)
         self._tasks: set[asyncio.Task] = set()
+        self._shutdown_producer = threading.Event()

     def start_task(self, coro: Coroutine) -> asyncio.Task:
         task = self.loop.create_task(coro)
@@ -54,9 +63,31 @@ class AsyncMapper(Generic[InputT, ResultT]):
         task.add_done_callback(self._tasks.discard)
         return task

-
+    def _produce(self) -> None:
         for item in self.iterable:
-
+            if self._shutdown_producer.is_set():
+                return
+            fut = asyncio.run_coroutine_threadsafe(self.work_queue.put(item), self.loop)
+            fut.result()  # wait until the item is in the queue
+
+    async def produce(self) -> None:
+        await self.to_thread(self._produce)
+
+    def shutdown_producer(self) -> None:
+        """
+        Signal the producer to stop and drain any remaining items from the work_queue.
+
+        This method sets an internal event, `_shutdown_producer`, which tells the
+        producer that it should stop adding items to the queue. To ensure that the
+        producer notices this signal promptly, we also attempt to drain any items
+        currently in the queue, clearing it so that the event can be checked without
+        delay.
+        """
+        self._shutdown_producer.set()
+        q = self.work_queue
+        while not q.empty():
+            q.get_nowait()
+            q.task_done()

     async def worker(self) -> None:
         while (item := await self.work_queue.get()) is not None:
@@ -132,7 +163,7 @@ class AsyncMapper(Generic[InputT, ResultT]):
             self.result_queue.get_nowait()
         await self.result_queue.put(None)

-    def iterate(self, timeout=None) ->
+    def iterate(self, timeout=None) -> Generator[ResultT, None, None]:
         init = asyncio.run_coroutine_threadsafe(self.init(), self.loop)
         init.result(timeout=1)
         async_run = asyncio.run_coroutine_threadsafe(self.run(), self.loop)
@@ -145,6 +176,7 @@ class AsyncMapper(Generic[InputT, ResultT]):
             if exc := async_run.exception():
                 raise exc
         finally:
+            self.shutdown_producer()
             if not async_run.done():
                 async_run.cancel()

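For context: the new shutdown_producer() hook exists so that a consumer that stops iterating early does not leave the producer thread blocked on a full work_queue. A rough, illustrative sketch of that scenario (not from the package; it assumes the AsyncMapper(func, iterable, workers=...) call shape that datachain/lib/udf.py uses below, and an async func):

import asyncio

from datachain.asyn import AsyncMapper

async def fetch(item):
    # stand-in for real async work
    await asyncio.sleep(0)
    return item

mapper = AsyncMapper(fetch, iter(range(1000)), workers=4)
results = mapper.iterate()
first = next(results)   # consume only one result
results.close()         # runs iterate()'s finally block, which now calls shutdown_producer()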
datachain/data_storage/warehouse.py
CHANGED

@@ -232,7 +232,10 @@ class AbstractWarehouse(ABC, Serializable):
             if limit < page_size:
                 paginated_query = paginated_query.limit(None).limit(limit)

-
+            # Ensure we're using a thread-local connection
+            with self.clone() as wh:
+                # Cursor results are not thread-safe, so we convert them to a list
+                results = list(wh.dataset_rows_select(paginated_query.offset(offset)))

             processed = False
             for row in results:
datachain/lib/dc.py
CHANGED
@@ -334,6 +334,7 @@ class DataChain:
         parallel=None,
         workers=None,
         min_task_size=None,
+        prefetch: Optional[int] = None,
         sys: Optional[bool] = None,
     ) -> "Self":
         """Change settings for chain.
@@ -360,7 +361,7 @@ class DataChain:
         if sys is None:
             sys = self._sys
         settings = copy.copy(self._settings)
-        settings.add(Settings(cache, parallel, workers, min_task_size))
+        settings.add(Settings(cache, parallel, workers, min_task_size, prefetch))
         return self._evolve(settings=settings, _sys=sys)

     def reset_settings(self, settings: Optional[Settings] = None) -> "Self":
@@ -882,6 +883,8 @@ class DataChain:
         ```
         """
         udf_obj = self._udf_to_obj(Mapper, func, params, output, signal_map)
+        if (prefetch := self._settings.prefetch) is not None:
+            udf_obj.prefetch = prefetch

         return self._evolve(
             query=self._query.add_signals(
@@ -919,6 +922,8 @@ class DataChain:
         ```
         """
         udf_obj = self._udf_to_obj(Generator, func, params, output, signal_map)
+        if (prefetch := self._settings.prefetch) is not None:
+            udf_obj.prefetch = prefetch
         return self._evolve(
             query=self._query.generate(
                 udf_obj.to_udf_wrapper(),
datachain/lib/file.py
CHANGED
@@ -268,6 +268,11 @@ class File(DataModel):
         client = self._catalog.get_client(self.source)
         client.download(self, callback=self._download_cb)

+    async def _prefetch(self) -> None:
+        if self._caching_enabled:
+            client = self._catalog.get_client(self.source)
+            await client._download(self, callback=self._download_cb)
+
     def get_local_path(self) -> Optional[str]:
         """Return path to a file in a local cache.

datachain/lib/models/__init__.py
CHANGED
@@ -1,6 +1,5 @@
-from . import
-from .bbox import BBox
+from . import yolo
+from .bbox import BBox
 from .pose import Pose, Pose3D
-from .segment import Segments

-__all__ = ["BBox", "
+__all__ = ["BBox", "Pose", "Pose3D", "yolo"]
datachain/lib/models/bbox.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from pydantic import Field

 from datachain.lib.data_model import DataModel
@@ -9,7 +11,10 @@ class BBox(DataModel):

     Attributes:
         title (str): The title of the bounding box.
-
+        x1 (float): The x-coordinate of the top-left corner of the bounding box.
+        y1 (float): The y-coordinate of the top-left corner of the bounding box.
+        x2 (float): The x-coordinate of the bottom-right corner of the bounding box.
+        y2 (float): The y-coordinate of the bottom-right corner of the bounding box.

     The bounding box is defined by two points:
         - (x1, y1): The top-left corner of the box.
@@ -17,100 +22,24 @@ class BBox(DataModel):
     """

     title: str = Field(default="")
-
-
-
-
-        assert len(coords) == 4, "Bounding box coordinates must be a list of 4 floats."
-        assert all(
-            isinstance(value, (int, float)) for value in coords
-        ), "Bounding box coordinates must be integers or floats."
-        return BBox(
-            title=title,
-            coords=[round(c) for c in coords],
-        )
-
-    @staticmethod
-    def from_dict(coords: dict[str, float], title: str = "") -> "BBox":
-        assert (
-            len(coords) == 4
-        ), "Bounding box coordinates must be a dictionary of 4 floats."
-        assert set(coords) == {
-            "x1",
-            "y1",
-            "x2",
-            "y2",
-        }, "Bounding box coordinates must contain keys with coordinates."
-        assert all(
-            isinstance(value, (int, float)) for value in coords.values()
-        ), "Bounding box coordinates must be integers or floats."
-        return BBox(
-            title=title,
-            coords=[
-                round(coords["x1"]),
-                round(coords["y1"]),
-                round(coords["x2"]),
-                round(coords["y2"]),
-            ],
-        )
-
-
-class OBBox(DataModel):
-    """
-    A data model for representing oriented bounding boxes.
-
-    Attributes:
-        title (str): The title of the oriented bounding box.
-        coords (list[int]): The coordinates of the oriented bounding box.
-
-    The oriented bounding box is defined by four points:
-        - (x1, y1): The first corner of the box.
-        - (x2, y2): The second corner of the box.
-        - (x3, y3): The third corner of the box.
-        - (x4, y4): The fourth corner of the box.
-    """
-
-    title: str = Field(default="")
-    coords: list[int] = Field(default=None)
-
-    @staticmethod
-    def from_list(coords: list[float], title: str = "") -> "OBBox":
-        assert (
-            len(coords) == 8
-        ), "Oriented bounding box coordinates must be a list of 8 floats."
-        assert all(
-            isinstance(value, (int, float)) for value in coords
-        ), "Oriented bounding box coordinates must be integers or floats."
-        return OBBox(
-            title=title,
-            coords=[round(c) for c in coords],
-        )
+    x1: float = Field(default=0)
+    y1: float = Field(default=0)
+    x2: float = Field(default=0)
+    y2: float = Field(default=0)

     @staticmethod
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-        return
-            title=title,
-            coords=[
-                round(coords["x1"]),
-                round(coords["y1"]),
-                round(coords["x2"]),
-                round(coords["y2"]),
-                round(coords["x3"]),
-                round(coords["y3"]),
-                round(coords["x4"]),
-                round(coords["y4"]),
-            ],
-        )
+    def from_xywh(bbox: list[float], title: Optional[str] = None) -> "BBox":
+        """
+        Converts a bounding box in (x, y, width, height) format
+        to a BBox data model instance.
+
+        Args:
+            bbox (list[float]): A bounding box, represented as a list
+                of four floats [x, y, width, height].
+
+        Returns:
+            BBox2D: An instance of the BBox data model.
+        """
+        assert len(bbox) == 4, f"Bounding box must have 4 elements, got f{len(bbox)}"
+        x, y, w, h = bbox
+        return BBox(title=title or "", x1=x, y1=y, x2=x + w, y2=y + h)
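For illustration, a small example of the reworked BBox model and its from_xywh helper (the numbers are made up):

from datachain.lib.models import BBox

box = BBox.from_xywh([10.0, 20.0, 100.0, 50.0], title="person")
# Per the conversion above: x1=10.0, y1=20.0, x2=110.0, y2=70.0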
datachain/lib/models/pose.py
CHANGED
@@ -8,48 +8,15 @@ class Pose(DataModel):
     A data model for representing pose keypoints.

     Attributes:
-        x (list[
-        y (list[
+        x (list[float]): The x-coordinates of the keypoints.
+        y (list[float]): The y-coordinates of the keypoints.

     The keypoints are represented as lists of x and y coordinates, where each index
     corresponds to a specific body part.
     """

-    x: list[
-    y: list[
-
-    @staticmethod
-    def from_list(points: list[list[float]]) -> "Pose":
-        assert len(points) == 2, "Pose coordinates must be a list of 2 lists."
-        points_x, points_y = points
-        assert (
-            len(points_x) == len(points_y) == 17
-        ), "Pose x and y coordinates must have the same length of 17."
-        assert all(
-            isinstance(value, (int, float)) for value in [*points_x, *points_y]
-        ), "Pose coordinates must be integers or floats."
-        return Pose(
-            x=[round(coord) for coord in points_x],
-            y=[round(coord) for coord in points_y],
-        )
-
-    @staticmethod
-    def from_dict(points: dict[str, list[float]]) -> "Pose":
-        assert set(points) == {
-            "x",
-            "y",
-        }, "Pose coordinates must contain keys 'x' and 'y'."
-        points_x, points_y = points["x"], points["y"]
-        assert (
-            len(points_x) == len(points_y) == 17
-        ), "Pose x and y coordinates must have the same length of 17."
-        assert all(
-            isinstance(value, (int, float)) for value in [*points_x, *points_y]
-        ), "Pose coordinates must be integers or floats."
-        return Pose(
-            x=[round(coord) for coord in points_x],
-            y=[round(coord) for coord in points_y],
-        )
+    x: list[float] = Field(default=None)
+    y: list[float] = Field(default=None)


 class Pose3D(DataModel):
@@ -57,52 +24,14 @@ class Pose3D(DataModel):
     A data model for representing 3D pose keypoints.

     Attributes:
-        x (list[
-        y (list[
+        x (list[float]): The x-coordinates of the keypoints.
+        y (list[float]): The y-coordinates of the keypoints.
         visible (list[float]): The visibility of the keypoints.

     The keypoints are represented as lists of x, y, and visibility values,
     where each index corresponds to a specific body part.
     """

-    x: list[
-    y: list[
+    x: list[float] = Field(default=None)
+    y: list[float] = Field(default=None)
     visible: list[float] = Field(default=None)
-
-    @staticmethod
-    def from_list(points: list[list[float]]) -> "Pose3D":
-        assert len(points) == 3, "Pose coordinates must be a list of 3 lists."
-        points_x, points_y, points_v = points
-        assert (
-            len(points_x) == len(points_y) == len(points_v) == 17
-        ), "Pose x, y, and visibility coordinates must have the same length of 17."
-        assert all(
-            isinstance(value, (int, float))
-            for value in [*points_x, *points_y, *points_v]
-        ), "Pose coordinates must be integers or floats."
-        return Pose3D(
-            x=[round(coord) for coord in points_x],
-            y=[round(coord) for coord in points_y],
-            visible=points_v,
-        )
-
-    @staticmethod
-    def from_dict(points: dict[str, list[float]]) -> "Pose3D":
-        assert set(points) == {
-            "x",
-            "y",
-            "visible",
-        }, "Pose coordinates must contain keys 'x', 'y', and 'visible'."
-        points_x, points_y, points_v = points["x"], points["y"], points["visible"]
-        assert (
-            len(points_x) == len(points_y) == len(points_v) == 17
-        ), "Pose x, y, and visibility coordinates must have the same length of 17."
-        assert all(
-            isinstance(value, (int, float))
-            for value in [*points_x, *points_y, *points_v]
-        ), "Pose coordinates must be integers or floats."
-        return Pose3D(
-            x=[round(coord) for coord in points_x],
-            y=[round(coord) for coord in points_y],
-            visible=points_v,
-        )
datachain/lib/models/yolo.py
ADDED

@@ -0,0 +1,39 @@
+"""
+This module contains the YOLO models.
+
+YOLO stands for "You Only Look Once", a family of object detection models that
+are designed to be fast and accurate. The models are trained to detect objects
+in images by dividing the image into a grid and predicting the bounding boxes
+and class probabilities for each grid cell.
+
+More information about YOLO can be found here:
+- https://pjreddie.com/darknet/yolo/
+- https://docs.ultralytics.com/
+"""
+
+
+class PoseBodyPart:
+    """
+    An enumeration of body parts for YOLO pose keypoints.
+
+    More information about the body parts can be found here:
+    https://docs.ultralytics.com/tasks/pose/
+    """
+
+    nose = 0
+    left_eye = 1
+    right_eye = 2
+    left_ear = 3
+    right_ear = 4
+    left_shoulder = 5
+    right_shoulder = 6
+    left_elbow = 7
+    right_elbow = 8
+    left_wrist = 9
+    right_wrist = 10
+    left_hip = 11
+    right_hip = 12
+    left_knee = 13
+    right_knee = 14
+    left_ankle = 15
+    right_ankle = 16
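For illustration, the PoseBodyPart indices can be used to address the simplified Pose model from pose.py (the coordinate values below are made up):

from datachain.lib.models import Pose, yolo

pose = Pose(x=[0.0] * 17, y=[0.0] * 17)
nose_xy = (pose.x[yolo.PoseBodyPart.nose], pose.y[yolo.PoseBodyPart.nose])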
datachain/lib/settings.py
CHANGED
@@ -7,11 +7,19 @@ class SettingsError(DataChainParamsError):


 class Settings:
-    def __init__(
+    def __init__(
+        self,
+        cache=None,
+        parallel=None,
+        workers=None,
+        min_task_size=None,
+        prefetch=None,
+    ):
         self._cache = cache
         self.parallel = parallel
         self._workers = workers
         self.min_task_size = min_task_size
+        self.prefetch = prefetch

         if not isinstance(cache, bool) and cache is not None:
             raise SettingsError(
@@ -66,3 +74,5 @@ class Settings:
         self.parallel = settings.parallel or self.parallel
         self._workers = settings._workers or self._workers
         self.min_task_size = settings.min_task_size or self.min_task_size
+        if settings.prefetch is not None:
+            self.prefetch = settings.prefetch
datachain/lib/udf.py
CHANGED
@@ -1,3 +1,4 @@
+import contextlib
 import sys
 import traceback
 from collections.abc import Iterable, Iterator, Mapping, Sequence
@@ -7,6 +8,7 @@ import attrs
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 from pydantic import BaseModel

+from datachain.asyn import AsyncMapper
 from datachain.dataset import RowDict
 from datachain.lib.convert.flatten import flatten
 from datachain.lib.data_model import DataValue
@@ -21,6 +23,8 @@ from datachain.query.batch import (
 )

 if TYPE_CHECKING:
+    from collections import abc
+
     from typing_extensions import Self

     from datachain.catalog import Catalog
@@ -276,9 +280,18 @@ class UDFBase(AbstractUDF):
         return result_objs


+async def _prefetch_input(row):
+    for obj in row:
+        if isinstance(obj, File):
+            await obj._prefetch()
+    return row
+
+
 class Mapper(UDFBase):
     """Inherit from this class to pass to `DataChain.map()`."""

+    prefetch: int = 2
+
     def run(
         self,
         udf_fields: "Sequence[str]",
@@ -290,16 +303,22 @@ class Mapper(UDFBase):
     ) -> Iterator[Iterable[UDFResult]]:
         self.catalog = catalog
         self.setup()
-
-
-
-
-
-
-
-
-
-
+        prepared_inputs: abc.Generator[Sequence[Any], None, None] = (
+            self._prepare_row_and_id(row, udf_fields, cache, download_cb)
+            for row in udf_inputs
+        )
+        if self.prefetch > 0:
+            prepared_inputs = AsyncMapper(
+                _prefetch_input, prepared_inputs, workers=self.prefetch
+            ).iterate()
+
+        with contextlib.closing(prepared_inputs):
+            for id_, *udf_args in prepared_inputs:
+                result_objs = self.process_safe(udf_args)
+                udf_output = self._flatten_row(result_objs)
+                output = [{"sys__id": id_} | dict(zip(self.signal_names, udf_output))]
+                processed_cb.relative_update(1)
+                yield output

         self.teardown()

@@ -349,6 +368,7 @@ class Generator(UDFBase):
     """Inherit from this class to pass to `DataChain.gen()`."""

     is_output_batched = True
+    prefetch: int = 2

     def run(
         self,
@@ -361,14 +381,21 @@ class Generator(UDFBase):
     ) -> Iterator[Iterable[UDFResult]]:
         self.catalog = catalog
         self.setup()
-
-
-
-
-
-
-
-
+        prepared_inputs: abc.Generator[Sequence[Any], None, None] = (
+            self._prepare_row(row, udf_fields, cache, download_cb) for row in udf_inputs
+        )
+        if self.prefetch > 0:
+            prepared_inputs = AsyncMapper(
+                _prefetch_input, prepared_inputs, workers=self.prefetch
+            ).iterate()
+
+        with contextlib.closing(prepared_inputs):
+            for row in prepared_inputs:
+                result_objs = self.process_safe(row)
+                udf_outputs = (self._flatten_row(row) for row in result_objs)
+                output = (dict(zip(self.signal_names, row)) for row in udf_outputs)
+                processed_cb.relative_update(1)
+                yield output

         self.teardown()

datachain/node.py
CHANGED
datachain/query/dataset.py
CHANGED
@@ -473,33 +473,31 @@ class UDFStep(Step, ABC):
         # Otherwise process single-threaded (faster for smaller UDFs)
         warehouse = self.catalog.warehouse

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            processed_cb.close()
-            generated_cb.close()
+        udf_inputs = batching(warehouse.dataset_select_paginated, query)
+        download_cb = get_download_callback()
+        processed_cb = get_processed_callback()
+        generated_cb = get_generated_callback(self.is_generator)
+        try:
+            udf_results = self.udf.run(
+                udf_fields,
+                udf_inputs,
+                self.catalog,
+                self.is_generator,
+                self.cache,
+                download_cb,
+                processed_cb,
+            )
+            process_udf_outputs(
+                warehouse,
+                udf_table,
+                udf_results,
+                self.udf,
+                cb=generated_cb,
+            )
+        finally:
+            download_cb.close()
+            processed_cb.close()
+            generated_cb.close()

         warehouse.insert_rows_done(udf_table)

datachain/sql/sqlite/types.py
CHANGED
@@ -36,7 +36,14 @@ def convert_array(arr):


 def adapt_np_array(arr):
-
+    def _json_serialize(obj):
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        return obj
+
+    return orjson.dumps(
+        arr, option=orjson.OPT_SERIALIZE_NUMPY, default=_json_serialize
+    ).decode("utf-8")


 def adapt_np_generic(val):
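A rough standalone sketch (not from the package) of what the reworked adapt_np_array does: orjson serializes common numeric arrays natively via OPT_SERIALIZE_NUMPY, and the default= hook converts anything it cannot handle, such as object-dtype arrays, with ndarray.tolist().

import numpy as np
import orjson

def _json_serialize(obj):
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj

def adapt_np_array(arr):
    return orjson.dumps(
        arr, option=orjson.OPT_SERIALIZE_NUMPY, default=_json_serialize
    ).decode("utf-8")

print(adapt_np_array(np.array([1.5, 2.5])))                    # handled natively by orjson
print(adapt_np_array(np.array([[1], [2, 3]], dtype=object)))   # handled via the default hook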
{datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 datachain/__init__.py,sha256=nnTyB5MpCfBZ6D85JPz-5hUT7i-68Is-47Bxgew8lRw,930
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
-datachain/asyn.py,sha256=
+datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
 datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
 datachain/cli.py,sha256=hdVt_HJumQVgtaBAtBVJm-uPyYVogMXNVLmRcZyWHgk,36677
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
@@ -9,7 +9,7 @@ datachain/dataset.py,sha256=0IN-5y723y-bnFlieKtOFZLCjwX_yplFo3q0DV7LRPw,14821
 datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
 datachain/listing.py,sha256=TgKg25ZWAP5enzKgw2_2GUPJVdnQUh6uySHB5SJrUY4,7773
-datachain/node.py,sha256=
+datachain/node.py,sha256=o8Sqy92QkzzcLK6XmIFLyDSE6Rw6kUTmGRhEmfLFdhg,5211
 datachain/nodes_fetcher.py,sha256=F-73-h19HHNGtHFBGKk7p3mc0ALm4a9zGnzhtuUjnp4,1107
 datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
 datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
@@ -37,14 +37,14 @@ datachain/data_storage/metastore.py,sha256=5b7o_CSHC2djottebYn-Hq5q0yaSLOKPIRCna
 datachain/data_storage/schema.py,sha256=scANMQqozita3HjEtq7eupMgh6yYkrZHoXtfuL2RoQg,9879
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=CspRUlYsIcubgzvcQxTACnmcuKESSLZcqCl0dcrtRiA,27471
-datachain/data_storage/warehouse.py,sha256=
+datachain/data_storage/warehouse.py,sha256=yXNU0U3exzR1E6dqbYYmL4RhXWsbYWVdZ3jONGcVniY,30914
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=-hu9tic79a01SY2UBqkA3U6wUr6tnE3T3q5q_BnO93A,9156
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=dau4AlZBhOFvF7pEKMeqCeRkcFFg5KFvTBWW_2CdH5g,2371
 datachain/lib/dataset_info.py,sha256=q0EW9tj5jXGSD9Lzct9zbH4P1lfIGd_cIWqhnMxv7Q0,2464
-datachain/lib/dc.py,sha256=
-datachain/lib/file.py,sha256
+datachain/lib/dc.py,sha256=u0RQJPG0zwxsoYS-4wrbDBPuLYZajwIi1YX37khKfkI,87942
+datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
 datachain/lib/hf.py,sha256=BW2NPpqxkpPwkSaGlppT8Rbs8zPpyYC-tR6htY08c-0,5817
 datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=cVkCp7TRVpcZKSx-Bbk9t51bQI9Mw0o86W6ZPhAsuzM,3667
@@ -52,11 +52,11 @@ datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g
 datachain/lib/meta_formats.py,sha256=anK2bDVbaeCCh0yvKUBaW2MVos3zRgdaSV8uSduzPcU,6680
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
 datachain/lib/pytorch.py,sha256=W-ARi2xH1f1DUkVfRuerW-YWYgSaJASmNCxtz2lrJGI,6072
-datachain/lib/settings.py,sha256=
+datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
 datachain/lib/signal_schema.py,sha256=xwkE5bxJxUhZTjrA6jqN87XbSXPikCbL6eOPL9WyrKM,24556
 datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
-datachain/lib/udf.py,sha256
+datachain/lib/udf.py,sha256=-j0krjNAELTqRI0dB1N65AmawtcIY5vN---AuUcW8Us,13637
 datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
 datachain/lib/utils.py,sha256=6NwgWLl5JrgtD4rsSFEe-yR2ntEwJMJEtAZ3FIxK3fg,1529
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -71,17 +71,13 @@ datachain/lib/convert/values_to_tuples.py,sha256=varRCnSMT_pZmHznrd2Yi05qXLLz_v9
 datachain/lib/func/__init__.py,sha256=wlAKhGV0QDg9y7reSwoUF8Vicfqh_YOUNIXLzxICGz4,403
 datachain/lib/func/aggregate.py,sha256=H1ziFQdaK9zvnxvttfnEzkkyGvEEmMAvmgCsBV6nfm8,10917
 datachain/lib/func/func.py,sha256=HAJZ_tpiRG2R-et7pr0WnoyNZYtpbPn3_HBuL3RQpbU,4800
-datachain/lib/models/__init__.py,sha256=
-datachain/lib/models/bbox.py,sha256=
-datachain/lib/models/pose.py,sha256=
-datachain/lib/models/
-datachain/lib/models/ultralytics/__init__.py,sha256=g8mgII0k_RJiOG9kd4k_ECfCgDhT_iPh3vCC_5OiDD4,305
-datachain/lib/models/ultralytics/bbox.py,sha256=LAaezAnnugfBiczWZ63NTo65kX2BegR5WGXjQTOTE28,5784
-datachain/lib/models/ultralytics/pose.py,sha256=nMoEeeY_Zi7Iiu7vIo9ZTq8ARUdg_BcZMQIA_WgRNk4,3488
-datachain/lib/models/ultralytics/segment.py,sha256=IHnthsq6uQ6DSdHLK2akbdd0Eq8wW7oaAK6pUG8nxJc,3818
+datachain/lib/models/__init__.py,sha256=AGvjPbUokJiir3uelTa4XGtNSECkMFc5Xmi_N3AtxPQ,119
+datachain/lib/models/bbox.py,sha256=aiYNhvEcRK3dEN4MBcptmkPKc9kMP16ZQdu7xPk6hek,1555
+datachain/lib/models/pose.py,sha256=peuJPNSiGuTXfCfGIABwv8PGYistvTTBmtf-8X8E_eA,1077
+datachain/lib/models/yolo.py,sha256=eftoJDUa8iOpFTF1EkKVAd5Q-3HRd6X4eCIZ9h5p4nI,972
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=5fEhORFe7li12SdYddaSK3LyqksMfCHhwN1_A6TfsA4,3485
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=sQny-ZemB2HueC4mPg-7qSaqUD85MMO-DQyVVP8K1CA,53765
 datachain/query/dispatch.py,sha256=wjjTWw6sFQbB9SKRh78VbfvwSMgJXCfqJklS3-9KnCU,12025
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -105,14 +101,14 @@ datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTE
 datachain/sql/functions/string.py,sha256=DYgiw8XSk7ge7GXvyRI1zbaMruIizNeI-puOjriQGZQ,1148
 datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
 datachain/sql/sqlite/base.py,sha256=aHSZVvh4XSVkvZ07h3jMoRlHI4sWD8y3SnmGs9xMG9Y,14375
-datachain/sql/sqlite/types.py,sha256=
+datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=6FcEJgUsJsUcCqKW5aXuJy4DvbcQ7_dFbsfNPhn8EVg,2377
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
+datachain-0.7.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.7.0.dist-info/METADATA,sha256=Cm0v22C-aT14JBG4NjNQmM7kvhYZFAf6rh0yDAMRWFU,18037
+datachain-0.7.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+datachain-0.7.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.7.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.7.0.dist-info/RECORD,,
datachain/lib/models/segment.py
DELETED
@@ -1,53 +0,0 @@
-from pydantic import Field
-
-from datachain.lib.data_model import DataModel
-
-
-class Segments(DataModel):
-    """
-    A data model for representing segments.
-
-    Attributes:
-        title (str): The title of the segments.
-        x (list[int]): The x-coordinates of the segments.
-        y (list[int]): The y-coordinates of the segments.
-
-    The segments are represented as lists of x and y coordinates, where each index
-    corresponds to a specific segment.
-    """
-
-    title: str = Field(default="")
-    x: list[int] = Field(default=None)
-    y: list[int] = Field(default=None)
-
-    @staticmethod
-    def from_list(points: list[list[float]], title: str = "") -> "Segments":
-        assert len(points) == 2, "Segments coordinates must be a list of 2 lists."
-        points_x, points_y = points
-        assert len(points_x) == len(
-            points_y
-        ), "Segments x and y coordinates must have the same length."
-        assert all(
-            isinstance(value, (int, float)) for value in [*points_x, *points_y]
-        ), "Segments coordinates must be integers or floats."
-        return Segments(
-            title=title,
-            x=[round(coord) for coord in points_x],
-            y=[round(coord) for coord in points_y],
-        )
-
-    @staticmethod
-    def from_dict(points: dict[str, list[float]], title: str = "") -> "Segments":
-        assert set(points) == {
-            "x",
-            "y",
-        }, "Segments coordinates must contain keys 'x' and 'y'."
-        points_x, points_y = points["x"], points["y"]
-        assert all(
-            isinstance(value, (int, float)) for value in [*points_x, *points_y]
-        ), "Segments coordinates must be integers or floats."
-        return Segments(
-            title=title,
-            x=[round(coord) for coord in points_x],
-            y=[round(coord) for coord in points_y],
-        )
datachain/lib/models/ultralytics/__init__.py
DELETED

@@ -1,14 +0,0 @@
-from .bbox import YoloBBox, YoloBBoxes, YoloOBBox, YoloOBBoxes
-from .pose import YoloPose, YoloPoses
-from .segment import YoloSegment, YoloSegments
-
-__all__ = [
-    "YoloBBox",
-    "YoloBBoxes",
-    "YoloOBBox",
-    "YoloOBBoxes",
-    "YoloPose",
-    "YoloPoses",
-    "YoloSegment",
-    "YoloSegments",
-]
datachain/lib/models/ultralytics/bbox.py
DELETED

@@ -1,189 +0,0 @@
-"""
-This module contains the YOLO models.
-
-YOLO stands for "You Only Look Once", a family of object detection models that
-are designed to be fast and accurate. The models are trained to detect objects
-in images by dividing the image into a grid and predicting the bounding boxes
-and class probabilities for each grid cell.
-
-More information about YOLO can be found here:
-- https://pjreddie.com/darknet/yolo/
-- https://docs.ultralytics.com/
-"""
-
-from io import BytesIO
-from typing import TYPE_CHECKING
-
-from PIL import Image
-from pydantic import Field
-
-from datachain.lib.data_model import DataModel
-from datachain.lib.models.bbox import BBox, OBBox
-
-if TYPE_CHECKING:
-    from ultralytics.engine.results import Results
-    from ultralytics.models import YOLO
-
-    from datachain.lib.file import File
-
-
-class YoloBBox(DataModel):
-    """
-    A class representing a bounding box detected by a YOLO model.
-
-    Attributes:
-        cls: The class of the detected object.
-        name: The name of the detected object.
-        confidence: The confidence score of the detection.
-        box: The bounding box of the detected object
-    """
-
-    cls: int = Field(default=-1)
-    name: str = Field(default="")
-    confidence: float = Field(default=0)
-    box: BBox = Field(default=None)
-
-    @staticmethod
-    def from_file(yolo: "YOLO", file: "File") -> "YoloBBox":
-        results = yolo(Image.open(BytesIO(file.read())))
-        if len(results) == 0:
-            return YoloBBox()
-        return YoloBBox.from_result(results[0])
-
-    @staticmethod
-    def from_result(result: "Results") -> "YoloBBox":
-        summary = result.summary()
-        if not summary:
-            return YoloBBox()
-        name = summary[0].get("name", "")
-        box = (
-            BBox.from_dict(summary[0]["box"], title=name)
-            if "box" in summary[0]
-            else BBox()
-        )
-        return YoloBBox(
-            cls=summary[0]["class"],
-            name=name,
-            confidence=summary[0]["confidence"],
-            box=box,
-        )
-
-
-class YoloBBoxes(DataModel):
-    """
-    A class representing a list of bounding boxes detected by a YOLO model.
-
-    Attributes:
-        cls: A list of classes of the detected objects.
-        name: A list of names of the detected objects.
-        confidence: A list of confidence scores of the detections.
-        box: A list of bounding boxes of the detected objects
-    """
-
-    cls: list[int]
-    name: list[str]
-    confidence: list[float]
-    box: list[BBox]
-
-    @staticmethod
-    def from_file(yolo: "YOLO", file: "File") -> "YoloBBoxes":
-        results = yolo(Image.open(BytesIO(file.read())))
-        return YoloBBoxes.from_results(results)
-
-    @staticmethod
-    def from_results(results: list["Results"]) -> "YoloBBoxes":
-        cls, names, confidence, box = [], [], [], []
-        for r in results:
-            for s in r.summary():
-                name = s.get("name", "")
-                cls.append(s["class"])
-                names.append(name)
-                confidence.append(s["confidence"])
-                box.append(BBox.from_dict(s.get("box", {}), title=name))
-        return YoloBBoxes(
-            cls=cls,
-            name=names,
-            confidence=confidence,
-            box=box,
-        )
-
-
-class YoloOBBox(DataModel):
-    """
-    A class representing an oriented bounding box detected by a YOLO model.
-
-    Attributes:
-        cls: The class of the detected object.
-        name: The name of the detected object.
-        confidence: The confidence score of the detection.
-        box: The oriented bounding box of the detected object.
-    """
-
-    cls: int = Field(default=-1)
-    name: str = Field(default="")
-    confidence: float = Field(default=0)
-    box: OBBox = Field(default=None)
-
-    @staticmethod
-    def from_file(yolo: "YOLO", file: "File") -> "YoloOBBox":
-        results = yolo(Image.open(BytesIO(file.read())))
-        if len(results) == 0:
-            return YoloOBBox()
-        return YoloOBBox.from_result(results[0])
-
-    @staticmethod
-    def from_result(result: "Results") -> "YoloOBBox":
-        summary = result.summary()
-        if not summary:
-            return YoloOBBox()
-        name = summary[0].get("name", "")
-        box = (
-            OBBox.from_dict(summary[0]["box"], title=name)
-            if "box" in summary[0]
-            else OBBox()
-        )
-        return YoloOBBox(
-            cls=summary[0]["class"],
-            name=name,
-            confidence=summary[0]["confidence"],
-            box=box,
-        )
-
-
-class YoloOBBoxes(DataModel):
-    """
-    A class representing a list of oriented bounding boxes detected by a YOLO model.
-
-    Attributes:
-        cls: A list of classes of the detected objects.
-        name: A list of names of the detected objects.
-        confidence: A list of confidence scores of the detections.
-        box: A list of oriented bounding boxes of the detected objects.
-    """
-
-    cls: list[int]
-    name: list[str]
-    confidence: list[float]
-    box: list[OBBox]
-
-    @staticmethod
-    def from_file(yolo: "YOLO", file: "File") -> "YoloOBBoxes":
-        results = yolo(Image.open(BytesIO(file.read())))
-        return YoloOBBoxes.from_results(results)
-
-    @staticmethod
-    def from_results(results: list["Results"]) -> "YoloOBBoxes":
-        cls, names, confidence, box = [], [], [], []
-        for r in results:
-            for s in r.summary():
-                name = s.get("name", "")
-                cls.append(s["class"])
-                names.append(name)
-                confidence.append(s["confidence"])
-                box.append(OBBox.from_dict(s.get("box", {}), title=name))
-        return YoloOBBoxes(
-            cls=cls,
-            name=names,
-            confidence=confidence,
-            box=box,
-        )
datachain/lib/models/ultralytics/pose.py
DELETED

@@ -1,126 +0,0 @@
-"""
-This module contains the YOLO models.
-
-YOLO stands for "You Only Look Once", a family of object detection models that
-are designed to be fast and accurate. The models are trained to detect objects
-in images by dividing the image into a grid and predicting the bounding boxes
-and class probabilities for each grid cell.
-
-More information about YOLO can be found here:
-- https://pjreddie.com/darknet/yolo/
-- https://docs.ultralytics.com/
-"""
-
-from typing import TYPE_CHECKING
-
-from pydantic import Field
-
-from datachain.lib.data_model import DataModel
-from datachain.lib.models.bbox import BBox
-from datachain.lib.models.pose import Pose3D
-
-if TYPE_CHECKING:
-    from ultralytics.engine.results import Results
-
-
-class YoloPoseBodyPart:
-    """An enumeration of body parts for YOLO pose keypoints."""
-
-    nose = 0
-    left_eye = 1
-    right_eye = 2
-    left_ear = 3
-    right_ear = 4
-    left_shoulder = 5
-    right_shoulder = 6
-    left_elbow = 7
-    right_elbow = 8
-    left_wrist = 9
-    right_wrist = 10
-    left_hip = 11
-    right_hip = 12
-    left_knee = 13
-    right_knee = 14
-    left_ankle = 15
-    right_ankle = 16
-
-
-class YoloPose(DataModel):
-    """
-    A data model for YOLO pose keypoints.
-
-    Attributes:
-        cls: The class of the pose.
-        name: The name of the pose.
-        confidence: The confidence score of the pose.
-        box: The bounding box of the pose.
-        keypoints: The 3D pose keypoints.
-    """
-
-    cls: int = Field(default=-1)
-    name: str = Field(default="")
-    confidence: float = Field(default=0)
-    box: BBox = Field(default=None)
-    keypoints: Pose3D = Field(default=None)
-
-    @staticmethod
-    def from_result(result: "Results") -> "YoloPose":
-        summary = result.summary()
-        if not summary:
-            return YoloPose()
-        name = summary[0].get("name", "")
-        box = (
-            BBox.from_dict(summary[0]["box"], title=name)
-            if "box" in summary[0]
-            else BBox()
-        )
-        keypoints = (
-            Pose3D.from_dict(summary[0]["keypoints"])
-            if "keypoints" in summary[0]
-            else Pose3D()
-        )
-        return YoloPose(
-            cls=summary[0]["class"],
-            name=name,
-            confidence=summary[0]["confidence"],
-            box=box,
-            keypoints=keypoints,
-        )
-
-
-class YoloPoses(DataModel):
-    """
-    A data model for a list of YOLO pose keypoints.
-
-    Attributes:
-        cls: The classes of the poses.
-        name: The names of the poses.
-        confidence: The confidence scores of the poses.
-        box: The bounding boxes of the poses.
-        keypoints: The 3D pose keypoints of the poses.
-    """
-
-    cls: list[int]
-    name: list[str]
-    confidence: list[float]
-    box: list[BBox]
-    keypoints: list[Pose3D]
-
-    @staticmethod
-    def from_results(results: list["Results"]) -> "YoloPoses":
-        cls, names, confidence, box, keypoints = [], [], [], [], []
-        for r in results:
-            for s in r.summary():
-                name = s.get("name", "")
-                cls.append(s["class"])
-                names.append(name)
-                confidence.append(s["confidence"])
-                box.append(BBox.from_dict(s.get("box", {}), title=name))
-                keypoints.append(Pose3D.from_dict(s.get("keypoints", {})))
-        return YoloPoses(
-            cls=cls,
-            name=names,
-            confidence=confidence,
-            box=box,
-            keypoints=keypoints,
-        )
datachain/lib/models/ultralytics/segment.py
DELETED

@@ -1,121 +0,0 @@
-"""
-This module contains the YOLO models.
-
-YOLO stands for "You Only Look Once", a family of object detection models that
-are designed to be fast and accurate. The models are trained to detect objects
-in images by dividing the image into a grid and predicting the bounding boxes
-and class probabilities for each grid cell.
-
-More information about YOLO can be found here:
-- https://pjreddie.com/darknet/yolo/
-- https://docs.ultralytics.com/
-"""
-
-from io import BytesIO
-from typing import TYPE_CHECKING
-
-from PIL import Image
-from pydantic import Field
-
-from datachain.lib.data_model import DataModel
-from datachain.lib.models.bbox import BBox
-from datachain.lib.models.segment import Segments
-
-if TYPE_CHECKING:
-    from ultralytics.engine.results import Results
-    from ultralytics.models import YOLO
-
-    from datachain.lib.file import File
-
-
-class YoloSegment(DataModel):
-    """
-    A data model for a single YOLO segment.
-
-    Attributes:
-        cls (int): The class of the segment.
-        name (str): The name of the segment.
-        confidence (float): The confidence of the segment.
-        box (BBox): The bounding box of the segment.
-        segments (Segments): The segments of the segment.
-    """
-
-    cls: int = Field(default=-1)
-    name: str = Field(default="")
-    confidence: float = Field(default=0)
-    box: BBox = Field(default=None)
-    segments: Segments = Field(default=None)
-
-    @staticmethod
-    def from_file(yolo: "YOLO", file: "File") -> "YoloSegment":
-        results = yolo(Image.open(BytesIO(file.read())))
-        if len(results) == 0:
-            return YoloSegment()
-        return YoloSegment.from_result(results[0])
-
-    @staticmethod
-    def from_result(result: "Results") -> "YoloSegment":
-        summary = result.summary()
-        if not summary:
-            return YoloSegment()
-        name = summary[0].get("name", "")
-        box = (
-            BBox.from_dict(summary[0]["box"], title=name)
-            if "box" in summary[0]
-            else BBox()
-        )
-        segments = (
-            Segments.from_dict(summary[0]["segments"], title=name)
-            if "segments" in summary[0]
-            else Segments()
-        )
-        return YoloSegment(
-            cls=summary[0]["class"],
-            name=summary[0]["name"],
-            confidence=summary[0]["confidence"],
-            box=box,
-            segments=segments,
-        )
-
-
-class YoloSegments(DataModel):
-    """
-    A data model for a list of YOLO segments.
-
-    Attributes:
-        cls (list[int]): The classes of the segments.
-        name (list[str]): The names of the segments.
-        confidence (list[float]): The confidences of the segments.
-        box (list[BBox]): The bounding boxes of the segments.
-        segments (list[Segments]): The segments of the segments.
-    """
-
-    cls: list[int]
-    name: list[str]
-    confidence: list[float]
-    box: list[BBox]
-    segments: list[Segments]
-
-    @staticmethod
-    def from_file(yolo: "YOLO", file: "File") -> "YoloSegments":
-        results = yolo(Image.open(BytesIO(file.read())))
-        return YoloSegments.from_results(results)
-
-    @staticmethod
-    def from_results(results: list["Results"]) -> "YoloSegments":
-        cls, names, confidence, box, segments = [], [], [], [], []
-        for r in results:
-            for s in r.summary():
-                name = s.get("name", "")
-                cls.append(s["class"])
-                names.append(name)
-                confidence.append(s["confidence"])
-                box.append(BBox.from_dict(s.get("box", {}), title=name))
-                segments.append(Segments.from_dict(s.get("segments", {}), title=name))
-        return YoloSegments(
-            cls=cls,
-            name=names,
-            confidence=confidence,
-            box=box,
-            segments=segments,
-        )
{datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/LICENSE
File without changes
{datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/WHEEL
File without changes
{datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/entry_points.txt
File without changes
{datachain-0.6.10.dist-info → datachain-0.7.0.dist-info}/top_level.txt
File without changes