dragoneye-python 1.0.2__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dragoneye_python-2.0.0/PKG-INFO +671 -0
- dragoneye_python-2.0.0/README.md +637 -0
- dragoneye_python-2.0.0/dragoneye_python.egg-info/PKG-INFO +671 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/pyproject.toml +1 -1
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/__init__.py +20 -12
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/classification.py +25 -9
- dragoneye_python-2.0.0/src/dragoneye/models.py +157 -0
- dragoneye_python-2.0.0/src/dragoneye/parquet_deserializer.py +245 -0
- dragoneye_python-1.0.2/PKG-INFO +0 -505
- dragoneye_python-1.0.2/README.md +0 -471
- dragoneye_python-1.0.2/dragoneye_python.egg-info/PKG-INFO +0 -505
- dragoneye_python-1.0.2/src/dragoneye/models.py +0 -64
- dragoneye_python-1.0.2/src/dragoneye/parquet_deserializer.py +0 -136
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/LICENSE +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/dragoneye_python.egg-info/SOURCES.txt +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/dragoneye_python.egg-info/dependency_links.txt +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/dragoneye_python.egg-info/requires.txt +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/dragoneye_python.egg-info/top_level.txt +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/requirements.txt +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/setup.cfg +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/client.py +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/constants.py +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/types/__init__.py +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/types/common.py +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/types/exception.py +0 -0
- {dragoneye_python-1.0.2 → dragoneye_python-2.0.0}/src/dragoneye/types/media.py +0 -0
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dragoneye-python
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Official Python SDK for the Dragoneye computer vision API
|
|
5
|
+
Author-email: Dragoneye <support@dragoneye.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://dragoneye.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.dragoneye.ai/integrating/python-sdk
|
|
9
|
+
Project-URL: Repository, https://github.com/dragoneyeAI/dragoneye-python
|
|
10
|
+
Project-URL: Playground, https://playground.dragoneye.ai/
|
|
11
|
+
Project-URL: Issues, https://github.com/dragoneyeAI/dragoneye-python/issues
|
|
12
|
+
Keywords: dragoneye,computer-vision,image-classification,video-classification,machine-learning,ai
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: requests
|
|
28
|
+
Requires-Dist: pydantic>=2
|
|
29
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
30
|
+
Requires-Dist: backoff>=2.0.0
|
|
31
|
+
Requires-Dist: aiohttp
|
|
32
|
+
Requires-Dist: polars>=1.0.0
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# dragoneye-python
|
|
36
|
+
|
|
37
|
+
[![PyPI version](https://img.shields.io/pypi/v/dragoneye-python.svg)](https://pypi.org/project/dragoneye-python/)
|
|
38
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/dragoneye-python.svg)](https://pypi.org/project/dragoneye-python/)
|
|
39
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/dragoneyeAI/dragoneye-python/blob/main/LICENSE)
|
|
40
|
+
|
|
41
|
+
The official Python SDK for [Dragoneye](https://dragoneye.ai) — build and call custom computer vision models from Python.
|
|
42
|
+
|
|
43
|
+
Describe what you want to detect in plain English on the [Dragoneye Playground](https://playground.dragoneye.ai/), and the AI Model Builder assembles a vision model with your categories and attributes. This SDK lets you run that model on images and videos and get back structured predictions with bounding boxes, category scores, and attribute scores.
|
|
44
|
+
|
|
45
|
+
- 📘 **Full documentation:** https://docs.dragoneye.ai/integrating/python-sdk
|
|
46
|
+
- 🎮 **Playground:** https://playground.dragoneye.ai/
|
|
47
|
+
- 🐍 **PyPI:** https://pypi.org/project/dragoneye-python/
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Using the Python SDK
|
|
52
|
+
|
|
53
|
+
If you're integrating with our APIs using Python, the Dragoneye SDK streamlines the process with minimal setup. Here's how you can get started and explore the types and endpoints in detail.
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
Install the package using pip.
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install dragoneye-python
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
> **Tip — Prerequisites**: Don't have an API key yet? See [Creating an Access Token](https://docs.dragoneye.ai/account-management/creating-access-token).
|
|
66
|
+
|
|
67
|
+
The following example runs a model on an image and on a video, then reads the image results:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
import asyncio
|
|
71
|
+
from dragoneye import Dragoneye, Image, Video
|
|
72
|
+
|
|
73
|
+
async def main():
|
|
74
|
+
# The api_key can also be set via the DRAGONEYE_API_KEY environment variable.
|
|
75
|
+
client = Dragoneye(api_key="<YOUR_ACCESS_TOKEN>")
|
|
76
|
+
|
|
77
|
+
# Example: predict from an image
|
|
78
|
+
image = Image.from_path("photo.jpg")
|
|
79
|
+
image_result = await client.classification.predict_image(
|
|
80
|
+
media=image,
|
|
81
|
+
model_name="recognize_anything/your_model_name", # change to your desired model
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Example: predict from a video
|
|
85
|
+
# NOTE: When loading a file, you can optionally pass a name or identifier
|
|
86
|
+
# of your own so you can recognize the file later.
|
|
87
|
+
video = Video.from_path(
|
|
88
|
+
path="example.mp4",
|
|
89
|
+
name="any-file-name",
|
|
90
|
+
)
|
|
91
|
+
video_result = await client.classification.predict_video(
|
|
92
|
+
media=video,
|
|
93
|
+
model_name="recognize_anything/your_model_name",
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Accessing image results
|
|
97
|
+
for obj in image_result.objects:
|
|
98
|
+
bbox = obj.bbox_observation.normalized_bbox
|
|
99
|
+
top_category = max(obj.categories, key=lambda c: c.score)
|
|
100
|
+
print(f"Category: {top_category.name} ({top_category.score:.2f})")
|
|
101
|
+
for attr in top_category.attributes:
|
|
102
|
+
print(f" {attr.attribute_name}: {attr.option_name} ({attr.score:.2f})")
|
|
103
|
+
|
|
104
|
+
asyncio.run(main())
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
> **Note — Model names**: Model names follow the format `recognize_anything/model_name`. Use the name you specified when creating the model.
|
|
108
|
+
|
|
109
|
+
## How predictions are structured
|
|
110
|
+
|
|
111
|
+
Both endpoints return a list of **objects** the model detected. Each object has:
|
|
112
|
+
|
|
113
|
+
- A **bounding box** — where the object is, in normalized `(x1, y1, x2, y2)` coordinates.
|
|
114
|
+
- One or more **categories** — what the object is, each with a confidence `score`.
|
|
115
|
+
- A list of **attributes** on each category — additional properties the model predicted (for example, a building's exterior color), each as the chosen option plus a score.
|
|
116
|
+
|
|
117
|
+
Images and videos return slightly different object shapes. An **image** is a single moment, so each object has one bounding box and one score per attribute. A **video** adds a time dimension: the same object is tracked across frames, so it carries the timestamps where it appeared, a bounding box per sampled frame, and attribute scores that can change over time.
|
|
118
|
+
|
|
119
|
+
### Example Image Response
|
|
120
|
+
|
|
121
|
+
Below is an example of what a `ClassificationPredictImageResponse` looks like for a Building Detection model. The response is a flat list of `objects`, where each `ImageDetectedObject` is a single detected object with one bounding box and a score per attribute:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
ClassificationPredictImageResponse(
|
|
125
|
+
prediction_task_uuid="a1b2c3d4-e5f6-7890-abcd-ef1234567890",
|
|
126
|
+
original_file_name="my-photo",
|
|
127
|
+
objects=[
|
|
128
|
+
ImageDetectedObject(
|
|
129
|
+
object_id=1,
|
|
130
|
+
bbox_observation=BboxObservation(normalized_bbox=(0.12, 0.25, 0.55, 0.78), bbox_score=0.97),
|
|
131
|
+
categories=[
|
|
132
|
+
ImageCategoryPrediction(
|
|
133
|
+
category_id=2084323334,
|
|
134
|
+
name="House (detached)",
|
|
135
|
+
score=0.92,
|
|
136
|
+
attributes=[
|
|
137
|
+
ImageAttributePrediction(
|
|
138
|
+
attribute_id=1371766615,
|
|
139
|
+
attribute_name="Building Exterior Color",
|
|
140
|
+
option_id=3498033303,
|
|
141
|
+
option_name="White / Off-white",
|
|
142
|
+
score=0.85,
|
|
143
|
+
),
|
|
144
|
+
ImageAttributePrediction(
|
|
145
|
+
attribute_id=448392115,
|
|
146
|
+
attribute_name="Building Exterior Material",
|
|
147
|
+
option_id=3887467550,
|
|
148
|
+
option_name="Wood (incl. timber siding)",
|
|
149
|
+
score=0.78,
|
|
150
|
+
),
|
|
151
|
+
# ... more attributes omitted for brevity
|
|
152
|
+
],
|
|
153
|
+
),
|
|
154
|
+
],
|
|
155
|
+
),
|
|
156
|
+
ImageDetectedObject(
|
|
157
|
+
object_id=2,
|
|
158
|
+
bbox_observation=BboxObservation(normalized_bbox=(0.60, 0.30, 0.88, 0.75), bbox_score=0.90),
|
|
159
|
+
categories=[
|
|
160
|
+
ImageCategoryPrediction(
|
|
161
|
+
category_id=3212613421,
|
|
162
|
+
name="Garage (detached)",
|
|
163
|
+
score=0.87,
|
|
164
|
+
attributes=[
|
|
165
|
+
# ... attributes omitted for brevity
|
|
166
|
+
],
|
|
167
|
+
),
|
|
168
|
+
],
|
|
169
|
+
),
|
|
170
|
+
],
|
|
171
|
+
)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Each `ImageDetectedObject` has an `object_id`, a single `bbox_observation`, and its `categories`. The `bbox_observation` is a `BboxObservation` — the same bounding-box type videos use. Every attribute is one chosen option with a single `score`.
|
|
175
|
+
|
|
176
|
+
### Example Video Response
|
|
177
|
+
|
|
178
|
+
Below is an example of what a `ClassificationPredictVideoResponse` looks like for the same model. The response is a flat list of `objects`, where each `VideoDetectedObject` is a single object tracked across the whole video:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
ClassificationPredictVideoResponse(
|
|
182
|
+
prediction_task_uuid="a1b2c3d4-e5f6-7890-abcd-ef1234567890",
|
|
183
|
+
original_file_name="any-file-name",
|
|
184
|
+
frames_per_second=1,
|
|
185
|
+
# Every processed frame's timestamp (microseconds), sorted — including
|
|
186
|
+
# frames where nothing was detected.
|
|
187
|
+
frame_timestamps_microseconds=[0, 1000000, 2000000, 3000000],
|
|
188
|
+
objects=[
|
|
189
|
+
VideoDetectedObject(
|
|
190
|
+
object_id=1,
|
|
191
|
+
# When this object was on screen (in microseconds).
|
|
192
|
+
timestamp_ranges=[TimestampRange(timestamp_start_us_inclusive=0, timestamp_end_us_inclusive=3000000)],
|
|
193
|
+
# One observation per sampled frame in the object's lifespan.
|
|
194
|
+
bbox_observations=[
|
|
195
|
+
VideoBboxObservation(timestamp_microseconds=0, observation=BboxObservation(normalized_bbox=(0.12, 0.25, 0.55, 0.78), bbox_score=0.97)),
|
|
196
|
+
VideoBboxObservation(timestamp_microseconds=1000000, observation=BboxObservation(normalized_bbox=(0.13, 0.26, 0.56, 0.79), bbox_score=0.96)),
|
|
197
|
+
# Gap frame: the object is still on screen, but the model
|
|
198
|
+
# didn't predict a box for it this frame, so the whole
|
|
199
|
+
# observation is None. Skip these when drawing or denormalizing.
|
|
200
|
+
VideoBboxObservation(timestamp_microseconds=2000000, observation=None),
|
|
201
|
+
VideoBboxObservation(timestamp_microseconds=3000000, observation=BboxObservation(normalized_bbox=(0.14, 0.27, 0.57, 0.80), bbox_score=0.95)),
|
|
202
|
+
],
|
|
203
|
+
categories=[
|
|
204
|
+
VideoCategoryPrediction(
|
|
205
|
+
category_id=2084323334,
|
|
206
|
+
name="House (detached)",
|
|
207
|
+
score=0.92,
|
|
208
|
+
# Each attribute is the option the model predicted,
|
|
209
|
+
# together with the scored time spans over which
|
|
210
|
+
# that option held.
|
|
211
|
+
attributes=[
|
|
212
|
+
VideoAttributePrediction(
|
|
213
|
+
attribute_id=1371766615,
|
|
214
|
+
attribute_name="Building Exterior Color",
|
|
215
|
+
option_id=3498033303,
|
|
216
|
+
option_name="White / Off-white",
|
|
217
|
+
timestamp_ranges=[
|
|
218
|
+
ScoredTimestampRange(timestamp_start_us_inclusive=0, timestamp_end_us_inclusive=3000000, score=0.85),
|
|
219
|
+
],
|
|
220
|
+
),
|
|
221
|
+
VideoAttributePrediction(
|
|
222
|
+
attribute_id=448392115,
|
|
223
|
+
attribute_name="Building Exterior Material",
|
|
224
|
+
option_id=3887467550,
|
|
225
|
+
option_name="Wood (incl. timber siding)",
|
|
226
|
+
timestamp_ranges=[
|
|
227
|
+
ScoredTimestampRange(timestamp_start_us_inclusive=0, timestamp_end_us_inclusive=3000000, score=0.78),
|
|
228
|
+
],
|
|
229
|
+
),
|
|
230
|
+
# ... more attributes omitted for brevity
|
|
231
|
+
],
|
|
232
|
+
),
|
|
233
|
+
],
|
|
234
|
+
),
|
|
235
|
+
VideoDetectedObject(
|
|
236
|
+
object_id=2,
|
|
237
|
+
timestamp_ranges=[TimestampRange(timestamp_start_us_inclusive=0, timestamp_end_us_inclusive=1000000)],
|
|
238
|
+
bbox_observations=[
|
|
239
|
+
VideoBboxObservation(timestamp_microseconds=0, observation=BboxObservation(normalized_bbox=(0.60, 0.30, 0.88, 0.75), bbox_score=0.90)),
|
|
240
|
+
VideoBboxObservation(timestamp_microseconds=1000000, observation=BboxObservation(normalized_bbox=(0.61, 0.31, 0.89, 0.76), bbox_score=0.89)),
|
|
241
|
+
],
|
|
242
|
+
categories=[
|
|
243
|
+
VideoCategoryPrediction(
|
|
244
|
+
category_id=3212613421,
|
|
245
|
+
name="Garage (detached)",
|
|
246
|
+
score=0.87,
|
|
247
|
+
attributes=[
|
|
248
|
+
# ... attributes omitted for brevity
|
|
249
|
+
],
|
|
250
|
+
),
|
|
251
|
+
],
|
|
252
|
+
),
|
|
253
|
+
],
|
|
254
|
+
)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Read a video response one object at a time. Each `VideoDetectedObject` is one object the model tracked through the video, and it tells you:
|
|
258
|
+
|
|
259
|
+
- `object_id` — a stable id, so you can follow the same object from frame to frame.
|
|
260
|
+
- `timestamp_ranges` — when the object was on screen, in microseconds.
|
|
261
|
+
- `bbox_observations` — where the object was, with one observation per sampled frame in its lifespan.
|
|
262
|
+
- `categories` — what the object is, plus its attribute predictions.
|
|
263
|
+
|
|
264
|
+
The response also carries `frame_timestamps_microseconds`: the sorted list of **every** frame the model processed, in microseconds — including frames where nothing was detected. This is the full timeline of the video, broader than any single object's `timestamp_ranges` or `bbox_observations` (which only cover the frames where that object appeared). Use it to line a playback position up with a real frame: snap an arbitrary scrub time to the nearest value in this list, then look up detections at that timestamp. It's video-only — image responses don't have it.
|
|
265
|
+
|
|
266
|
+
> **Note — Gap frames**: A tracked object can stay on screen across frames where the model didn't predict a box for it. These are **gap frames**: the object is still within its `timestamp_ranges`, but for that frame the model produced no detection. The SDK keeps one `VideoBboxObservation` per sampled frame in the lifespan and sets its `observation` to `None` on gap frames, so a track's observations stay aligned to the frames it spans. Any code that draws or denormalizes coordinates must **skip observations where `observation is None`**:
|
|
267
|
+
>
|
|
268
|
+
> ```python
|
|
269
|
+
> for obs in obj.bbox_observations:
|
|
270
|
+
> if obs.observation is None:
|
|
271
|
+
> continue # gap frame — object on screen but no predicted box
|
|
272
|
+
> x1, y1, x2, y2 = obs.observation.normalized_bbox
|
|
273
|
+
> # ... draw / denormalize
|
|
274
|
+
> ```
|
|
275
|
+
>
|
|
276
|
+
> Gap frames only occur on the video path. Image objects always carry a real bounding box.
|
|
277
|
+
|
|
278
|
+
Attributes work a little differently in video because the model's answer can change over time. Each `VideoAttributePrediction` is one chosen option together with the time spans where that option applied. If the answer changes partway through (say a traffic light goes from green to red), the same attribute appears again with the new option. Images don't have a time dimension, so they use the simpler `ImageDetectedObject` shape shown above.
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
## Client
|
|
283
|
+
|
|
284
|
+
**`Dragoneye`**
|
|
285
|
+
|
|
286
|
+
The main client used to interact with the API.
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
client = Dragoneye(
|
|
290
|
+
api_key="<YOUR_ACCESS_TOKEN>",
|
|
291
|
+
max_retries=10,
|
|
292
|
+
max_backoff_time=120,
|
|
293
|
+
)
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
**Arguments:**
|
|
297
|
+
|
|
298
|
+
- `api_key` (Optional[str]): Your API key. If omitted, the SDK reads from the `DRAGONEYE_API_KEY` environment variable.
|
|
299
|
+
- `max_retries` (int): Maximum retry attempts on rate-limit (429) responses. Default: `10`.
|
|
300
|
+
- `max_backoff_time` (int): Maximum backoff time in seconds for exponential backoff. Default: `120`.
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
304
|
+
## Media Classes
|
|
305
|
+
|
|
306
|
+
`Image` and `Video` are used to wrap media before passing it to a prediction endpoint. Each class restricts the MIME type to its respective media type (`image/*` or `video/*`).
|
|
307
|
+
|
|
308
|
+
### Constructors
|
|
309
|
+
|
|
310
|
+
**`from_path`**
|
|
311
|
+
|
|
312
|
+
```python
|
|
313
|
+
media = Image.from_path(
|
|
314
|
+
path="photo.jpg",
|
|
315
|
+
name="my-photo", # optional identifier
|
|
316
|
+
mime_type=None, # auto-detected from extension by default
|
|
317
|
+
guess_from_extension=True, # set False to require explicit mime_type
|
|
318
|
+
read_into_memory=False, # set True to load bytes into memory immediately
|
|
319
|
+
)
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**`from_bytes`**
|
|
323
|
+
|
|
324
|
+
```python
|
|
325
|
+
media = Image.from_bytes(
|
|
326
|
+
data=raw_bytes,
|
|
327
|
+
mime_type="image/jpeg",
|
|
328
|
+
name="my-photo", # optional
|
|
329
|
+
)
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
**`from_stream`**
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
media = Video.from_stream(
|
|
336
|
+
stream=open("clip.mp4", "rb"),
|
|
337
|
+
mime_type="video/mp4",
|
|
338
|
+
name="my-clip", # optional
|
|
339
|
+
)
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
## Types and Endpoints
|
|
345
|
+
|
|
346
|
+
### Types
|
|
347
|
+
|
|
348
|
+
The response types form a nested hierarchy. Images and videos use different object shapes: images are timestamp-free, while videos carry a time dimension.
|
|
349
|
+
|
|
350
|
+
Image responses use the simpler, timestamp-free shape:
|
|
351
|
+
|
|
352
|
+
```
|
|
353
|
+
ClassificationPredictImageResponse
|
|
354
|
+
└── objects: [ImageDetectedObject]
|
|
355
|
+
├── object_id: int
|
|
356
|
+
├── bbox_observation: BboxObservation
|
|
357
|
+
│ ├── normalized_bbox: (x1, y1, x2, y2)
|
|
358
|
+
│ └── bbox_score: float
|
|
359
|
+
└── categories: [ImageCategoryPrediction]
|
|
360
|
+
├── category_id, name, score
|
|
361
|
+
└── attributes: [ImageAttributePrediction]
|
|
362
|
+
├── attribute_id, attribute_name
|
|
363
|
+
├── option_id, option_name
|
|
364
|
+
└── score: float
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
Video responses add timestamps and one bounding box per sampled frame:
|
|
368
|
+
|
|
369
|
+
```
|
|
370
|
+
ClassificationPredictVideoResponse
|
|
371
|
+
├── frame_timestamps_microseconds: [int] # sorted; every processed frame, incl. zero-detection frames
|
|
372
|
+
└── objects: [VideoDetectedObject]
|
|
373
|
+
├── object_id: int
|
|
374
|
+
├── timestamp_ranges: [TimestampRange] (timestamp_start_us_inclusive, timestamp_end_us_inclusive)
|
|
375
|
+
├── bbox_observations: [VideoBboxObservation]
|
|
376
|
+
│ ├── timestamp_microseconds: int
|
|
377
|
+
│ └── observation: BboxObservation | None # None on gap frames
|
|
378
|
+
│ ├── normalized_bbox: (x1, y1, x2, y2)
|
|
379
|
+
│ └── bbox_score: float
|
|
380
|
+
└── categories: [VideoCategoryPrediction]
|
|
381
|
+
├── category_id, name, score
|
|
382
|
+
└── attributes: [VideoAttributePrediction]
|
|
383
|
+
├── attribute_id, attribute_name
|
|
384
|
+
├── option_id, option_name
|
|
385
|
+
└── timestamp_ranges: [ScoredTimestampRange] (timestamp_start_us_inclusive, timestamp_end_us_inclusive, score)
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
Both shapes use the same `BboxObservation` for a bounding box. An image object has exactly one `BboxObservation` and one `score` per attribute. A video object collects one `VideoBboxObservation` per sampled frame it appears in — each wrapping a `BboxObservation`, or `None` on a gap frame — and each attribute carries the scored timestamp ranges over which its option held.
|
|
389
|
+
|
|
390
|
+
---
|
|
391
|
+
|
|
392
|
+
#### Shared types
|
|
393
|
+
|
|
394
|
+
**`TimestampRange`**
|
|
395
|
+
A contiguous span in microseconds, inclusive on both ends. Used by video responses to describe when an object was visible.
|
|
396
|
+
|
|
397
|
+
Properties:
|
|
398
|
+
|
|
399
|
+
- `timestamp_start_us_inclusive` (int): Start of the span in microseconds.
|
|
400
|
+
- `timestamp_end_us_inclusive` (int): End of the span in microseconds.
|
|
401
|
+
|
|
402
|
+
**`ScoredTimestampRange`**
|
|
403
|
+
A `TimestampRange` that also carries the model's confidence in the chosen option over that span. `score` is the mean of the option's raw per-frame scores over the range.
|
|
404
|
+
|
|
405
|
+
Properties:
|
|
406
|
+
|
|
407
|
+
- `timestamp_start_us_inclusive` (int): Start of the span in microseconds.
|
|
408
|
+
- `timestamp_end_us_inclusive` (int): End of the span in microseconds.
|
|
409
|
+
- `score` (float): Confidence score for the option over this span.
|
|
410
|
+
|
|
411
|
+
**`NormalizedBbox`**
|
|
412
|
+
Type alias for normalized bounding boxes, represented as a tuple of four float values `(x1, y1, x2, y2)` in the range `[0, 1]`.
|
|
413
|
+
|
|
414
|
+
**`BboxObservation`**
|
|
415
|
+
A bounding box and the confidence of the detection that produced it. Shared by image and video responses. Both fields are always present — a `BboxObservation` only exists where a box was actually placed (on video, a frame with no box is represented by a `None` `observation` on its `VideoBboxObservation`).
|
|
416
|
+
|
|
417
|
+
Properties:
|
|
418
|
+
|
|
419
|
+
- `normalized_bbox` (NormalizedBbox): The bounding box (normalized coordinates).
|
|
420
|
+
- `bbox_score` (float): Confidence score for the bounding box.
|
|
421
|
+
|
|
422
|
+
#### Image types
|
|
423
|
+
|
|
424
|
+
**`ImageAttributePrediction`**
|
|
425
|
+
A chosen attribute option for an object in an image, with its confidence score.
|
|
426
|
+
|
|
427
|
+
Properties:
|
|
428
|
+
|
|
429
|
+
- `attribute_id` (int): Unique identifier for the attribute.
|
|
430
|
+
- `attribute_name` (str): The name of the attribute.
|
|
431
|
+
- `option_id` (int): Unique identifier for the chosen option.
|
|
432
|
+
- `option_name` (str): The name of the chosen option.
|
|
433
|
+
- `score` (float): Confidence score for the chosen option.
|
|
434
|
+
|
|
435
|
+
**`ImageCategoryPrediction`**
|
|
436
|
+
A predicted category and its attribute predictions for an object in an image.
|
|
437
|
+
|
|
438
|
+
Properties:
|
|
439
|
+
|
|
440
|
+
- `category_id` (int): Unique identifier for the category.
|
|
441
|
+
- `name` (str): The name of the category.
|
|
442
|
+
- `score` (float): Confidence score for the category.
|
|
443
|
+
- `attributes` (List[ImageAttributePrediction]): Attribute predictions for this category.
|
|
444
|
+
|
|
445
|
+
**`ImageDetectedObject`**
|
|
446
|
+
A single detected object in an image: its bounding box and its categories.
|
|
447
|
+
|
|
448
|
+
Properties:
|
|
449
|
+
|
|
450
|
+
- `object_id` (int): Identifier for the detected object.
|
|
451
|
+
- `bbox_observation` (BboxObservation): The object's bounding box.
|
|
452
|
+
- `categories` (List[ImageCategoryPrediction]): Category and attribute predictions for this object.
|
|
453
|
+
|
|
454
|
+
#### Video types
|
|
455
|
+
|
|
456
|
+
**`VideoBboxObservation`**
|
|
457
|
+
A single sighting of a tracked object at one sampled frame. The object's `bbox_observations` holds one of these per frame in its lifespan, including **gap frames** — frames where the object is still on screen but the model predicted no box. A detected frame carries a real `BboxObservation`; a gap frame carries `observation=None`.
|
|
458
|
+
|
|
459
|
+
Properties:
|
|
460
|
+
|
|
461
|
+
- `timestamp_microseconds` (int): Timestamp of the observation in microseconds.
|
|
462
|
+
- `observation` (Optional[BboxObservation]): The bounding box and its score at this timestamp, or `None` on a gap frame where the object was present but not detected.
|
|
463
|
+
|
|
464
|
+
**`VideoAttributePrediction`**
|
|
465
|
+
A chosen attribute option together with the scored timestamp ranges over which it held. The same `attribute_id` may appear more than once across an object's life if the chosen option changes over time.
|
|
466
|
+
|
|
467
|
+
Properties:
|
|
468
|
+
|
|
469
|
+
- `attribute_id` (int): Unique identifier for the attribute.
|
|
470
|
+
- `attribute_name` (str): The name of the attribute.
|
|
471
|
+
- `option_id` (int): Unique identifier for the chosen option.
|
|
472
|
+
- `option_name` (str): The name of the chosen option.
|
|
473
|
+
- `timestamp_ranges` (List[ScoredTimestampRange]): The scored spans over which this option was chosen.
|
|
474
|
+
|
|
475
|
+
**`VideoCategoryPrediction`**
|
|
476
|
+
A predicted category and its attribute predictions for a tracked object.
|
|
477
|
+
|
|
478
|
+
Properties:
|
|
479
|
+
|
|
480
|
+
- `category_id` (int): Unique identifier for the category.
|
|
481
|
+
- `name` (str): The name of the category.
|
|
482
|
+
- `score` (float): Confidence score for the category.
|
|
483
|
+
- `attributes` (List[VideoAttributePrediction]): Attribute predictions for this category.
|
|
484
|
+
|
|
485
|
+
**`VideoDetectedObject`**
|
|
486
|
+
A single object tracked across the video: its lifespan, every bounding-box observation, and its categories.
|
|
487
|
+
|
|
488
|
+
Properties:
|
|
489
|
+
|
|
490
|
+
- `object_id` (int): Stable identifier for the tracked object.
|
|
491
|
+
- `timestamp_ranges` (List[TimestampRange]): The spans over which the object was visible.
|
|
492
|
+
- `bbox_observations` (List[VideoBboxObservation]): One observation per sampled frame in the object's lifespan. On gap frames (object on screen, no predicted box) the `VideoBboxObservation`'s `observation` is `None`.
|
|
493
|
+
- `categories` (List[VideoCategoryPrediction]): Category and attribute predictions for this object.
|
|
494
|
+
|
|
495
|
+
#### Response types
|
|
496
|
+
|
|
497
|
+
**`ClassificationPredictImageResponse`**
|
|
498
|
+
The response object returned after predicting an image.
|
|
499
|
+
|
|
500
|
+
Properties:
|
|
501
|
+
|
|
502
|
+
- `objects` (List[ImageDetectedObject]): Detected objects and their predictions.
|
|
503
|
+
- `prediction_task_uuid` (str): The unique identifier for the prediction task.
|
|
504
|
+
- `original_file_name` (Optional[str]): The file name of the original media, if provided.
|
|
505
|
+
|
|
506
|
+
**`ClassificationPredictVideoResponse`**
|
|
507
|
+
The response object returned after predicting a video.
|
|
508
|
+
|
|
509
|
+
Properties:
|
|
510
|
+
|
|
511
|
+
- `objects` (List[VideoDetectedObject]): Tracked objects and their predictions across the video.
|
|
512
|
+
- `frames_per_second` (int): The number of frames per second that were sampled.
|
|
513
|
+
- `frame_timestamps_microseconds` (List[int]): Sorted timestamps (in microseconds) of every processed frame, including frames where nothing was detected. Use it to snap an arbitrary playback position to a real frame before looking up detections.
|
|
514
|
+
- `prediction_task_uuid` (str): The unique identifier for the prediction task.
|
|
515
|
+
- `original_file_name` (Optional[str]): The file name of the original media, if provided.
|
|
516
|
+
|
|
517
|
+
**`PredictionTaskStatusResponse`**
|
|
518
|
+
Represents the status of a prediction task.
|
|
519
|
+
|
|
520
|
+
Properties:
|
|
521
|
+
|
|
522
|
+
- `prediction_task_uuid` (str): The unique identifier for the task.
|
|
523
|
+
- `prediction_type` (str): Either `"image"` or `"video"`.
|
|
524
|
+
- `status` (str): The current task status (`predicted`, `failed`, etc.).
|
|
525
|
+
|
|
526
|
+
---
|
|
527
|
+
|
|
528
|
+
### Endpoints
|
|
529
|
+
|
|
530
|
+
#### `client.classification.predict_image`
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
await client.classification.predict_image(
|
|
534
|
+
media: Image,
|
|
535
|
+
model_name: str,
|
|
536
|
+
timeout_seconds: Optional[int] = None,
|
|
537
|
+
) -> ClassificationPredictImageResponse
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
Performs a classification prediction on a single image.
|
|
541
|
+
|
|
542
|
+
| Parameter | Type | Default | Description |
|
|
543
|
+
|-----------|------|---------|-------------|
|
|
544
|
+
| `media` | `Image` | *required* | An `Image` object (from `from_path`, `from_bytes`, or `from_stream`). |
|
|
545
|
+
| `model_name` | `str` | *required* | The name of the model to use for prediction. |
|
|
546
|
+
| `timeout_seconds` | `Optional[int]` | `None` | Maximum wait time in seconds. Raises `PredictionTimeoutException` on timeout. `None` polls indefinitely. |
|
|
547
|
+
|
|
548
|
+
**Returns:** `ClassificationPredictImageResponse` — detected objects and their predictions.
|
|
549
|
+
|
|
550
|
+
---
|
|
551
|
+
|
|
552
|
+
#### `client.classification.predict_video`
|
|
553
|
+
|
|
554
|
+
```python
|
|
555
|
+
await client.classification.predict_video(
|
|
556
|
+
media: Video,
|
|
557
|
+
model_name: str,
|
|
558
|
+
frames_per_second: int = 1,
|
|
559
|
+
timeout_seconds: Optional[int] = None,
|
|
560
|
+
) -> ClassificationPredictVideoResponse
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
Performs a classification prediction on a video.
|
|
564
|
+
|
|
565
|
+
| Parameter | Type | Default | Description |
|
|
566
|
+
|-----------|------|---------|-------------|
|
|
567
|
+
| `media` | `Video` | *required* | A `Video` object (from `from_path`, `from_bytes`, or `from_stream`). |
|
|
568
|
+
| `model_name` | `str` | *required* | The name of the model to use for prediction. |
|
|
569
|
+
| `frames_per_second` | `int` | `1` | How many frames per second to sample from the video. |
|
|
570
|
+
| `timeout_seconds` | `Optional[int]` | `None` | Maximum wait time in seconds. Raises `PredictionTimeoutException` on timeout. `None` polls indefinitely. |
|
|
571
|
+
|
|
572
|
+
**Returns:** `ClassificationPredictVideoResponse` — tracked objects and their predictions across the video.
|
|
573
|
+
|
|
574
|
+
---
|
|
575
|
+
|
|
576
|
+
#### `client.classification.status`
|
|
577
|
+
|
|
578
|
+
```python
|
|
579
|
+
await client.classification.status(
|
|
580
|
+
prediction_task_uuid: str,
|
|
581
|
+
) -> PredictionTaskStatusResponse
|
|
582
|
+
```
|
|
583
|
+
|
|
584
|
+
Checks the status of a prediction task.
|
|
585
|
+
|
|
586
|
+
| Parameter | Type | Default | Description |
|
|
587
|
+
|-----------|------|---------|-------------|
|
|
588
|
+
| `prediction_task_uuid` | `str` | *required* | The UUID of the prediction task. |
|
|
589
|
+
|
|
590
|
+
**Returns:** `PredictionTaskStatusResponse` — the task's current status.
|
|
591
|
+
|
|
592
|
+
---
|
|
593
|
+
|
|
594
|
+
#### `client.classification.get_image_results`
|
|
595
|
+
|
|
596
|
+
```python
|
|
597
|
+
await client.classification.get_image_results(
|
|
598
|
+
prediction_task_uuid: str,
|
|
599
|
+
) -> ClassificationPredictImageResponse
|
|
600
|
+
```
|
|
601
|
+
|
|
602
|
+
Retrieves the results of a completed image prediction task.
|
|
603
|
+
|
|
604
|
+
| Parameter | Type | Default | Description |
|
|
605
|
+
|-----------|------|---------|-------------|
|
|
606
|
+
| `prediction_task_uuid` | `str` | *required* | The UUID of the prediction task. |
|
|
607
|
+
|
|
608
|
+
**Returns:** `ClassificationPredictImageResponse`
|
|
609
|
+
|
|
610
|
+
---
|
|
611
|
+
|
|
612
|
+
#### `client.classification.get_video_results`
|
|
613
|
+
|
|
614
|
+
```python
|
|
615
|
+
await client.classification.get_video_results(
|
|
616
|
+
prediction_task_uuid: str,
|
|
617
|
+
) -> ClassificationPredictVideoResponse
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
Retrieves the results of a completed video prediction task.
|
|
621
|
+
|
|
622
|
+
| Parameter | Type | Default | Description |
|
|
623
|
+
|-----------|------|---------|-------------|
|
|
624
|
+
| `prediction_task_uuid` | `str` | *required* | The UUID of the prediction task. |
|
|
625
|
+
|
|
626
|
+
**Returns:** `ClassificationPredictVideoResponse`
|
|
627
|
+
|
|
628
|
+
---
|
|
629
|
+
|
|
630
|
+
## Error Handling
|
|
631
|
+
|
|
632
|
+
The SDK defines the following exception types:
|
|
633
|
+
|
|
634
|
+
| Exception | When it's raised |
|
|
635
|
+
|-----------|-----------------|
|
|
636
|
+
| `PredictionTimeoutException` | The prediction did not complete within the specified `timeout_seconds`. |
|
|
637
|
+
| `PredictionTaskError` | The prediction task failed on the server. |
|
|
638
|
+
| `PredictionUploadError` | The media file could not be uploaded. |
|
|
639
|
+
| `PredictionTaskBeginError` | The prediction task could not be started. |
|
|
640
|
+
| `PredictionTaskResultsUnavailableError` | Results were requested for a task that has not completed. |
|
|
641
|
+
|
|
642
|
+
```python
|
|
643
|
+
from dragoneye import Dragoneye, Image
|
|
644
|
+
from dragoneye.types.exception import (
|
|
645
|
+
PredictionTimeoutException,
|
|
646
|
+
PredictionTaskError,
|
|
647
|
+
PredictionUploadError,
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
try:  # run inside an async function, with `client` and `image` set up as in Quick Start
|
|
651
|
+
result = await client.classification.predict_image(
|
|
652
|
+
media=image,
|
|
653
|
+
model_name="recognize_anything/your_model_name",
|
|
654
|
+
timeout_seconds=60,
|
|
655
|
+
)
|
|
656
|
+
except PredictionTimeoutException:
|
|
657
|
+
print("Prediction timed out — try increasing timeout_seconds")
|
|
658
|
+
except PredictionUploadError:
|
|
659
|
+
print("Failed to upload media — check file path and format")
|
|
660
|
+
except PredictionTaskError:
|
|
661
|
+
print("Prediction task failed on the server")
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
---
|
|
665
|
+
|
|
666
|
+
## Notes
|
|
667
|
+
|
|
668
|
+
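- All prediction methods on `client.classification` are **asynchronous**. Call them with `await` inside an `async` function, and start that function with `asyncio.run` (or schedule it on an already-running event loop).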
|
|
669
|
+
- For images, use `predict_image` with an `Image` object. For videos, use `predict_video` with a `Video` object. Passing the wrong media type will raise a `ValueError`.
|
|
670
|
+
- Predictions are executed as tasks: the SDK automatically handles task creation, media upload, polling, and result retrieval.
|
|
671
|
+
- The SDK automatically retries on rate-limit (429) responses using exponential backoff. You can configure this behavior via the `max_retries` and `max_backoff_time` parameters on the `Dragoneye` client.
|