datnvt-cvat-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from datnvt_cvat_cli.api import CVATClient
4
+ from datnvt_cvat_cli.models import DatasetFormat, ServerConfig
5
+
6
+ __version__ = "0.1.0"  # Package version string.
7
+ __all__ = ["CVATClient", "DatasetFormat", "ServerConfig"]  # Names exported by a star-import of this module.
datnvt_cvat_cli/api.py ADDED
@@ -0,0 +1,427 @@
1
+ """CVAT REST API client for annotation management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import logging
7
+ import time
8
+ import zipfile
9
+ from pathlib import Path
10
+
11
+ import requests
12
+
13
+ from datnvt_cvat_cli.models import DATASET_FORMAT_QUERY_MAP, DatasetFormat, ServerConfig
14
+
15
+ logger = logging.getLogger(__name__)  # Module-level logger, named after this module.
16
+
17
+
18
class CVATClient:
    """REST API client for a CVAT server instance.

    Every request is sent with HTTP basic auth and an
    ``Accept: application/vnd.cvat+json`` header.  No session object is kept
    between calls; each helper issues an independent request.
    """

    # Number of GET attempts made by the legacy annotation download.
    _LEGACY_DOWNLOAD_ATTEMPTS = 6

    def __init__(
        self,
        url: str,
        username: str,
        password: str,
        project_id: int = 0,
        request_timeout: float | tuple[float, float] | None = None,
    ) -> None:
        """Store connection settings; no network traffic happens here.

        Args:
            url: Base URL of the CVAT server; trailing slashes are stripped.
            username: Account name used for HTTP basic auth.
            password: Password used for HTTP basic auth.
            project_id: Default project for project-scoped calls.  A falsy
                value (the default ``0``) means "no project configured".
            request_timeout: Timeout handed to ``requests`` for every call
                (seconds, or a ``(connect, read)`` tuple).  ``None`` (the
                default) waits indefinitely, which is the historical
                behaviour of this client.
        """
        self.url = url.rstrip("/")
        self.project_id = project_id
        self._auth = (username, password)
        self._headers = {"Accept": "application/vnd.cvat+json"}
        self._request_timeout = request_timeout

    @classmethod
    def from_config(cls, config: ServerConfig) -> CVATClient:
        """Build a client from the url/username/password/project_id of *config*."""
        return cls(config.url, config.username, config.password, config.project_id)

    # ------------------------------------------------------------------
    # Low-level HTTP helpers
    # ------------------------------------------------------------------

    def _request(self, method: str, path: str, **kwargs) -> requests.Response:
        """Send one authenticated request and return the raw response.

        The HTTP status is *not* checked here so that callers which need to
        inspect it (e.g. to tell 202 from 200) can do so themselves.

        Args:
            method: HTTP verb, e.g. ``"GET"``.
            path: Server-relative path starting with ``/``.
            **kwargs: Extra keyword arguments forwarded to ``requests``.  A
                ``timeout`` given here overrides the client-wide default.
        """
        kwargs.setdefault("timeout", self._request_timeout)
        return requests.request(
            method,
            f"{self.url}{path}",
            auth=self._auth,
            headers=self._headers,
            **kwargs,
        )

    def _get(self, path: str, **kwargs) -> requests.Response:
        """GET *path*; raise ``requests.HTTPError`` on a 4xx/5xx status."""
        resp = self._request("GET", path, **kwargs)
        resp.raise_for_status()
        return resp

    def _post(self, path: str, **kwargs) -> requests.Response:
        """POST to *path*; raise ``requests.HTTPError`` on a 4xx/5xx status."""
        resp = self._request("POST", path, **kwargs)
        resp.raise_for_status()
        return resp

    def _put(self, path: str, **kwargs) -> requests.Response:
        """PUT to *path*; raise ``requests.HTTPError`` on a 4xx/5xx status."""
        resp = self._request("PUT", path, **kwargs)
        resp.raise_for_status()
        return resp

    def _wait_for_request(
        self,
        rq_id: str,
        poll_interval: float = 1.0,
        timeout: int = 600,
    ) -> None:
        """Poll /api/requests/{rq_id} until status is 'finished'.

        Args:
            rq_id: Identifier of the asynchronous server-side request.
            poll_interval: Seconds to sleep between polls.
            timeout: Overall budget in seconds.

        Raises:
            RuntimeError: The server reported status ``"failed"``.
            TimeoutError: The deadline passed without a terminal status.
            requests.HTTPError: A poll returned a 4xx/5xx status.
        """
        # monotonic() is immune to wall-clock adjustments (NTP, DST, manual
        # changes) that could otherwise stretch or shrink the deadline.
        deadline = time.monotonic() + timeout
        while True:
            status = self._get(f"/api/requests/{rq_id}").json().get("status")
            if status == "finished":
                return
            if status == "failed":
                raise RuntimeError(f"CVAT async request {rq_id} reported failure")
            if time.monotonic() > deadline:
                raise TimeoutError(
                    f"CVAT async request {rq_id} did not finish within {timeout}s",
                )
            time.sleep(poll_interval)

    def _paginate(self, path: str, params: dict | None = None) -> list[dict]:
        """Collect all pages from a paginated CVAT list endpoint.

        The caller's *params* dict is copied, never mutated.  ``page_size``
        defaults to 100 unless the caller supplied one; ``page`` is always
        driven by this method.  Iteration stops at the first page whose
        ``next`` field is missing or falsy.
        """
        query = dict(params or {})
        query.setdefault("page_size", 100)
        query["page"] = 1
        results: list[dict] = []
        while True:
            data = self._get(path, params=query).json()
            results.extend(data.get("results", []))
            if not data.get("next"):
                return results
            query["page"] += 1

    # ------------------------------------------------------------------
    # Connection / discovery
    # ------------------------------------------------------------------

    def check_connection(self) -> bool:
        """Return True if credentials are valid and the server is reachable."""
        try:
            self._get("/api/users/self")
        except Exception as exc:
            # Boolean probe by contract: never raise, but keep the reason
            # available for troubleshooting.
            logger.debug("Connection check failed: %s", exc)
            return False
        return True

    def check_project(self, project_id: int | None = None) -> bool:
        """Return True if the given project exists and is accessible.

        Falls back to self.project_id when project_id is None.
        Returns False immediately when no project_id is configured.
        """
        pid = project_id if project_id is not None else self.project_id
        if not pid:
            return False
        try:
            self._get(f"/api/projects/{pid}")
        except Exception as exc:
            # Boolean probe by contract: never raise, only record the reason.
            logger.debug("Project %s check failed: %s", pid, exc)
            return False
        return True

    def get_projects(self) -> list[dict]:
        """List all projects accessible to the authenticated user."""
        return self._paginate("/api/projects")

    def get_tasks(self, project_id: int | None = None) -> list[dict]:
        """List all tasks, optionally filtered by project.

        Falls back to self.project_id when *project_id* is None; when the
        resulting id is falsy no ``project_id`` filter is sent.
        """
        pid = project_id if project_id is not None else self.project_id
        params = {"project_id": pid} if pid else {}
        return self._paginate("/api/tasks", params)

    def get_task_ids(self, project_id: int | None = None) -> list[int]:
        """Return only the task IDs for the given project."""
        return [task["id"] for task in self.get_tasks(project_id)]

    def get_task_info(self, task_id: int) -> dict:
        """Return full metadata dict for a single task."""
        return self._get(f"/api/tasks/{task_id}").json()

    # ------------------------------------------------------------------
    # Download helpers
    # ------------------------------------------------------------------

    def _extract_zip(self, zip_bytes: bytes, dest: Path) -> None:
        """Unpack an in-memory zip archive into *dest*, creating it if needed."""
        dest.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
            zf.extractall(dest)

    def _download_annotations_new_api(
        self,
        task_id: int,
        fmt: DatasetFormat,
    ) -> bytes:
        """CVAT 2.x: POST annotations/export → poll → GET download.

        Returns:
            The raw bytes of the downloaded archive.

        Raises:
            ValueError: The export response carried no ``rq_id``.
            KeyError: *fmt* has no entry in ``DATASET_FORMAT_QUERY_MAP``.
        """
        format_name = DATASET_FORMAT_QUERY_MAP[fmt]
        export_path = f"/api/tasks/{task_id}/annotations/export"
        resp = self._post(export_path, params={"format": format_name})
        rq_id = resp.json().get("rq_id")
        if not rq_id:
            raise ValueError("No rq_id in annotations/export response")
        self._wait_for_request(rq_id)
        # The body is needed in full right away, so there is nothing to gain
        # from a streamed response here.
        dl = self._get(
            export_path,
            params={"format": format_name, "rq_id": rq_id, "action": "download"},
        )
        return dl.content

    def _download_annotations_legacy(
        self,
        task_id: int,
        fmt: DatasetFormat,
    ) -> bytes:
        """Legacy: POST dataset/export to prime cache, then GET annotations download.

        The GET is attempted up to six times with exponential back-off
        (1, 2, 4, 8, 16 seconds between attempts).  A 200 ends the loop; any
        other non-error status (202 is the expected "still processing" one)
        triggers another attempt.

        Raises:
            requests.HTTPError: The final attempt returned a 4xx/5xx status.
            RuntimeError: All attempts finished without a 200 response.
        """
        try:
            self._post(
                f"/api/tasks/{task_id}/dataset/export",
                params={"save_images": False, "format": fmt.value},
            )
        except Exception as exc:
            # Priming is best-effort: the download below may still succeed,
            # so record the failure instead of aborting.
            logger.debug(
                "Task %s: dataset/export priming request failed (%s)",
                task_id,
                exc,
            )

        req_path = f"/api/tasks/{task_id}/annotations"
        params = {"format": fmt.value, "action": "download"}
        last_attempt = self._LEGACY_DOWNLOAD_ATTEMPTS - 1
        for attempt in range(self._LEGACY_DOWNLOAD_ATTEMPTS):
            try:
                resp = self._request("GET", req_path, params=params)
                if resp.status_code == 200:
                    return resp.content
                # 202 = export still processing; other non-OK → raise
                if resp.status_code != 202:
                    resp.raise_for_status()
            except requests.HTTPError:
                if attempt == last_attempt:
                    raise
            # Back off only when another attempt follows; sleeping after the
            # last one would just delay the error by 32 seconds.
            if attempt < last_attempt:
                time.sleep(2**attempt)
        raise RuntimeError(
            f"Could not download annotations for task {task_id} after retries",
        )

    def _download_with_images(
        self,
        task_id: int,
        out_path: Path,
        fmt: DatasetFormat,
    ) -> Path:
        """Download the full dataset (annotations + images) via dataset/export.

        The archive is streamed to ``out_path/_dataset.zip``, extracted into
        *out_path*, and the temporary zip is removed afterwards — also when
        the download or the extraction fails.

        Returns:
            *out_path*.

        Raises:
            ValueError: The export response carried no ``rq_id``.
        """
        out_path.mkdir(parents=True, exist_ok=True)
        resp = self._post(
            f"/api/tasks/{task_id}/dataset/export",
            params={
                "format": fmt.value,
                "save_images": True,
                "filename": f"task_{task_id}_dataset",
            },
        )
        rq_id = resp.json().get("rq_id")
        if not rq_id:
            raise ValueError("No rq_id received from dataset/export")
        self._wait_for_request(rq_id)

        zip_path = out_path / "_dataset.zip"
        try:
            # Using the response as a context manager returns the streamed
            # connection to the pool even if writing is interrupted.
            with self._get(
                f"/api/tasks/{task_id}/dataset",
                params={"format": fmt.value, "action": "download"},
                stream=True,
            ) as dl, open(zip_path, "wb") as f:
                for chunk in dl.iter_content(chunk_size=8192):
                    f.write(chunk)
            with zipfile.ZipFile(zip_path) as zf:
                zf.extractall(out_path)
        finally:
            # Never leave a partial or corrupt archive behind.
            if zip_path.exists():
                zip_path.unlink()
        return out_path

    # ------------------------------------------------------------------
    # Public download API
    # ------------------------------------------------------------------

    def download_annotations(
        self,
        task_id: int,
        out_path: Path,
        fmt: DatasetFormat = DatasetFormat.CVAT,
        save_images: bool = False,
    ) -> Path:
        """Download a single task to *out_path*.

        Args:
            task_id: CVAT task ID.
            out_path: Destination directory.
            fmt: Annotation format.
            save_images: When True, also download the raw images alongside
                annotations (uses the higher-privilege dataset/export endpoint).

        Returns:
            *out_path* as a ``Path``.
        """
        out_path = Path(out_path)
        if save_images:
            return self._download_with_images(task_id, out_path, fmt)

        # Try new CVAT 2.x endpoint first; fall back to legacy
        zip_bytes: bytes | None = None
        try:
            zip_bytes = self._download_annotations_new_api(task_id, fmt)
            logger.debug("Task %s: used annotations/export endpoint", task_id)
        except Exception as exc:
            logger.warning(
                "Task %s: annotations/export failed (%s), trying legacy",
                task_id,
                exc,
            )

        if zip_bytes is None:
            zip_bytes = self._download_annotations_legacy(task_id, fmt)
            logger.debug("Task %s: used legacy annotations endpoint", task_id)

        self._extract_zip(zip_bytes, out_path)
        logger.info("Task %s: annotations saved to %s", task_id, out_path)
        return out_path

    def download_tasks(
        self,
        task_ids: list[int],
        out_dir: str | None = None,
        out_paths: list[Path] | None = None,
        fmt: DatasetFormat = DatasetFormat.CVAT,
        save_images: bool = False,
        skip_existing: bool = True,
        folder_prefix: str = "task",
    ) -> None:
        """Download multiple tasks in sequence.

        Subdirectories are named ``{folder_prefix}{id}/`` when *out_dir* is used
        (default prefix is ``task``, e.g. ``task147/``).

        A failure for one task is logged and does not stop the remaining
        downloads.

        Raises:
            ValueError: Neither *out_dir* nor *out_paths* was given, or
                *out_paths* does not have one entry per task ID.
        """
        paths = self._resolve_out_paths(task_ids, out_dir, out_paths, folder_prefix)
        for task_id, path in zip(task_ids, paths):
            if skip_existing and self._has_annotations(path, fmt):
                logger.info("Task %s: already downloaded, skipping", task_id)
                continue
            logger.info("Task %s: downloading → %s", task_id, path)
            try:
                self.download_annotations(task_id, path, fmt, save_images)
            except Exception as exc:
                # Batch boundary: keep going with the remaining tasks.
                logger.error("Task %s: download failed — %s", task_id, exc)

    # ------------------------------------------------------------------
    # Upload API
    # ------------------------------------------------------------------

    def upload_annotations(
        self,
        task_id: int,
        anno_path: Path,
        fmt: DatasetFormat = DatasetFormat.CVAT,
    ) -> bool:
        """Upload an annotation file to a CVAT task.

        Args:
            task_id: Target CVAT task.
            anno_path: Path to the annotation file (JSON for COCO, XML for CVAT).
            fmt: Format of the annotation file.

        Returns:
            True on success, False on failure.

        Raises:
            OSError: The annotation file could not be read.
            KeyError: *fmt* has no entry in ``DATASET_FORMAT_QUERY_MAP``.
        """
        format_name = DATASET_FORMAT_QUERY_MAP[fmt]
        # Read raw bytes so the file reaches the server unmodified: no
        # newline translation and no decode failure on unexpected encodings.
        content = Path(anno_path).read_bytes()

        is_coco = fmt == DatasetFormat.COCO
        filename = "annotations.json" if is_coco else "annotations.xml"
        mime = "application/json" if is_coco else "application/xml"
        files = {"annotation_file": (filename, content, mime)}

        try:
            resp = self._put(
                f"/api/tasks/{task_id}/annotations",
                params={"format": format_name},
                files=files,
            )
        except requests.RequestException as exc:
            # Covers HTTP error statuses as well as connection/timeout
            # problems, so one bad upload cannot abort a whole batch.
            logger.error("Task %s: upload failed — %s", task_id, exc)
            return False

        if resp.status_code in (200, 201, 202):
            logger.info("Task %s: annotations uploaded successfully", task_id)
            return True
        logger.error("Task %s: upload returned %s", task_id, resp.status_code)
        return False

    def upload_tasks(
        self,
        task_ids: list[int],
        in_dir: str,
        fmt: DatasetFormat = DatasetFormat.CVAT,
        folder_prefix: str = "task",
    ) -> None:
        """Upload annotations for multiple tasks from ``in_dir/{folder_prefix}{id}/`` subdirs.

        Looks for ``annotations/instances_default_pseudo.json`` (COCO) or
        ``annotations_pseudo.xml`` (CVAT) inside each task folder; falls back
        to the non-pseudo variant if the pseudo file doesn't exist.

        Args:
            task_ids: Tasks to upload to.
            in_dir: Directory holding one subdirectory per task.
            fmt: Format of the annotation files.
            folder_prefix: Subdirectory name prefix; mirrors the parameter of
                the same name on ``download_tasks`` (default ``task``).
        """
        root = Path(in_dir)
        for task_id in task_ids:
            task_dir = root / f"{folder_prefix}{task_id}"
            if fmt == DatasetFormat.COCO:
                candidates = [
                    task_dir / "annotations" / "instances_default_pseudo.json",
                    task_dir / "annotations" / "instances_default.json",
                ]
            else:
                candidates = [
                    task_dir / "annotations_pseudo.xml",
                    task_dir / "annotations.xml",
                ]

            anno_file = next((c for c in candidates if c.exists()), None)
            if anno_file is None:
                logger.warning(
                    "Task %s: no annotation file found under %s",
                    task_id,
                    task_dir,
                )
                continue

            logger.info("Task %s: uploading %s", task_id, anno_file)
            self.upload_annotations(task_id, anno_file, fmt)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _resolve_out_paths(
        self,
        task_ids: list[int],
        out_dir: str | None,
        out_paths: list[Path] | None,
        folder_prefix: str = "task",
    ) -> list[Path]:
        """Return one destination directory per task ID.

        Explicit *out_paths* win over *out_dir*; with *out_dir* each task gets
        ``out_dir/{folder_prefix}{id}``.

        Raises:
            ValueError: Neither argument was provided, or *out_paths* does not
                contain exactly one entry per task ID.
        """
        if out_paths:
            # A length mismatch would make zip() silently drop tasks (or
            # paths) in the caller, so reject it up front.
            if len(out_paths) != len(task_ids):
                raise ValueError(
                    f"out_paths has {len(out_paths)} entries "
                    f"but {len(task_ids)} task IDs were given",
                )
            return [Path(p) for p in out_paths]
        if out_dir:
            return [Path(out_dir) / f"{folder_prefix}{tid}" for tid in task_ids]
        raise ValueError("Provide either out_dir or out_paths")

    def _annotation_path(self, base: Path, fmt: DatasetFormat) -> Path:
        """Return where the main annotation file for *fmt* is expected under *base*."""
        if fmt == DatasetFormat.COCO:
            return base / "annotations" / "instances_default.json"
        return base / "annotations.xml"

    def _has_annotations(self, path: Path, fmt: DatasetFormat) -> bool:
        """Return True if the expected annotation file already exists under *path*."""
        return self._annotation_path(path, fmt).exists()