quiclabel-coco-sync 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quiclabel_coco_sync-0.0.1/.gitignore +79 -0
- quiclabel_coco_sync-0.0.1/LICENSE +21 -0
- quiclabel_coco_sync-0.0.1/PKG-INFO +145 -0
- quiclabel_coco_sync-0.0.1/README.md +116 -0
- quiclabel_coco_sync-0.0.1/pyproject.toml +52 -0
- quiclabel_coco_sync-0.0.1/src/quiclabel_sync_project_coco/__init__.py +3 -0
- quiclabel_coco_sync-0.0.1/src/quiclabel_sync_project_coco/api.py +97 -0
- quiclabel_coco_sync-0.0.1/src/quiclabel_sync_project_coco/cli.py +196 -0
- quiclabel_coco_sync-0.0.1/src/quiclabel_sync_project_coco/config.py +110 -0
- quiclabel_coco_sync-0.0.1/src/quiclabel_sync_project_coco/downloader.py +139 -0
- quiclabel_coco_sync-0.0.1/src/quiclabel_sync_project_coco/writer.py +75 -0
- quiclabel_coco_sync-0.0.1/tests/test_config.py +142 -0
- quiclabel_coco_sync-0.0.1/tests/test_downloader.py +218 -0
- quiclabel_coco_sync-0.0.1/tests/test_writer.py +81 -0
- quiclabel_coco_sync-0.0.1/uv.lock +323 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Dependencies
|
|
2
|
+
node_modules/
|
|
3
|
+
.pnpm-store/
|
|
4
|
+
|
|
5
|
+
# Build outputs
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
.vite/
|
|
9
|
+
.next/
|
|
10
|
+
.turbo/
|
|
11
|
+
out/
|
|
12
|
+
|
|
13
|
+
# Environment
|
|
14
|
+
.env
|
|
15
|
+
.env.local
|
|
16
|
+
.env.*.local
|
|
17
|
+
.env.deploy
|
|
18
|
+
.env.production
|
|
19
|
+
deploy/.env.production
|
|
20
|
+
|
|
21
|
+
# Python
|
|
22
|
+
__pycache__/
|
|
23
|
+
*.pyc
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
*.egg-info/
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.vscode/
|
|
30
|
+
.idea/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
*.tsbuildinfo
|
|
34
|
+
.claude/
|
|
35
|
+
|
|
36
|
+
# OS
|
|
37
|
+
.DS_Store
|
|
38
|
+
Thumbs.db
|
|
39
|
+
|
|
40
|
+
# Logs
|
|
41
|
+
*.log
|
|
42
|
+
npm-debug.log*
|
|
43
|
+
pnpm-debug.log*
|
|
44
|
+
|
|
45
|
+
# Testing
|
|
46
|
+
coverage/
|
|
47
|
+
test-results/
|
|
48
|
+
playwright-report/
|
|
49
|
+
|
|
50
|
+
# E2E runtime files
|
|
51
|
+
.e2e-pids
|
|
52
|
+
.e2e-jobs
|
|
53
|
+
.e2e-*.log
|
|
54
|
+
|
|
55
|
+
# Prisma
|
|
56
|
+
packages/database/prisma/migrations/**/migration_lock.toml
|
|
57
|
+
|
|
58
|
+
# MedSight infer-app-client
|
|
59
|
+
*.onnx
|
|
60
|
+
apps/infer-app-client/assets/trt_cache/
|
|
61
|
+
apps/infer-app-client/dist/
|
|
62
|
+
apps/infer-app-client/installer_output/
|
|
63
|
+
apps/infer-app-client/outputs/
|
|
64
|
+
.pytest_cache/
|
|
65
|
+
|
|
66
|
+
# Temporary
|
|
67
|
+
*.tmp
|
|
68
|
+
*.bak
|
|
69
|
+
apps/infer_app/
|
|
70
|
+
|
|
71
|
+
ssim_stats.json
|
|
72
|
+
ssim_all/
|
|
73
|
+
ssim_debug/.teamwork/
|
|
74
|
+
|
|
75
|
+
.teamwork/
|
|
76
|
+
|
|
77
|
+
# Teamwork agent scratch
|
|
78
|
+
.teamwork-ids.json
|
|
79
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 weavejam / quiclabel contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quiclabel-coco-sync
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: CLI to incrementally sync a QuicLabel COCO dataset (annotations + images) from quiclabel-admin
|
|
5
|
+
Project-URL: Homepage, https://github.com/weavejam/quiclabel/tree/main/apps/quiclabel-sync-project-coco
|
|
6
|
+
Project-URL: Repository, https://github.com/weavejam/quiclabel
|
|
7
|
+
Project-URL: Issues, https://github.com/weavejam/quiclabel/issues
|
|
8
|
+
Author: weavejam / quiclabel contributors
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: annotation,coco,computer-vision,dataset,quiclabel,sync
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
24
|
+
Classifier: Topic :: Utilities
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: click>=8.1
|
|
27
|
+
Requires-Dist: requests>=2.31
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# quiclabel-coco-sync
|
|
31
|
+
|
|
32
|
+
CLI to incrementally sync a QuicLabel COCO dataset (annotations + images)
|
|
33
|
+
from `quiclabel-admin`. Pulls a fresh `annotations-YYYYMMDD-HHMMSS.json`
|
|
34
|
+
next to your existing dataset and uses a thread pool to download only the
|
|
35
|
+
images you don't already have.
|
|
36
|
+
|
|
37
|
+
## Prerequisites
|
|
38
|
+
|
|
39
|
+
- **uv** — Python package & runtime manager. Install:
|
|
40
|
+
- macOS / Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
|
41
|
+
- Windows: `winget install astral-sh.uv` (or `irm https://astral.sh/uv/install.ps1 | iex`)
|
|
42
|
+
- via pipx: `pipx install uv`
|
|
43
|
+
- **An API key** — get one from quiclabel-admin: *Settings → API Keys → New key*.
|
|
44
|
+
Copy the `qk_...` value immediately (it's only shown once).
|
|
45
|
+
|
|
46
|
+
## Quick start (from PyPI — recommended)
|
|
47
|
+
|
|
48
|
+
No clone, no install — `uvx` downloads, caches and runs in one shot:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uvx quiclabel-coco-sync path/to/annotations.json \
|
|
52
|
+
--admin-url https://quiclabel-admin.example.com \
|
|
53
|
+
--api-key qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Or set env vars and call it bare:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
export QUICLABEL_ADMIN_URL=https://quiclabel-admin.example.com
|
|
60
|
+
export QUICLABEL_API_KEY=qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
61
|
+
uvx quiclabel-coco-sync path/to/annotations.json
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Prefer a persistent install? Use `uv tool`:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
uv tool install quiclabel-coco-sync
|
|
68
|
+
quiclabel-coco-sync path/to/annotations.json --admin-url ... --api-key ...
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## From the monorepo (contributors)
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# From the repo root
|
|
75
|
+
pnpm sync-project-coco path/to/annotations.json \
|
|
76
|
+
--admin-url https://quiclabel-admin.example.com \
|
|
77
|
+
--api-key qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Or directly with `uv` against this app directory:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
cd apps/quiclabel-sync-project-coco
|
|
84
|
+
uv sync
|
|
85
|
+
uv run quiclabel-coco-sync path/to/annotations.json \
|
|
86
|
+
--admin-url https://quiclabel-admin.example.com \
|
|
87
|
+
--api-key qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## What it does
|
|
91
|
+
|
|
92
|
+
1. Reads `path/to/annotations.json` and its `meta` block (added by the COCO exporter).
|
|
93
|
+
2. Calls `GET /api/v1/projects/<project_id>/coco` with the same filters,
|
|
94
|
+
paging by cursor — so 10k+ task projects don't blow up server memory.
|
|
95
|
+
3. Writes `path/to/annotations-20260519-143045.json` (timestamped — never
|
|
96
|
+
overwrites your input).
|
|
97
|
+
4. Diffs `task_id` sets, downloads any missing images to `path/to/images/`
|
|
98
|
+
using a thread pool. Files already on disk are skipped by file name.
|
|
99
|
+
|
|
100
|
+
The old `annotations.json` and the existing `images/*` files are never touched.
|
|
101
|
+
|
|
102
|
+
## Configuration priority
|
|
103
|
+
|
|
104
|
+
Each value is resolved in this order — first wins:
|
|
105
|
+
|
|
106
|
+
1. CLI flag (`--project-id`, `--statuses`, …)
|
|
107
|
+
2. Env var (`QUICLABEL_ADMIN_URL`, `QUICLABEL_API_KEY`)
|
|
108
|
+
3. `meta` block of the input json
|
|
109
|
+
|
|
110
|
+
If anything required is missing from all three, the CLI exits with a clear
|
|
111
|
+
message naming the missing key and where to provide it.
|
|
112
|
+
|
|
113
|
+
## Recovery
|
|
114
|
+
|
|
115
|
+
- **Partial failure** (some images failed mid-run): just re-run the same
|
|
116
|
+
command. Already-downloaded files are skipped by file name, so retry only
|
|
117
|
+
fetches the remaining ones. The CLI tells you this in the failure summary.
|
|
118
|
+
- **Corrupt image file**: delete it, then re-run.
|
|
119
|
+
- **A `.part` file in `images/`** indicates a crashed download. Safe to delete.
|
|
120
|
+
|
|
121
|
+
## Development
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
cd apps/quiclabel-sync-project-coco
|
|
125
|
+
uv sync --group dev
|
|
126
|
+
uv run pytest
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Releasing to PyPI (maintainers)
|
|
130
|
+
|
|
131
|
+
Manual release flow until CI is wired up:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
cd apps/quiclabel-sync-project-coco
|
|
135
|
+
|
|
136
|
+
# 1. bump version in pyproject.toml
|
|
137
|
+
# 2. build sdist + wheel
|
|
138
|
+
uv build
|
|
139
|
+
|
|
140
|
+
# 3. publish (uses UV_PUBLISH_TOKEN or prompts)
|
|
141
|
+
uv publish
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Get a PyPI API token at <https://pypi.org/manage/account/token/>.
|
|
145
|
+
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# quiclabel-coco-sync
|
|
2
|
+
|
|
3
|
+
CLI to incrementally sync a QuicLabel COCO dataset (annotations + images)
|
|
4
|
+
from `quiclabel-admin`. Pulls a fresh `annotations-YYYYMMDD-HHMMSS.json`
|
|
5
|
+
next to your existing dataset and uses a thread pool to download only the
|
|
6
|
+
images you don't already have.
|
|
7
|
+
|
|
8
|
+
## Prerequisites
|
|
9
|
+
|
|
10
|
+
- **uv** — Python package & runtime manager. Install:
|
|
11
|
+
- macOS / Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
|
12
|
+
- Windows: `winget install astral-sh.uv` (or `irm https://astral.sh/uv/install.ps1 | iex`)
|
|
13
|
+
- via pipx: `pipx install uv`
|
|
14
|
+
- **An API key** — get one from quiclabel-admin: *Settings → API Keys → New key*.
|
|
15
|
+
Copy the `qk_...` value immediately (it's only shown once).
|
|
16
|
+
|
|
17
|
+
## Quick start (from PyPI — recommended)
|
|
18
|
+
|
|
19
|
+
No clone, no install — `uvx` downloads, caches and runs in one shot:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uvx quiclabel-coco-sync path/to/annotations.json \
|
|
23
|
+
--admin-url https://quiclabel-admin.example.com \
|
|
24
|
+
--api-key qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or set env vars and call it bare:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
export QUICLABEL_ADMIN_URL=https://quiclabel-admin.example.com
|
|
31
|
+
export QUICLABEL_API_KEY=qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
32
|
+
uvx quiclabel-coco-sync path/to/annotations.json
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Prefer a persistent install? Use `uv tool`:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
uv tool install quiclabel-coco-sync
|
|
39
|
+
quiclabel-coco-sync path/to/annotations.json --admin-url ... --api-key ...
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## From the monorepo (contributors)
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# From the repo root
|
|
46
|
+
pnpm sync-project-coco path/to/annotations.json \
|
|
47
|
+
--admin-url https://quiclabel-admin.example.com \
|
|
48
|
+
--api-key qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Or directly with `uv` against this app directory:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
cd apps/quiclabel-sync-project-coco
|
|
55
|
+
uv sync
|
|
56
|
+
uv run quiclabel-coco-sync path/to/annotations.json \
|
|
57
|
+
--admin-url https://quiclabel-admin.example.com \
|
|
58
|
+
--api-key qk_xxxxxxxxxxxxxxxxxxxxxx
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## What it does
|
|
62
|
+
|
|
63
|
+
1. Reads `path/to/annotations.json` and its `meta` block (added by the COCO exporter).
|
|
64
|
+
2. Calls `GET /api/v1/projects/<project_id>/coco` with the same filters,
|
|
65
|
+
paging by cursor — so 10k+ task projects don't blow up server memory.
|
|
66
|
+
3. Writes `path/to/annotations-20260519-143045.json` (timestamped — never
|
|
67
|
+
overwrites your input).
|
|
68
|
+
4. Diffs `task_id` sets, downloads any missing images to `path/to/images/`
|
|
69
|
+
using a thread pool. Files already on disk are skipped by file name.
|
|
70
|
+
|
|
71
|
+
The old `annotations.json` and the existing `images/*` files are never touched.
|
|
72
|
+
|
|
73
|
+
## Configuration priority
|
|
74
|
+
|
|
75
|
+
Each value is resolved in this order — first wins:
|
|
76
|
+
|
|
77
|
+
1. CLI flag (`--project-id`, `--statuses`, …)
|
|
78
|
+
2. Env var (`QUICLABEL_ADMIN_URL`, `QUICLABEL_API_KEY`)
|
|
79
|
+
3. `meta` block of the input json
|
|
80
|
+
|
|
81
|
+
If anything required is missing from all three, the CLI exits with a clear
|
|
82
|
+
message naming the missing key and where to provide it.
|
|
83
|
+
|
|
84
|
+
## Recovery
|
|
85
|
+
|
|
86
|
+
- **Partial failure** (some images failed mid-run): just re-run the same
|
|
87
|
+
command. Already-downloaded files are skipped by file name, so retry only
|
|
88
|
+
fetches the remaining ones. The CLI tells you this in the failure summary.
|
|
89
|
+
- **Corrupt image file**: delete it, then re-run.
|
|
90
|
+
- **A `.part` file in `images/`** indicates a crashed download. Safe to delete.
|
|
91
|
+
|
|
92
|
+
## Development
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
cd apps/quiclabel-sync-project-coco
|
|
96
|
+
uv sync --group dev
|
|
97
|
+
uv run pytest
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Releasing to PyPI (maintainers)
|
|
101
|
+
|
|
102
|
+
Manual release flow until CI is wired up:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
cd apps/quiclabel-sync-project-coco
|
|
106
|
+
|
|
107
|
+
# 1. bump version in pyproject.toml
|
|
108
|
+
# 2. build sdist + wheel
|
|
109
|
+
uv build
|
|
110
|
+
|
|
111
|
+
# 3. publish (uses UV_PUBLISH_TOKEN or prompts)
|
|
112
|
+
uv publish
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Get a PyPI API token at <https://pypi.org/manage/account/token/>.
|
|
116
|
+
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "quiclabel-coco-sync"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "CLI to incrementally sync a QuicLabel COCO dataset (annotations + images) from quiclabel-admin"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
license-files = ["LICENSE"]
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "weavejam / quiclabel contributors" },
|
|
11
|
+
]
|
|
12
|
+
keywords = ["quiclabel", "coco", "annotation", "dataset", "sync", "computer-vision"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Environment :: Console",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
26
|
+
"Topic :: Utilities",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"click>=8.1",
|
|
30
|
+
"requests>=2.31",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/weavejam/quiclabel/tree/main/apps/quiclabel-sync-project-coco"
|
|
35
|
+
Repository = "https://github.com/weavejam/quiclabel"
|
|
36
|
+
Issues = "https://github.com/weavejam/quiclabel/issues"
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
quiclabel-coco-sync = "quiclabel_sync_project_coco.cli:main"
|
|
40
|
+
sync-project-coco = "quiclabel_sync_project_coco.cli:main"
|
|
41
|
+
|
|
42
|
+
[dependency-groups]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=8.0",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[build-system]
|
|
48
|
+
requires = ["hatchling"]
|
|
49
|
+
build-backend = "hatchling.build"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/quiclabel_sync_project_coco"]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""HTTP client for GET /api/v1/projects/:id/coco with cursor pagination."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any, Iterator
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
RETRYABLE_STATUS = {429, 500, 502, 503, 504}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ApiError(Exception):
    """Failure talking to the quiclabel-admin COCO endpoint.

    Raised when a page request cannot be completed: authentication is
    rejected (401/403), the server answers with a status that is not
    retried, or retries for a transient status / network error run out.
    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _request_page(
    session: requests.Session,
    url: str,
    params: dict[str, Any],
    headers: dict[str, str],
    *,
    max_retries: int = 3,
    timeout: float = 30.0,
) -> dict[str, Any]:
    """Fetch one page and return its decoded JSON object.

    Network errors and the statuses in ``RETRYABLE_STATUS`` (429 and
    500/502/503/504) are retried up to ``max_retries`` times, sleeping
    ``2**attempt`` seconds between attempts (1s, 2s, 4s, ...).

    Args:
        session: Session used to issue the GET request.
        url: Endpoint URL.
        params: Query-string parameters for this page.
        headers: Request headers (carries the Authorization header).
        max_retries: Number of retries after the first attempt.
        timeout: Per-request timeout in seconds, passed to ``session.get``.

    Returns:
        The response body parsed as a JSON object.

    Raises:
        ApiError: On 401/403, on any non-retryable status, when retries are
            exhausted, or when a 200 response does not contain a JSON object.
    """
    for attempt in range(max_retries + 1):
        try:
            resp = session.get(url, params=params, headers=headers, timeout=timeout)
        except requests.RequestException as e:
            if attempt == max_retries:
                raise ApiError(f"Network error after {max_retries + 1} attempts: {e}") from e
            logger.warning("network error (attempt %d): %s", attempt + 1, e)
            time.sleep(2**attempt)
            continue

        if resp.status_code == 200:
            # A 200 with a broken body (e.g. an HTML page from a proxy) must
            # surface as ApiError like every other failure of this function,
            # not as a raw JSON decode error.
            try:
                payload = resp.json()
            except ValueError as e:
                raise ApiError(
                    f"Invalid JSON in API response: {resp.text[:500]}"
                ) from e
            # Callers use dict methods on the page, so reject other JSON types.
            if not isinstance(payload, dict):
                raise ApiError(
                    f"Unexpected API response (expected a JSON object): {resp.text[:500]}"
                )
            return payload

        # Retrying cannot fix bad credentials; fail fast with a hint.
        if resp.status_code in (401, 403):
            raise ApiError(
                f"Authentication failed ({resp.status_code}): check --api-key"
            )

        if resp.status_code in RETRYABLE_STATUS and attempt < max_retries:
            logger.warning(
                "server %d (attempt %d), retrying", resp.status_code, attempt + 1
            )
            time.sleep(2**attempt)
            continue

        # Non-retryable status, or a retryable one on the final attempt.
        raise ApiError(f"API error {resp.status_code}: {resp.text[:500]}")

    # Only reachable when max_retries is negative (the loop never runs).
    raise ApiError("retry loop exhausted")  # pragma: no cover
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def iter_pages(
    admin_url: str,
    api_key: str,
    project_id: str,
    *,
    statuses: list[str],
    tag_ids: list[str],
    image_source: str,
    limit: int = 500,
    session: requests.Session | None = None,
) -> Iterator[dict[str, Any]]:
    """Yield each page dict of the project's COCO export in turn.

    The first page has meta/info/categories; subsequent pages only have
    images/annotations/next_cursor. Iteration stops when a page has no
    ``next_cursor``.

    Args:
        admin_url: Base URL of quiclabel-admin; a trailing slash is tolerated.
        api_key: API key sent as a Bearer token.
        project_id: Project whose export is requested.
        statuses: Status filter, sent comma-joined.
        tag_ids: Tag filter, sent comma-joined; omitted when empty.
        image_source: Value of the ``image_source`` query parameter.
        limit: Page size requested from the server.
        session: Optional session to reuse. When omitted, a session is
            created here and closed once iteration ends.

    Raises:
        ApiError: Propagated from the page request, or raised here when the
            server returns the same cursor twice in a row.
    """
    # Only close the session if this function created it; a caller-supplied
    # session stays under the caller's control.
    owns_session = session is None
    s = requests.Session() if owns_session else session
    headers = {"Authorization": f"Bearer {api_key}"}
    # Strip trailing slashes so "https://host/" does not yield "//api/v1/...".
    url = f"{admin_url.rstrip('/')}/api/v1/projects/{project_id}/coco"
    params: dict[str, Any] = {
        "statuses": ",".join(statuses),
        "image_source": image_source,
        "limit": limit,
    }
    if tag_ids:
        params["tag_ids"] = ",".join(tag_ids)

    cursor: str | None = None
    page_num = 0
    try:
        while True:
            page_params = dict(params)
            if cursor:
                page_params["cursor"] = cursor

            page = _request_page(s, url, page_params, headers)
            page_num += 1
            logger.info("page %d: %d images", page_num, len(page.get("images") or []))
            yield page

            next_cursor = page.get("next_cursor")
            if not next_cursor:
                return
            # The same cursor again would re-request the identical page
            # forever; fail loudly instead of looping.
            if next_cursor == cursor:
                raise ApiError(
                    f"Pagination stalled: server repeated cursor {next_cursor!r}"
                )
            cursor = next_cursor
    finally:
        # Runs on normal exhaustion, on error, and when the generator is
        # closed early by the consumer.
        if owns_session:
            s.close()
|