quiclabel-coco-sync 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,79 @@
1
+ # Dependencies
2
+ node_modules/
3
+ .pnpm-store/
4
+
5
+ # Build outputs
6
+ dist/
7
+ build/
8
+ .vite/
9
+ .next/
10
+ .turbo/
11
+ out/
12
+
13
+ # Environment
14
+ .env
15
+ .env.local
16
+ .env.*.local
17
+ .env.deploy
18
+ .env.production
19
+ deploy/.env.production
20
+
21
+ # Python
22
+ __pycache__/
23
+ *.pyc
24
+ .venv/
25
+ venv/
26
+ *.egg-info/
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+ *.tsbuildinfo
34
+ .claude/
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # Logs
41
+ *.log
42
+ npm-debug.log*
43
+ pnpm-debug.log*
44
+
45
+ # Testing
46
+ coverage/
47
+ test-results/
48
+ playwright-report/
49
+
50
+ # E2E runtime files
51
+ .e2e-pids
52
+ .e2e-jobs
53
+ .e2e-*.log
54
+
55
+ # Prisma
56
+ packages/database/prisma/migrations/**/migration_lock.toml
57
+
58
+ # MedSight infer-app-client
59
+ *.onnx
60
+ apps/infer-app-client/assets/trt_cache/
61
+ apps/infer-app-client/dist/
62
+ apps/infer-app-client/installer_output/
63
+ apps/infer-app-client/outputs/
64
+ .pytest_cache/
65
+
66
+ # Temporary
67
+ *.tmp
68
+ *.bak
69
+ apps/infer_app/
70
+
71
+ ssim_stats.json
72
+ ssim_all/
73
+ ssim_debug/
74
+
75
+ .teamwork/
76
+
77
+ # Teamwork agent scratch
78
+ .teamwork-ids.json
79
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 weavejam / quiclabel contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: quiclabel-coco-sync
3
+ Version: 0.0.1
4
+ Summary: CLI to incrementally sync a QuicLabel COCO dataset (annotations + images) from quiclabel-admin
5
+ Project-URL: Homepage, https://github.com/weavejam/quiclabel/tree/main/apps/quiclabel-sync-project-coco
6
+ Project-URL: Repository, https://github.com/weavejam/quiclabel
7
+ Project-URL: Issues, https://github.com/weavejam/quiclabel/issues
8
+ Author: weavejam / quiclabel contributors
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: annotation,coco,computer-vision,dataset,quiclabel,sync
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3 :: Only
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
24
+ Classifier: Topic :: Utilities
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: click>=8.1
27
+ Requires-Dist: requests>=2.31
28
+ Description-Content-Type: text/markdown
29
+
30
+ # quiclabel-coco-sync
31
+
32
+ CLI to incrementally sync a QuicLabel COCO dataset (annotations + images)
33
+ from `quiclabel-admin`. Pulls a fresh `annotations-YYYYMMDD-HHMMSS.json`
34
+ next to your existing dataset and downloads only the images you don't
35
+ already have, using multiple threads.
36
+
37
+ ## Prerequisites
38
+
39
+ - **uv** — Python package & runtime manager. Install:
40
+ - macOS / Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
41
+ - Windows: `winget install astral-sh.uv` (or `irm https://astral.sh/uv/install.ps1 | iex`)
42
+ - via pipx: `pipx install uv`
43
+ - **An API key** — get one from quiclabel-admin: *Settings → API Keys → New key*.
44
+ Copy the `qk_...` value immediately (it's only shown once).
45
+
46
+ ## Quick start (from PyPI — recommended)
47
+
48
+ No clone, no install — `uvx` downloads, caches and runs in one shot:
49
+
50
+ ```bash
51
+ uvx quiclabel-coco-sync path/to/annotations.json \
52
+ --admin-url https://quiclabel-admin.example.com \
53
+ --api-key qk_xxxxxxxxxxxxxxxxxxxxxx
54
+ ```
55
+
56
+ Or set env vars and call it bare:
57
+
58
+ ```bash
59
+ export QUICLABEL_ADMIN_URL=https://quiclabel-admin.example.com
60
+ export QUICLABEL_API_KEY=qk_xxxxxxxxxxxxxxxxxxxxxx
61
+ uvx quiclabel-coco-sync path/to/annotations.json
62
+ ```
63
+
64
+ Prefer a persistent install? Use `uv tool`:
65
+
66
+ ```bash
67
+ uv tool install quiclabel-coco-sync
68
+ quiclabel-coco-sync path/to/annotations.json --admin-url ... --api-key ...
69
+ ```
70
+
71
+ ## From the monorepo (contributors)
72
+
73
+ ```bash
74
+ # From the repo root
75
+ pnpm sync-project-coco path/to/annotations.json \
76
+ --admin-url https://quiclabel-admin.example.com \
77
+ --api-key qk_xxxxxxxxxxxxxxxxxxxxxx
78
+ ```
79
+
80
+ Or directly with `uv` against this app directory:
81
+
82
+ ```bash
83
+ cd apps/quiclabel-sync-project-coco
84
+ uv sync
85
+ uv run quiclabel-coco-sync path/to/annotations.json \
86
+ --admin-url https://quiclabel-admin.example.com \
87
+ --api-key qk_xxxxxxxxxxxxxxxxxxxxxx
88
+ ```
89
+
90
+ ## What it does
91
+
92
+ 1. Reads `path/to/annotations.json` and its `meta` block (added by the COCO exporter).
93
+ 2. Calls `GET /api/v1/projects/<project_id>/coco` with the same filters,
94
+ paging by cursor — so 10k+ task projects don't blow up server memory.
95
+ 3. Writes `path/to/annotations-20260519-143045.json` (timestamped — never
96
+ overwrites your input).
97
+ 4. Diffs `task_id` sets, downloads any missing images to `path/to/images/`
98
+ using a thread pool. Files already on disk are skipped by file name.
99
+
100
+ The old `annotations.json` and the existing `images/*` files are never touched.
101
+
102
+ ## Configuration priority
103
+
104
+ Each value is resolved in this order — first wins:
105
+
106
+ 1. CLI flag (`--project-id`, `--statuses`, …)
107
+ 2. Env var (`QUICLABEL_ADMIN_URL`, `QUICLABEL_API_KEY`)
108
+ 3. `meta` block of the input json
109
+
110
+ If anything required is missing from all three, the CLI exits with a clear
111
+ message naming the missing key and where to provide it.
112
+
113
+ ## Recovery
114
+
115
+ - **Partial failure** (some images failed mid-run): just re-run the same
116
+ command. Already-downloaded files are skipped by file name, so retry only
117
+ fetches the remaining ones. The CLI tells you this in the failure summary.
118
+ - **Corrupt image file**: delete it, then re-run.
119
+ - **A `.part` file in `images/`** indicates a crashed download. Safe to delete.
120
+
121
+ ## Development
122
+
123
+ ```bash
124
+ cd apps/quiclabel-sync-project-coco
125
+ uv sync --group dev
126
+ uv run pytest
127
+ ```
128
+
129
+ ## Releasing to PyPI (maintainers)
130
+
131
+ Manual release flow until CI is wired up:
132
+
133
+ ```bash
134
+ cd apps/quiclabel-sync-project-coco
135
+
136
+ # 1. bump version in pyproject.toml
137
+ # 2. build sdist + wheel
138
+ uv build
139
+
140
+ # 3. publish (uses UV_PUBLISH_TOKEN or prompts)
141
+ uv publish
142
+ ```
143
+
144
+ Get a PyPI API token at <https://pypi.org/manage/account/token/>.
145
+
@@ -0,0 +1,116 @@
1
+ # quiclabel-coco-sync
2
+
3
+ CLI to incrementally sync a QuicLabel COCO dataset (annotations + images)
4
+ from `quiclabel-admin`. Pulls a fresh `annotations-YYYYMMDD-HHMMSS.json`
5
+ next to your existing dataset and downloads only the images you don't
6
+ already have, using multiple threads.
7
+
8
+ ## Prerequisites
9
+
10
+ - **uv** — Python package & runtime manager. Install:
11
+ - macOS / Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
12
+ - Windows: `winget install astral-sh.uv` (or `irm https://astral.sh/uv/install.ps1 | iex`)
13
+ - via pipx: `pipx install uv`
14
+ - **An API key** — get one from quiclabel-admin: *Settings → API Keys → New key*.
15
+ Copy the `qk_...` value immediately (it's only shown once).
16
+
17
+ ## Quick start (from PyPI — recommended)
18
+
19
+ No clone, no install — `uvx` downloads, caches and runs in one shot:
20
+
21
+ ```bash
22
+ uvx quiclabel-coco-sync path/to/annotations.json \
23
+ --admin-url https://quiclabel-admin.example.com \
24
+ --api-key qk_xxxxxxxxxxxxxxxxxxxxxx
25
+ ```
26
+
27
+ Or set env vars and call it bare:
28
+
29
+ ```bash
30
+ export QUICLABEL_ADMIN_URL=https://quiclabel-admin.example.com
31
+ export QUICLABEL_API_KEY=qk_xxxxxxxxxxxxxxxxxxxxxx
32
+ uvx quiclabel-coco-sync path/to/annotations.json
33
+ ```
34
+
35
+ Prefer a persistent install? Use `uv tool`:
36
+
37
+ ```bash
38
+ uv tool install quiclabel-coco-sync
39
+ quiclabel-coco-sync path/to/annotations.json --admin-url ... --api-key ...
40
+ ```
41
+
42
+ ## From the monorepo (contributors)
43
+
44
+ ```bash
45
+ # From the repo root
46
+ pnpm sync-project-coco path/to/annotations.json \
47
+ --admin-url https://quiclabel-admin.example.com \
48
+ --api-key qk_xxxxxxxxxxxxxxxxxxxxxx
49
+ ```
50
+
51
+ Or directly with `uv` against this app directory:
52
+
53
+ ```bash
54
+ cd apps/quiclabel-sync-project-coco
55
+ uv sync
56
+ uv run quiclabel-coco-sync path/to/annotations.json \
57
+ --admin-url https://quiclabel-admin.example.com \
58
+ --api-key qk_xxxxxxxxxxxxxxxxxxxxxx
59
+ ```
60
+
61
+ ## What it does
62
+
63
+ 1. Reads `path/to/annotations.json` and its `meta` block (added by the COCO exporter).
64
+ 2. Calls `GET /api/v1/projects/<project_id>/coco` with the same filters,
65
+ paging by cursor — so 10k+ task projects don't blow up server memory.
66
+ 3. Writes `path/to/annotations-20260519-143045.json` (timestamped — never
67
+ overwrites your input).
68
+ 4. Diffs `task_id` sets, downloads any missing images to `path/to/images/`
69
+ using a thread pool. Files already on disk are skipped by file name.
70
+
71
+ The old `annotations.json` and the existing `images/*` files are never touched.
72
+
73
+ ## Configuration priority
74
+
75
+ Each value is resolved in this order — first wins:
76
+
77
+ 1. CLI flag (`--project-id`, `--statuses`, …)
78
+ 2. Env var (`QUICLABEL_ADMIN_URL`, `QUICLABEL_API_KEY`)
79
+ 3. `meta` block of the input json
80
+
81
+ If anything required is missing from all three, the CLI exits with a clear
82
+ message naming the missing key and where to provide it.
83
+
84
+ ## Recovery
85
+
86
+ - **Partial failure** (some images failed mid-run): just re-run the same
87
+ command. Already-downloaded files are skipped by file name, so retry only
88
+ fetches the remaining ones. The CLI tells you this in the failure summary.
89
+ - **Corrupt image file**: delete it, then re-run.
90
+ - **A `.part` file in `images/`** indicates a crashed download. Safe to delete.
91
+
92
+ ## Development
93
+
94
+ ```bash
95
+ cd apps/quiclabel-sync-project-coco
96
+ uv sync --group dev
97
+ uv run pytest
98
+ ```
99
+
100
+ ## Releasing to PyPI (maintainers)
101
+
102
+ Manual release flow until CI is wired up:
103
+
104
+ ```bash
105
+ cd apps/quiclabel-sync-project-coco
106
+
107
+ # 1. bump version in pyproject.toml
108
+ # 2. build sdist + wheel
109
+ uv build
110
+
111
+ # 3. publish (uses UV_PUBLISH_TOKEN or prompts)
112
+ uv publish
113
+ ```
114
+
115
+ Get a PyPI API token at <https://pypi.org/manage/account/token/>.
116
+
@@ -0,0 +1,52 @@
1
+ [project]
2
+ name = "quiclabel-coco-sync"
3
+ version = "0.0.1"
4
+ description = "CLI to incrementally sync a QuicLabel COCO dataset (annotations + images) from quiclabel-admin"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ license-files = ["LICENSE"]
8
+ requires-python = ">=3.10"
9
+ authors = [
10
+ { name = "weavejam / quiclabel contributors" },
11
+ ]
12
+ keywords = ["quiclabel", "coco", "annotation", "dataset", "sync", "computer-vision"]
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Environment :: Console",
16
+ "Intended Audience :: Developers",
17
+ "Intended Audience :: Science/Research",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3 :: Only",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Scientific/Engineering :: Image Recognition",
26
+ "Topic :: Utilities",
27
+ ]
28
+ dependencies = [
29
+ "click>=8.1",
30
+ "requests>=2.31",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/weavejam/quiclabel/tree/main/apps/quiclabel-sync-project-coco"
35
+ Repository = "https://github.com/weavejam/quiclabel"
36
+ Issues = "https://github.com/weavejam/quiclabel/issues"
37
+
38
+ [project.scripts]
39
+ quiclabel-coco-sync = "quiclabel_sync_project_coco.cli:main"
40
+ sync-project-coco = "quiclabel_sync_project_coco.cli:main"
41
+
42
+ [dependency-groups]
43
+ dev = [
44
+ "pytest>=8.0",
45
+ ]
46
+
47
+ [build-system]
48
+ requires = ["hatchling"]
49
+ build-backend = "hatchling.build"
50
+
51
+ [tool.hatch.build.targets.wheel]
52
+ packages = ["src/quiclabel_sync_project_coco"]
@@ -0,0 +1,3 @@
1
"""Incremental sync for QuicLabel COCO datasets."""

# Must match `version` in pyproject.toml; the published distribution
# metadata for this release declares 0.0.1.
__version__ = "0.0.1"
@@ -0,0 +1,97 @@
1
+ """HTTP client for GET /api/v1/projects/:id/coco with cursor pagination."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import time
6
+ from typing import Any, Iterator
7
+
8
+ import requests
9
+
10
# Logger named after this module.
logger = logging.getLogger(__name__)

# HTTP status codes that are retried instead of failing immediately:
# 429 (Too Many Requests) and the listed 5xx server errors.
RETRYABLE_STATUS = {
    429,
    500,
    502,
    503,
    504,
}
13
+
14
+
15
class ApiError(Exception):
    """Raised when a request to the COCO export API cannot succeed.

    Covers authentication failures (401/403), other non-200 responses that
    are not retried or are still failing after the last retry, and network
    errors that persist after the last retry.
    """
17
+
18
+
19
def _request_page(
    session: requests.Session,
    url: str,
    params: dict[str, Any],
    headers: dict[str, str],
    *,
    max_retries: int = 3,
    timeout: float = 30.0,
) -> dict[str, Any]:
    """Fetch one page, retrying transient failures with exponential backoff.

    Network errors (``requests.RequestException``) and responses whose status
    is in ``RETRYABLE_STATUS`` are retried up to ``max_retries`` times,
    sleeping ``2**attempt`` seconds between attempts (1s, 2s, 4s, ...).

    Args:
        session: Session used to issue the GET request.
        url: Endpoint URL.
        params: Query-string parameters for this page.
        headers: Request headers (carries the Authorization header).
        max_retries: Number of retries after the first attempt.
        timeout: Per-request timeout in seconds, passed to ``session.get``.

    Returns:
        The decoded JSON body of the 200 response.

    Raises:
        ApiError: On 401/403, on any other non-200 status that is not
            retryable or is still failing after the last retry, on a network
            error that persists after the last retry, or when a 200 response
            body is not valid JSON.
    """
    for attempt in range(max_retries + 1):
        try:
            resp = session.get(url, params=params, headers=headers, timeout=timeout)
        except requests.RequestException as e:
            if attempt == max_retries:
                raise ApiError(f"Network error after {max_retries + 1} attempts: {e}") from e
            logger.warning("network error (attempt %d): %s", attempt + 1, e)
            time.sleep(2**attempt)
            continue

        if resp.status_code == 200:
            try:
                return resp.json()
            except ValueError as e:
                # A 200 with a non-JSON body (e.g. an HTML page from a proxy)
                # must surface as ApiError like every other failure here.
                # requests' JSONDecodeError is a ValueError subclass.
                raise ApiError(f"Invalid JSON in 200 response: {e}") from e

        # Credentials problems never fix themselves on retry; fail fast.
        if resp.status_code in (401, 403):
            raise ApiError(
                f"Authentication failed ({resp.status_code}): check --api-key"
            )

        if resp.status_code in RETRYABLE_STATUS and attempt < max_retries:
            logger.warning(
                "server %d (attempt %d), retrying", resp.status_code, attempt + 1
            )
            time.sleep(2**attempt)
            continue

        # Non-retryable status, or a retryable one on the final attempt.
        raise ApiError(f"API error {resp.status_code}: {resp.text[:500]}")

    # Only reachable when max_retries is negative and the loop never runs.
    raise ApiError("retry loop exhausted")  # pragma: no cover
57
+
58
+
59
def iter_pages(
    admin_url: str,
    api_key: str,
    project_id: str,
    *,
    statuses: list[str],
    tag_ids: list[str],
    image_source: str,
    limit: int = 500,
    session: requests.Session | None = None,
) -> Iterator[dict[str, Any]]:
    """Yield each page dict of the project's COCO export in turn.

    The first page has meta/info/categories; subsequent pages only have
    images/annotations/next_cursor. Iteration stops when a page carries no
    ``next_cursor``.

    Args:
        admin_url: Base URL of quiclabel-admin; trailing slashes are ignored.
        api_key: API key sent as a Bearer token.
        project_id: Project whose COCO export is fetched.
        statuses: Status filter, sent comma-joined.
        tag_ids: Tag filter, sent comma-joined; omitted when empty.
        image_source: Passed through as the ``image_source`` query parameter.
        limit: Page size requested from the server.
        session: Optional session to reuse. When omitted, a session is
            created here and closed once the generator finishes or is closed.

    Raises:
        ApiError: Propagated from ``_request_page``.
    """
    owns_session = session is None
    s = requests.Session() if owns_session else session
    headers = {"Authorization": f"Bearer {api_key}"}
    # Strip trailing slashes so "https://host/" does not yield "//api/v1/...".
    base_url = admin_url.rstrip("/")
    url = f"{base_url}/api/v1/projects/{project_id}/coco"
    params: dict[str, Any] = {
        "statuses": ",".join(statuses),
        "image_source": image_source,
        "limit": limit,
    }
    if tag_ids:
        params["tag_ids"] = ",".join(tag_ids)

    cursor: str | None = None
    page_num = 0
    try:
        while True:
            page_params = dict(params)
            if cursor:
                page_params["cursor"] = cursor

            page = _request_page(s, url, page_params, headers)
            page_num += 1
            logger.info("page %d: %d images", page_num, len(page.get("images") or []))
            yield page

            cursor = page.get("next_cursor")
            if not cursor:
                return
    finally:
        # Only close a session created here; a caller-supplied one stays open.
        if owns_session:
            s.close()