labelpull 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labelpull/__init__.py ADDED
@@ -0,0 +1,44 @@
1
+ """labelpull: pull the latest Labelbox annotations into a tidy table.
2
+
3
+ The Labelbox SDK exports a project's labels as nested, ontology-shaped JSON.
4
+ labelpull is the thin layer the SDK lacks: a generic flattener
5
+ (:func:`~labelpull.core.flatten`) plus the correctness logic (latest-label
6
+ selection, status normalization) and a one-command CLI on top.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from labelpull.adapters import (
12
+ ADAPTERS,
13
+ Adapter,
14
+ GenericAdapter,
15
+ SpeciesAdapter,
16
+ write_csv,
17
+ )
18
+ from labelpull.core import (
19
+ WORKFLOW_STATUSES,
20
+ FeatureRow,
21
+ Summary,
22
+ export,
23
+ flatten,
24
+ read_export_file,
25
+ summarize,
26
+ )
27
+
28
+ __version__ = "0.1.0"
29
+
30
+ __all__ = [
31
+ "ADAPTERS",
32
+ "WORKFLOW_STATUSES",
33
+ "Adapter",
34
+ "FeatureRow",
35
+ "GenericAdapter",
36
+ "SpeciesAdapter",
37
+ "Summary",
38
+ "__version__",
39
+ "export",
40
+ "flatten",
41
+ "read_export_file",
42
+ "summarize",
43
+ "write_csv",
44
+ ]
labelpull/adapters.py ADDED
@@ -0,0 +1,110 @@
1
+ """Adapters: collapse ontology-agnostic :class:`FeatureRow` rows into a shape.
2
+
3
+ The generic path writes ``FeatureRow`` rows straight to a long-format CSV that
4
+ any project can read. An adapter narrows that to a project-specific wide record.
5
+ :class:`SpeciesAdapter` is the reference implementation, reproducing
6
+ speciesfirst's ``global_key,taxon,organs,labeled_by,workflow_status`` pull CSV
7
+ from the generic rows, so the engine has exactly one parser.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import csv
13
+ from collections import OrderedDict
14
+ from collections.abc import Iterable, Sequence
15
+ from pathlib import Path
16
+ from typing import Protocol, runtime_checkable
17
+
18
+ from labelpull.core import FeatureRow
19
+
20
+
21
@runtime_checkable
class Adapter(Protocol):
    """Structural interface for turning flattened features into CSV rows.

    An adapter exposes ``columns`` (the header row) and ``rows`` (the data rows
    produced from an iterable of :class:`FeatureRow`).
    """

    # Header names; ``write_csv`` writes them as the first CSV row.
    columns: Sequence[str]

    def rows(self, features: Iterable[FeatureRow]) -> Iterable[Sequence[str]]:
        """Return one sequence of cell values per output row."""
        ...
28
+
29
+
30
class GenericAdapter:
    """Long-format adapter: exactly one CSV row for every feature, any ontology."""

    columns: Sequence[str] = (
        "global_key",
        "data_row_id",
        "feature_kind",
        "feature_name",
        "value",
        "workflow_status",
        "labeled_by",
        "created_at",
        "parent_feature_id",
    )

    def rows(self, features: Iterable[FeatureRow]) -> Iterable[Sequence[str]]:
        """Yield one tuple per feature, with cells in ``columns`` order.

        ``workflow_status``, ``labeled_by`` and ``created_at`` may be ``None``
        on a feature; they are written as empty strings.
        """
        for feature in features:
            status = feature.workflow_status or ""
            annotator = feature.labeled_by or ""
            created = feature.created_at or ""
            yield (
                feature.global_key,
                feature.data_row_id,
                feature.feature_kind,
                feature.feature_name,
                feature.value,
                status,
                annotator,
                created,
                feature.parent_feature_id,
            )
58
+
59
+
60
class SpeciesAdapter:
    """Wide-format adapter: one row per ``global_key`` (speciesfirst's pull CSV).

    ``taxon`` comes from the radio feature named ``taxon_feature`` and
    ``organs`` from the checklist feature named ``organs_feature`` (its value
    is already ``;``-joined by the flattener). Any feature for a key creates
    its record, so a labelled row that carries neither feature still appears,
    with ``taxon=''`` and ``organs=''``. Rows come out in the order their keys
    are first seen in ``features``.
    """

    columns: Sequence[str] = ("global_key", "taxon", "organs", "labeled_by", "workflow_status")
    taxon_feature = "Taxon"
    organs_feature = "Organs"

    def rows(self, features: Iterable[FeatureRow]) -> Iterable[Sequence[str]]:
        """Collapse all features sharing a ``global_key`` into a single tuple."""
        # A plain dict keeps insertion order, which fixes the output row order.
        records: dict[str, dict[str, str]] = {}
        for feature in features:
            record = records.get(feature.global_key)
            if record is None:
                record = {"taxon": "", "organs": "", "labeled_by": "", "workflow_status": ""}
                records[feature.global_key] = record

            # Later non-empty values win; empty/None never overwrite.
            if feature.labeled_by:
                record["labeled_by"] = feature.labeled_by
            if feature.workflow_status:
                record["workflow_status"] = feature.workflow_status

            is_taxon = (
                feature.feature_kind == "radio" and feature.feature_name == self.taxon_feature
            )
            is_organs = (
                feature.feature_kind == "checklist"
                and feature.feature_name == self.organs_feature
            )
            if is_taxon and feature.value:
                record["taxon"] = feature.value
            elif is_organs:
                # Unlike taxon, an empty checklist value is stored as-is.
                record["organs"] = feature.value

        for key, record in records.items():
            yield (
                key,
                record["taxon"],
                record["organs"],
                record["labeled_by"],
                record["workflow_status"],
            )
97
+
98
+
99
# Registry of adapter classes by schema name. The CLI validates ``--schema``
# against these keys and instantiates the selected class with no arguments.
ADAPTERS: dict[str, type] = {"generic": GenericAdapter, "species": SpeciesAdapter}
100
+
101
+
102
def write_csv(path: str | Path, adapter: Adapter, features: Iterable[FeatureRow]) -> Path:
    """Write ``features`` through ``adapter`` to a CSV file at ``path``.

    Missing parent directories are created. The first row is
    ``adapter.columns``; the rest are whatever ``adapter.rows(features)``
    produces. The file is always written as UTF-8, so non-ASCII values (taxon
    names, free text, e-mail addresses) do not depend on the platform's locale
    encoding.

    Returns the output location as a :class:`~pathlib.Path`.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings (per the csv docs).
    with path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(adapter.columns)
        writer.writerows(adapter.rows(features))
    return path
labelpull/cli.py ADDED
@@ -0,0 +1,101 @@
1
+ """``labelpull`` CLI: pull the latest Labelbox annotations to a tidy CSV.
2
+
3
+ labelpull pull PROJECT_ID -o labels.csv
4
+ labelpull pull PROJECT_ID --status Done --since 2026-06-01
5
+ labelpull pull PROJECT_ID --schema species -o taxa.csv
6
+ labelpull pull PROJECT_ID --from-export export.ndjson # offline, no API key
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+
13
+ import typer
14
+
15
+ from labelpull import __version__
16
+ from labelpull.adapters import ADAPTERS, write_csv
17
+ from labelpull.core import (
18
+ FeatureRow,
19
+ JsonDict,
20
+ _created_at,
21
+ _select_project,
22
+ flatten,
23
+ read_export_file,
24
+ summarize,
25
+ )
26
+ from labelpull.core import export as live_export
27
+
28
# The Typer application object; the package's ``labelpull`` console script
# points at ``labelpull.cli:app``.
app = typer.Typer(add_completion=False, help="Pull the latest Labelbox annotations to CSV.")
29
+
30
+
31
# NOTE(review): presumably registered so that ``pull`` stays an explicit
# subcommand (usage is ``labelpull pull ...``) — confirm against Typer's
# single-command behaviour. The body is intentionally empty.
@app.callback()
def _main() -> None:
    """labelpull: pull the latest Labelbox annotations into a tidy table."""
34
+
35
+
36
@app.command()
def pull(
    project_id: str = typer.Argument(..., help="Labelbox project id to export from."),
    out: Path = typer.Option(
        Path("pulled_labels.csv"), "--out", "-o", help="Where to write the CSV."
    ),
    schema: str = typer.Option(
        "generic",
        help="generic = one row per feature (any ontology); "
        "species = speciesfirst Taxon/Organs wide CSV.",
    ),
    status: str | None = typer.Option(
        None, help="Filter by task-queue stage: ToLabel | InReview | InRework | Done."
    ),
    since: str | None = typer.Option(
        None, help="Keep only rows whose newest label was created on/after this ISO date/time."
    ),
    from_export: Path | None = typer.Option(
        None,
        exists=True,
        dir_okay=False,
        help="Flatten a saved export (JSON/NDJSON) offline instead of the live API.",
    ),
    api_key: str | None = typer.Option(None, help="Labelbox API key (else LABELBOX_API_KEY)."),
) -> None:
    """Export the latest annotations and flatten them to CSV, with a summary."""
    # Fail fast on an unknown schema, before any file or network access.
    if schema not in ADAPTERS:
        raise typer.BadParameter(f"unknown schema {schema!r}; choose from {sorted(ADAPTERS)}")
    adapter = ADAPTERS[schema]()

    typer.echo(f"labelpull v{__version__}")
    if from_export is not None:
        # Offline path: only --since is applied here. The status filter is
        # passed to the live export only, so say so on stderr instead of
        # silently writing an unfiltered CSV.
        if status is not None:
            typer.echo(
                f"warning: --status {status} is ignored with --from-export "
                "(the status filter is only applied by the live export)",
                err=True,
            )
        rows = read_export_file(from_export)
        if since is not None:
            rows = [r for r in rows if _row_since(r, project_id, since)]
        typer.echo(f" read {len(rows)} rows from {from_export}")
    else:
        # Live path: the export applies both the status and the since filter.
        rows = list(live_export(project_id, status=status, since=since, api_key=api_key))
        typer.echo(f" exported {len(rows)} rows from project {project_id}")

    features = [f for r in rows for f in flatten(r, project_id)]
    _print_summary(rows, features)
    write_csv(out, adapter, features)
    typer.echo(f"wrote {schema} CSV: {out}")
80
+
81
+
82
def _row_since(dr: JsonDict, project_id: str, since: str) -> bool:
    """Return whether the row's newest label for ``project_id`` is on/after ``since``.

    The comparison is a plain string comparison of the timestamps; a row with
    no label has an empty timestamp.
    """
    project_block = _select_project(dr, project_id)
    newest = _created_at(project_block)
    return newest >= since
84
+
85
+
86
def _print_summary(rows: list[JsonDict], features: list[FeatureRow]) -> None:
    """Echo a short triage summary of the pull: counts, statuses, kinds, freshness."""
    summary = summarize(rows, features)
    typer.echo(
        f" {summary.n_labelled} labelled / {summary.n_data_rows} rows "
        f"({summary.n_reached_unlabelled} reached unlabelled)"
    )
    # Both breakdowns print as "key=count" pairs sorted by key, and are
    # skipped entirely when empty.
    for heading, counts in ((" status: ", summary.statuses), (" kinds: ", summary.feature_kinds)):
        if counts:
            typer.echo(heading + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())))
    if summary.latest_created_at:
        typer.echo(f" latest label: {summary.latest_created_at}")
98
+
99
+
100
# Allow running this module directly as a script; runs the Typer app.
if __name__ == "__main__":  # pragma: no cover
    app()
labelpull/core.py ADDED
@@ -0,0 +1,285 @@
1
+ """Ontology-agnostic Labelbox export + flatten.
2
+
3
+ The Labelbox SDK already exports a project's labels and streams them as deeply
4
+ nested, ontology-shaped JSON. What it does *not* give you is a tabular view, the
5
+ correctness logic to pick the right label when a row was reviewed, or a workflow
6
+ status that is always populated. This module is exactly that thin layer:
7
+
8
+ * :func:`export` wraps ``project.export(...)`` + ``wait_till_done()`` +
9
+ ``get_buffered_stream()`` (SDK lazy-imported, so it is optional) and adds a
10
+ ``since`` filter for "only the latest annotations".
11
+ * :func:`flatten` turns one export row into :class:`FeatureRow` long-format rows,
12
+ covering *every* feature kind (classifications AND objects) without assuming a
13
+ particular ontology. It encodes the two traps a hand-written parser gets wrong:
14
+ selecting the most-recently-created label (a QC-reviewed row carries both the
15
+ annotator's and the reviewer's label) and normalizing the workflow status.
16
+ * :func:`read_export_file` parses a saved export (UI download or a prior pull) so
17
+ the same flattener runs offline, no API key required.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import os
24
+ from collections.abc import Iterable, Iterator
25
+ from dataclasses import dataclass
26
+ from pathlib import Path
27
+ from typing import Any, cast
28
+
29
# One export row (and its nested blocks) is arbitrary JSON; alias it for brevity.
JsonDict = dict[str, Any]

# The task-queue stages ``project.export(filters={"workflow_status": ...})`` accepts.
WORKFLOW_STATUSES = ("ToLabel", "InReview", "InRework", "Done")

# Geometry keys a localized object may carry in the v7 export, in probe order:
# ``_object_geometry`` reports the first of these keys that is present.
_GEOMETRY_KINDS = ("bounding_box", "polygon", "line", "point", "mask")
37
+
38
+
39
@dataclass(frozen=True)
class FeatureRow:
    """One ``(label, feature)`` pair from an export row, ontology-agnostic.

    A classification answer or a localized object. An object's nested
    classifications become their own rows, linked to the object via
    :attr:`parent_feature_id`. Each labelled data row also yields one
    ``feature_kind="label"`` sentinel row (no feature, ``value=""``) so that a
    reached-and-labelled row is always represented even when empty.

    Field order is significant: ``flatten`` constructs instances positionally.
    """

    global_key: str
    data_row_id: str  # falls back to ``global_key`` when the export has no id
    # one of: label, radio, checklist, text, bounding_box, polygon,
    # line, point, mask, relationship, unknown
    feature_kind: str
    feature_name: str  # "" when the feature has no name (e.g. the sentinel)
    value: str  # answer value(s) / compact geometry; "" when none
    workflow_status: str | None  # None when the export carries no status
    labeled_by: str | None  # ``label_details.created_by`` of the chosen label
    created_at: str | None  # ``label_details.created_at`` of the chosen label
    parent_feature_id: str  # "" for top-level features
61
+
62
+
63
def export(
    project_id: str,
    *,
    status: str | None = None,
    since: str | None = None,
    api_key: str | None = None,
    client: Any | None = None,
) -> Iterator[JsonDict]:
    """Lazily yield the export rows (one dict per data row) of ``project_id``.

    Nothing runs until the returned generator is first advanced. At that point
    the export task is started, waited on, and its buffered stream consumed.

    * ``status`` is passed to the export as a ``workflow_status`` filter (see
      :data:`WORKFLOW_STATUSES`); a falsy value means no filter.
    * ``since`` drops every row whose newest label timestamp compares lower
      than it. The comparison is lexicographic on the ISO string.
    * ``client`` is an injected client (e.g. a stub). Without one, a client is
      built from ``api_key`` or the ``LABELBOX_API_KEY`` environment variable,
      importing the ``labelbox`` SDK lazily.
    """
    if client is None:
        client = _make_client(api_key)
    project = client.get_project(project_id)

    export_params = {"data_row_details": True, "label_details": True, "project_details": True}
    status_filter = None
    if status:
        status_filter = {"workflow_status": status}

    task = project.export(params=export_params, filters=status_filter)
    task.wait_till_done()

    for stream_row in task.get_buffered_stream():
        payload = stream_row.json
        if since is not None and _created_at(_select_project(payload, project_id)) < since:
            continue  # newest label predates the cut-off (or there is no label)
        yield payload
91
+
92
+
93
def read_export_file(path: str | Path) -> list[JsonDict]:
    """Load a saved export (JSON array, single JSON object, or NDJSON).

    The file is decoded as UTF-8 regardless of the platform's locale encoding,
    and a leading byte-order mark is tolerated (``utf-8-sig``), since a BOM
    would otherwise make every ``json.loads`` call fail.

    Returns a list of export rows: the array itself, a one-element list for a
    single object, one element per non-blank line for NDJSON, or ``[]`` for an
    empty/whitespace-only file.

    Raises:
        json.JSONDecodeError: if the content is neither valid JSON nor valid
            NDJSON.
    """
    text = Path(path).read_text(encoding="utf-8-sig").strip()
    if not text:
        return []
    try:
        loaded = json.loads(text)
    except json.JSONDecodeError:
        # Not a single JSON document: treat it as one JSON value per line.
        return [json.loads(line) for line in text.splitlines() if line.strip()]
    return loaded if isinstance(loaded, list) else [loaded]
103
+
104
+
105
def flatten(dr: JsonDict, project_id: str | None = None) -> list[FeatureRow]:
    """Turn one export row into long-format :class:`FeatureRow` rows.

    ``project_id`` names the project whose labels are read. With ``None`` the
    row's only project is used, and a row carrying several projects yields
    nothing, so labels from different projects are never mixed. A row without
    a global key, or without any label, yields ``[]``.

    The output always starts with a ``"label"`` sentinel row. It is followed
    by top-level classifications, then each object with its nested
    classifications directly after it, then relationships.
    """
    data_row = dr.get("data_row") or {}
    global_key = data_row.get("global_key") or ""
    data_row_id = data_row.get("id") or global_key
    proj = _select_project(dr, project_id)
    label = _latest_label(proj)
    if label is None or not global_key:
        return []

    details = label.get("label_details") or {}
    annotations = label.get("annotations") or {}
    status = _workflow_status(proj)
    labeled_by = details.get("created_by")
    created_at = details.get("created_at")

    def make(kind: str, name: str | None, value: str, parent: str = "") -> FeatureRow:
        # Every row of this data row shares the same key, status, author and time.
        return FeatureRow(
            global_key=global_key,
            data_row_id=data_row_id,
            feature_kind=kind,
            feature_name=name or "",
            value=value,
            workflow_status=status,
            labeled_by=labeled_by,
            created_at=created_at,
            parent_feature_id=parent,
        )

    # Sentinel: this row was reached and labelled (carries who/when even if empty).
    out: list[FeatureRow] = [make("label", "", "")]

    for cls in annotations.get("classifications") or []:
        kind, value = _classification_value(cls)
        out.append(make(kind, cls.get("name"), value))

    for obj in annotations.get("objects") or []:
        kind, value = _object_geometry(obj)
        out.append(make(kind, obj.get("name"), value))
        # Nested answers point back at their object through this id.
        object_id = obj.get("feature_id") or obj.get("feature_schema_id") or ""
        for nested in obj.get("classifications") or []:
            nested_kind, nested_value = _classification_value(nested)
            out.append(make(nested_kind, nested.get("name"), nested_value, parent=object_id))

    for rel in annotations.get("relationships") or []:
        payload = json.dumps(rel.get("relationship") or {}, sort_keys=True)
        out.append(make("relationship", rel.get("name"), payload))

    return out
163
+
164
+
165
@dataclass(frozen=True)
class Summary:
    """Triage view of a pull: how much came back, of what kind, how fresh."""

    n_data_rows: int  # number of export rows passed to ``summarize``
    n_labelled: int  # distinct ``global_key`` values among the features
    n_reached_unlabelled: int  # n_data_rows - n_labelled, floored at 0
    feature_kinds: dict[str, int]  # count per feature kind, "label" sentinels excluded
    feature_names: dict[str, int]  # count per non-empty feature name, sentinels excluded
    statuses: dict[str, int]  # count of sentinel rows per non-empty workflow status
    latest_created_at: str | None  # greatest ``created_at`` among sentinel rows, if any
176
+
177
+
178
def summarize(rows: Iterable[JsonDict], features: Iterable[FeatureRow]) -> Summary:
    """Build the :class:`Summary` for a pull.

    ``rows`` are the export rows and ``features`` the rows flattened from them.
    ``"label"`` sentinel rows feed the status counts and the newest timestamp;
    every other row feeds the per-kind and per-name counts.
    """
    data_rows = list(rows)
    feature_list = list(features)

    labelled: set[str] = set()
    kind_counts: dict[str, int] = {}
    name_counts: dict[str, int] = {}
    status_counts: dict[str, int] = {}
    newest: str | None = None

    for feat in feature_list:
        labelled.add(feat.global_key)
        if feat.feature_kind != "label":
            kind_counts[feat.feature_kind] = kind_counts.get(feat.feature_kind, 0) + 1
            if feat.feature_name:
                name_counts[feat.feature_name] = name_counts.get(feat.feature_name, 0) + 1
            continue
        # Sentinel row: one per labelled data row.
        if feat.workflow_status:
            status_counts[feat.workflow_status] = status_counts.get(feat.workflow_status, 0) + 1
        stamp = feat.created_at
        if stamp and (newest is None or stamp > newest):
            newest = stamp

    total = len(data_rows)
    n_labelled = len(labelled)
    return Summary(
        n_data_rows=total,
        n_labelled=n_labelled,
        n_reached_unlabelled=max(total - n_labelled, 0),
        feature_kinds=kind_counts,
        feature_names=name_counts,
        statuses=status_counts,
        latest_created_at=newest,
    )
208
+
209
+
210
+ # --- internals -------------------------------------------------------------
211
+
212
+
213
def _make_client(api_key: str | None) -> Any:
    """Build a Labelbox client from ``api_key`` or ``LABELBOX_API_KEY``.

    The SDK is imported here, on demand, so it is only needed for live pulls.

    Raises:
        RuntimeError: if the ``labelbox`` package cannot be imported, or if no
            API key is available from either source.
    """
    try:
        import labelbox  # noqa: PLC0415 (optional dep, imported only for live pulls)
    except ImportError as exc:  # pragma: no cover - exercised only without the SDK
        raise RuntimeError(
            "a live pull needs the Labelbox SDK: pip install 'labelpull[live]'"
        ) from exc

    key = api_key if api_key else os.environ.get("LABELBOX_API_KEY")
    if key:
        return labelbox.Client(api_key=key)
    raise RuntimeError(
        "no Labelbox API key: pass api_key=... or set LABELBOX_API_KEY "
        "(or use a saved export with read_export_file)"
    )
227
+
228
+
229
def _select_project(dr: JsonDict, project_id: str | None) -> JsonDict:
    """Pick the per-project block of an export row.

    With a ``project_id`` the matching block is returned, or ``{}`` when it is
    missing or empty. With ``None`` the row's sole project is returned as-is,
    and anything other than exactly one project yields ``{}``.
    """
    projects = dr.get("projects") or {}
    if project_id is None:
        if len(projects) != 1:
            return {}  # ambiguous: caller must name the project
        (only,) = projects.values()
        return only
    return projects.get(project_id) or {}
236
+
237
+
238
def _latest_label(proj: JsonDict) -> JsonDict | None:
    """Return the most recently created label of a project block, or ``None``.

    A QC-reviewed row carries the annotator's label *and* the reviewer's; the
    verified answer is the most recently created one, not ``labels[0]``. On
    equal timestamps the earlier list entry wins.
    """
    candidates = proj.get("labels") or []
    if candidates:
        return cast("JsonDict", max(candidates, key=_created_at_of_label))
    return None
245
+
246
+
247
def _created_at_of_label(label: JsonDict) -> str:
    """Return the label's ``label_details.created_at``, or ``""`` when absent."""
    details = label.get("label_details") or {}
    stamp = details.get("created_at")
    return stamp or ""
249
+
250
+
251
def _created_at(proj: JsonDict) -> str:
    """Return the creation timestamp of the project's newest label, or ``""``."""
    newest = _latest_label(proj)
    if not newest:
        return ""
    return _created_at_of_label(newest)
254
+
255
+
256
def _workflow_status(proj: JsonDict) -> str | None:
    """Return the project's workflow status, falling back to its task queue.

    ``project_details.workflow_status`` is returned whenever it is not
    ``None``. Otherwise the first non-empty of ``task_queue_name`` and
    ``task_queue_status`` is returned, or ``None`` when neither is set.
    """
    details = proj.get("project_details") or {}
    explicit = details.get("workflow_status")
    if explicit is not None:
        return explicit
    # NOTE(review): the queue value is passed through unchanged; it is not
    # mapped onto WORKFLOW_STATUSES here — confirm that is intended.
    return details.get("task_queue_name") or details.get("task_queue_status")
263
+
264
+
265
def _classification_value(cls: JsonDict) -> tuple[str, str]:
    """Return ``(kind, value)`` for one classification block.

    Probed in order: a non-empty ``radio_answer``, then ``checklist_answers``,
    then ``text_answer``. Each answer prefers its ``value`` over its ``name``.
    Checklist answers are joined with ``;`` and empty ones are skipped. A block
    matching none of these is ``("unknown", "")``.
    """
    radio = cls.get("radio_answer")
    if radio:
        return "radio", radio.get("value") or radio.get("name") or ""

    checklist = cls.get("checklist_answers")
    if checklist is not None:
        picked = []
        for answer in checklist:
            text = answer.get("value") or answer.get("name") or ""
            if text:
                picked.append(text)
        return "checklist", ";".join(picked)

    text_answer = cls.get("text_answer")
    if text_answer is not None:
        return "text", (text_answer or {}).get("content") or ""

    return "unknown", ""
275
+
276
+
277
def _object_geometry(obj: JsonDict) -> tuple[str, str]:
    """Return ``(kind, value)`` for a localized object.

    The first key of ``_GEOMETRY_KINDS`` whose value is not ``None`` decides
    the kind. A mask is represented by its ``url`` (``""`` when missing); every
    other geometry is dumped as key-sorted JSON. An object with none of the
    keys is ``("unknown", "")``.
    """
    for kind in _GEOMETRY_KINDS:
        geom = obj.get(kind)
        if geom is not None:
            if kind == "mask":
                return "mask", (geom or {}).get("url") or ""
            return kind, json.dumps(geom, sort_keys=True)
    return "unknown", ""
labelpull/py.typed ADDED
File without changes
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.4
2
+ Name: labelpull
3
+ Version: 0.1.0
4
+ Summary: Pull the latest Labelbox annotations into a tidy, ontology-agnostic table.
5
+ Author-email: Wietze Suijker <wietze.suijker@gmail.com>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: typer>=0.9
9
+ Provides-Extra: dev
10
+ Requires-Dist: mypy>=1.8; extra == 'dev'
11
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
12
+ Requires-Dist: pytest>=7.4; extra == 'dev'
13
+ Requires-Dist: ruff>=0.4; extra == 'dev'
14
+ Provides-Extra: live
15
+ Requires-Dist: labelbox>=7.0; extra == 'live'
16
+ Description-Content-Type: text/markdown
17
+
18
+ # labelpull
19
+
20
+ Pull the latest Labelbox annotations into a tidy, ontology-agnostic table.
21
+
22
+ The Labelbox SDK already exports a project's labels and streams them. What it
23
+ doesn't give you is a *tabular* view of that deeply nested JSON, the correctness
24
+ logic to pick the right label when a row was reviewed, or a workflow status that
25
+ is always populated. `labelpull` is exactly that thin layer on top of the SDK.
26
+
27
+ ## Install
28
+
29
+ ```bash
30
+ pip install labelpull # offline parsing + CLI
31
+ pip install 'labelpull[live]' # + the Labelbox SDK for live pulls
32
+ ```
33
+
34
+ ## CLI
35
+
36
+ ```bash
37
+ export LABELBOX_API_KEY=...
38
+ labelpull pull <PROJECT_ID> -o labels.csv # generic long CSV (any ontology)
39
+ labelpull pull <PROJECT_ID> --status Done # only verified rows
40
+ labelpull pull <PROJECT_ID> --since 2026-06-01 # only the latest labels
41
+ labelpull pull <PROJECT_ID> --from-export export.ndjson # offline, no API key
42
+ labelpull pull <PROJECT_ID> --schema species -o taxa.csv # speciesfirst Taxon/Organs wide CSV
43
+ ```
44
+
45
+ `--schema generic` (default) writes one row per feature — every classification
46
+ and object, any ontology:
47
+
48
+ ```
49
+ global_key,data_row_id,feature_kind,feature_name,value,workflow_status,labeled_by,created_at,parent_feature_id
50
+ ```
51
+
52
+ ## Library
53
+
54
+ ```python
55
+ import labelpull
56
+
57
+ rows = list(labelpull.export("proj_id", status="Done")) # or read_export_file("export.ndjson")
58
+ features = [f for r in rows for f in labelpull.flatten(r, "proj_id")]
59
+ labelpull.write_csv("labels.csv", labelpull.GenericAdapter(), features)
60
+ print(labelpull.summarize(rows, features))
61
+ ```
62
+
63
+ `flatten()` handles radio / checklist / text classifications and bbox / polygon /
64
+ line / point / mask objects (with nested classifications linked to their parent),
65
+ and always selects the most recently created label so a QC-reviewed row reports
66
+ the reviewer's answer, not the annotator's.
67
+
68
+ Write your own `Adapter` to collapse features into a project-specific wide table;
69
+ `SpeciesAdapter` is the reference implementation.
@@ -0,0 +1,9 @@
1
+ labelpull/__init__.py,sha256=J8P7ntqvXjksIW6zE4l2nJf8w3As9B5_ekxdIT6NvvA,929
2
+ labelpull/adapters.py,sha256=5nIpl7NI13BfkQtn7eQ_eyD7maOmCkePXk7F8V9qHOc,3834
3
+ labelpull/cli.py,sha256=042bOraQpPAaRtATC-I0UUqKf1I8DR1ASb0opT21ZjQ,3536
4
+ labelpull/core.py,sha256=VGPRMcd2pQLo16KGuHKWGnJ2iyoUxWCq5Y53DdLIXtM,10765
5
+ labelpull/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ labelpull-0.1.0.dist-info/METADATA,sha256=H2Onwo9BL_7J-3gKtgpd8JKl8DyKh3ff0FlTmaf8kwI,2572
7
+ labelpull-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ labelpull-0.1.0.dist-info/entry_points.txt,sha256=vCh16Czaiyg87c3851QSoQTVIjIV55vK0vQ6xWZEbZI,48
9
+ labelpull-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ labelpull = labelpull.cli:app