finwave-wavefront 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- finwave_wavefront-0.1.0.dist-info/METADATA +109 -0
- finwave_wavefront-0.1.0.dist-info/RECORD +11 -0
- finwave_wavefront-0.1.0.dist-info/WHEEL +4 -0
- finwave_wavefront-0.1.0.dist-info/entry_points.txt +2 -0
- finwave_wavefront-0.1.0.dist-info/licenses/LICENSE +21 -0
- wavefront/__init__.py +69 -0
- wavefront/__main__.py +85 -0
- wavefront/_art.py +107 -0
- wavefront/client.py +277 -0
- wavefront/exceptions.py +49 -0
- wavefront/models.py +111 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: finwave-wavefront
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Official Python client for fetching finwave datasets over the dataset-API handshake.
|
|
5
|
+
Project-URL: Homepage, https://operationalecology.io
|
|
6
|
+
Project-URL: Source, https://github.com/Operational-Ecology/Wavefront
|
|
7
|
+
Project-URL: finwave, https://finwave.io
|
|
8
|
+
Author-email: Alexander Barnhill <alex.c.barnhill@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: conservation,datasets,finwave,photo-identification,wildlife,yolo
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Requires-Dist: httpx>=0.24
|
|
20
|
+
Provides-Extra: test
|
|
21
|
+
Requires-Dist: pytest>=7; extra == 'test'
|
|
22
|
+
Requires-Dist: respx>=0.20; extra == 'test'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# wavefront
|
|
26
|
+
|
|
27
|
+
The official Python client for **[Finwave](https://finwave.io)** datasets.
|
|
28
|
+
|
|
29
|
+
Finwave serves frozen, versioned wildlife photo-identification and detector
|
|
30
|
+
datasets behind a small handshake API. `wavefront` turns that into one call.
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install finwave-wavefront
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick start
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import wavefront
|
|
40
|
+
|
|
41
|
+
# the API key is read from $FW_API_TOKEN (or passed as api_key=...)
|
|
42
|
+
ds = wavefront.fetch("a7673931-9810-4c52-9654-1c9b1fafb63d", format="yolo")
|
|
43
|
+
|
|
44
|
+
print(ds.path) # extracted, ready to train on
|
|
45
|
+
print(ds.classes) # ['fluke']
|
|
46
|
+
print(ds.num_images) # 497
|
|
47
|
+
print(ds.fingerprint) # content hash — record it next to any model you train
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
`ds` is path-like, so it drops straight into a trainer:
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from ultralytics import YOLO
|
|
54
|
+
YOLO("yolo11n.pt").train(data=f"{ds.path}/data.yaml")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Pre-flight without downloading
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
m = wavefront.manifest("a7673931-9810-4c52-9654-1c9b1fafb63d")
|
|
61
|
+
print(m.name, m.sample_count, m.available_formats) # Flukes v1 497 ['Yolo']
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### A reusable client
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from wavefront import Client
|
|
68
|
+
client = Client(api_key="...", base_url="https://finwave.io")
|
|
69
|
+
ds = client.fetch(dataset_id, format="yolo", dest="./data/flukes")
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Command line
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
export FW_API_TOKEN=...
|
|
76
|
+
wavefront manifest a7673931-9810-4c52-9654-1c9b1fafb63d
|
|
77
|
+
wavefront fetch a7673931-9810-4c52-9654-1c9b1fafb63d --format yolo --dest ./data/flukes
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## How it works
|
|
81
|
+
|
|
82
|
+
1. `GET /api/datasets-api/{id}/manifest` — cheap metadata + which export formats are ready.
|
|
83
|
+
2. `GET /api/datasets-api/{id}?format=…` — a **handshake** that mints a short-lived signed download URL.
|
|
84
|
+
3. Download that URL → a zip → extract → a `Dataset`.
|
|
85
|
+
|
|
86
|
+
Downloads are **cached by content fingerprint**, so re-fetching a frozen
|
|
87
|
+
version is a no-op. The key needs the dataset-download scope.
|
|
88
|
+
|
|
89
|
+
## Authentication
|
|
90
|
+
|
|
91
|
+
Provide the key explicitly (`fetch(..., api_key=...)`) or set **`FW_API_TOKEN`**.
|
|
92
|
+
For compatibility, `WAVEFRONT_API_KEY`, `FINWAVE_DATASET_API_KEY` and
|
|
93
|
+
`DATASET_API_KEY` are also accepted (in that order).
|
|
94
|
+
|
|
95
|
+
## Errors
|
|
96
|
+
|
|
97
|
+
All errors subclass `wavefront.WavefrontError`:
|
|
98
|
+
|
|
99
|
+
| Exception | When |
|
|
100
|
+
|---|---|
|
|
101
|
+
| `AuthError` | key missing / rejected (401/403) |
|
|
102
|
+
| `DatasetNotFoundError` | no such version, or not visible to the key (404) |
|
|
103
|
+
| `FormatNotAvailableError` | the version exists but that export hasn't been generated yet (`.available` lists what is) |
|
|
104
|
+
| `APIError` | any other non-success response |
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
MIT © Alexander Barnhill / [Operational Ecology](https://operationalecology.io).
|
|
109
|
+
A partnership artifact between finwave and Operational Ecology.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
wavefront/__init__.py,sha256=TE1Xkoxce6mGQuRHuE44j8s7ZAfFsLEgxwapy2MRjnM,2353
|
|
2
|
+
wavefront/__main__.py,sha256=H4gQQ-7sbs7aNcuH9HkWJF48M14jBnz6FB9HLaYxpmo,3452
|
|
3
|
+
wavefront/_art.py,sha256=rS7Eg2VHcaAw1wzOoLF1OhSbi0XK7j9BywBzDIoaBOc,3768
|
|
4
|
+
wavefront/client.py,sha256=UQU6LP7HuhXJFVNL6lyLBftdUToCSI1CgnDtgLwU8vQ,11619
|
|
5
|
+
wavefront/exceptions.py,sha256=euSj98BqtPwLQjOn8g0vlYMwo650ilnA8KGPrcnv138,1612
|
|
6
|
+
wavefront/models.py,sha256=10XzDm33ND0N6R5LNBqOT9n-8bj9vbvTB46oWOKdPuc,3551
|
|
7
|
+
finwave_wavefront-0.1.0.dist-info/METADATA,sha256=YK2A9-pcAmUM3jsCsQTxX9so9HhmvGq_TNOdAttiaP4,3563
|
|
8
|
+
finwave_wavefront-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
9
|
+
finwave_wavefront-0.1.0.dist-info/entry_points.txt,sha256=ywBYOo2r3QbUAZJTAa9q-u1t3F_y9a60zqYykqahz5k,54
|
|
10
|
+
finwave_wavefront-0.1.0.dist-info/licenses/LICENSE,sha256=HOVL0nFm4EJpu4aftbrw6bElA9TYRozpE8QP5ZEsmjE,1097
|
|
11
|
+
finwave_wavefront-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alexander Barnhill / Operational Ecology
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
wavefront/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""wavefront — the official Python client for finwave datasets.
|
|
2
|
+
|
|
3
|
+
finwave (https://finwave.io) serves frozen, versioned wildlife photo-ID and
|
|
4
|
+
detector datasets behind a small handshake API. ``wavefront`` turns that into
|
|
5
|
+
one call:
|
|
6
|
+
|
|
7
|
+
>>> import wavefront
|
|
8
|
+
>>> ds = wavefront.fetch("a7673931-9810-4c52-9654-1c9b1fafb63d", format="yolo")
|
|
9
|
+
>>> ds.path, ds.classes, ds.num_images
|
|
10
|
+
(PosixPath('.../Yolo-81f97dec8667'), ['fluke'], 497)
|
|
11
|
+
|
|
12
|
+
The key is read from the ``FW_API_TOKEN`` environment variable (or passed
|
|
13
|
+
explicitly as ``api_key=``); ``WAVEFRONT_API_KEY``, ``FINWAVE_DATASET_API_KEY``
|
|
14
|
+
and ``DATASET_API_KEY`` are also accepted for compatibility. For repeated or
|
|
15
|
+
configured use, construct a :class:`Client`.
|
|
16
|
+
|
|
17
|
+
Every step logs on the ``wavefront`` logger. The library attaches a
|
|
18
|
+
``NullHandler`` and never configures logging itself — enable output with
|
|
19
|
+
``logging.basicConfig(level=logging.INFO)`` in your application.
|
|
20
|
+
|
|
21
|
+
Built by Operational Ecology (https://operationalecology.io).
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import logging
|
|
26
|
+
from typing import Optional
|
|
27
|
+
|
|
28
|
+
from .client import API_KEY_ENV, DEFAULT_BASE_URL, Client
|
|
29
|
+
|
|
30
|
+
logging.getLogger("wavefront").addHandler(logging.NullHandler())
|
|
31
|
+
from .exceptions import (
|
|
32
|
+
APIError,
|
|
33
|
+
AuthError,
|
|
34
|
+
DatasetNotFoundError,
|
|
35
|
+
FormatNotAvailableError,
|
|
36
|
+
IntegrityError,
|
|
37
|
+
WavefrontError,
|
|
38
|
+
)
|
|
39
|
+
from .models import Dataset, Manifest
|
|
40
|
+
|
|
41
|
+
__version__ = "0.1.0"
|
|
42
|
+
__all__ = [
|
|
43
|
+
"fetch",
|
|
44
|
+
"manifest",
|
|
45
|
+
"Client",
|
|
46
|
+
"Dataset",
|
|
47
|
+
"Manifest",
|
|
48
|
+
"WavefrontError",
|
|
49
|
+
"AuthError",
|
|
50
|
+
"DatasetNotFoundError",
|
|
51
|
+
"FormatNotAvailableError",
|
|
52
|
+
"IntegrityError",
|
|
53
|
+
"APIError",
|
|
54
|
+
"DEFAULT_BASE_URL",
|
|
55
|
+
"API_KEY_ENV",
|
|
56
|
+
"__version__",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def fetch(
    dataset_version_id: str,
    *,
    format: str = "yolo",
    api_key: Optional[str] = None,
    base_url: str = DEFAULT_BASE_URL,
    **kwargs,
) -> Dataset:
    """Fetch and extract one dataset version using a throwaway :class:`Client`.

    ``api_key`` and ``base_url`` configure the temporary client; ``format`` and
    any extra keyword arguments are forwarded unchanged to
    :meth:`Client.fetch`, whose result is returned.
    """
    one_off = Client(api_key, base_url=base_url)
    return one_off.fetch(dataset_version_id, format=format, **kwargs)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def manifest(
    dataset_version_id: str,
    *,
    api_key: Optional[str] = None,
    base_url: str = DEFAULT_BASE_URL,
) -> Manifest:
    """Look up a dataset version's manifest using a throwaway :class:`Client`.

    ``api_key`` and ``base_url`` configure the temporary client; the result of
    :meth:`Client.manifest` is returned as-is.
    """
    one_off = Client(api_key, base_url=base_url)
    return one_off.manifest(dataset_version_id)
|
wavefront/__main__.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Command-line interface: ``wavefront fetch|manifest <id>``."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from . import __version__, _art
|
|
9
|
+
from .client import Client
|
|
10
|
+
from .exceptions import WavefrontError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _fmt_bytes(n: int) -> str:
    """Render a byte count as a compact whole-number string such as ``"12MB"``.

    The value is divided by 1024 until it drops below 1024 or the unit reaches
    GB (the largest unit used), then rounded to zero decimals with no space
    before the unit.
    """
    size = float(n)
    for unit in ("B", "KB", "MB"):
        if size < 1024:
            return f"{size:.0f}{unit}"
        size /= 1024
    # Anything that survived three divisions is reported in GB, however large.
    return f"{size:.0f}GB"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def main(argv=None) -> int:
    """Run the ``wavefront`` command line and return a process exit code.

    Parameters
    ----------
    argv:
        Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns
    -------
    int
        ``0`` on success (including the bare ``wavefront`` banner) and ``1``
        when a :class:`WavefrontError` was raised; the error text is printed
        to stderr.

    Notes
    -----
    Only the dataset path (``fetch``) or the manifest summary (``manifest``)
    is written to stdout. Progress, logs and the post-fetch summary go to
    stderr and are suppressed by ``--quiet``.
    """
    parser = argparse.ArgumentParser(prog="wavefront", description="Fetch finwave datasets.")
    parser.add_argument("--version", action="version", version=f"wavefront {__version__}")
    parser.add_argument("--api-key", default=None, help="overrides $FW_API_TOKEN")
    parser.add_argument("--base-url", default=None, help="finwave base URL")
    parser.add_argument("-v", "--verbose", action="store_true", help="debug-level logging")
    parser.add_argument("-q", "--quiet", action="store_true", help="warnings and errors only")
    parser.add_argument("--no-art", action="store_true", help="disable the wave animation")
    sub = parser.add_subparsers(dest="cmd", required=False)

    manifest_cmd = sub.add_parser("manifest", help="print a version's metadata + formats")
    manifest_cmd.add_argument("dataset_version_id")

    fetch_cmd = sub.add_parser("fetch", help="download + extract a dataset version")
    fetch_cmd.add_argument("dataset_version_id")
    fetch_cmd.add_argument("--format", default="yolo")
    fetch_cmd.add_argument("--dest", default=None, help="extract dir (default: cache)")
    fetch_cmd.add_argument("--force", action="store_true", help="ignore cache")

    args = parser.parse_args(argv)
    level = logging.WARNING if args.quiet else (logging.DEBUG if args.verbose else logging.INFO)
    logging.basicConfig(level=level, format="%(message)s", stream=sys.stderr)
    show_art = not (args.no_art or args.quiet)
    if args.cmd is None:  # bare `wavefront` → wave + wordmark
        if show_art:
            _art.banner()
        return 0

    kw = {}
    if args.base_url:
        kw["base_url"] = args.base_url

    last_reported = 0
    # True while a "\r downloading …" line has been written without a newline.
    progress_open = False

    def prog(got, total):
        nonlocal last_reported, progress_open
        pct = f" {100*got/total:.0f}%" if total else ""
        if got - last_reported >= (1 << 23) or got == total:  # ~8MB steps
            print(f"\r downloading {_fmt_bytes(got)}{pct}", end="", file=sys.stderr)
            last_reported = got
            progress_open = True

    try:
        client = Client(args.api_key, **kw)
        if args.cmd == "manifest":
            mf = client.manifest(args.dataset_version_id)
            print(f"{mf.name} (v{mf.version_number})")
            print(f" samples: {mf.sample_count} annotations: {mf.annotation_count}")
            print(f" formats: {mf.available_formats or '(none generated yet)'}")
            print(f" fingerprint: {mf.fingerprint}")
            return 0
        if args.cmd == "fetch":
            if show_art:
                _art.wave()
            ds = client.fetch(
                args.dataset_version_id,
                format=args.format,
                dest=args.dest,
                force=args.force,
                # --quiet promises "warnings and errors only": no progress line.
                progress=None if args.quiet else prog,
            )
            if progress_open:
                # Terminate the carriage-return progress line; on a cache hit
                # nothing was printed, so no blank line is emitted.
                print("", file=sys.stderr)
                progress_open = False
            print(ds.path)
            if not args.quiet:
                print(f" {ds.num_images} images, {ds.num_labels} labels, classes={ds.classes}",
                      file=sys.stderr)
            return 0
    except WavefrontError as e:
        if progress_open:
            # Keep the error off the half-written progress line.
            print("", file=sys.stderr)
        print(f"error: {e}", file=sys.stderr)
        return 1
    return 0
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
|
|
85
|
+
raise SystemExit(main())
|
wavefront/_art.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""A small finwave-blue wave flourish for the terminal. Purely cosmetic.
|
|
2
|
+
|
|
3
|
+
Animates a travelling, foam-tipped wave in the finwave palette. No-ops on
|
|
4
|
+
non-TTY streams, under ``NO_COLOR``, or for dumb terminals, so it never
|
|
5
|
+
corrupts piped or logged output.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
import sys
|
|
13
|
+
import time
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
# finwave palette, deep water → crest (mirrors the logo's blue gradient)
|
|
17
|
+
_GRAD = [
|
|
18
|
+
(14, 63, 133), (21, 88, 184), (31, 111, 230),
|
|
19
|
+
(59, 143, 255), (93, 158, 255), (130, 185, 255),
|
|
20
|
+
]
|
|
21
|
+
_FOAM = (224, 238, 255)
|
|
22
|
+
_BLOCKS = " ▁▂▃▄▅▆▇█"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def supported(stream) -> bool:
    """Report whether ANSI art may be written to *stream*.

    Art is allowed only when the stream is a TTY, neither ``NO_COLOR`` nor
    ``WAVEFRONT_NO_ART`` is set to a non-empty value, and ``TERM`` is set to
    something other than ``"dumb"``.
    """
    is_tty = hasattr(stream, "isatty") and stream.isatty()
    if not is_tty:
        return is_tty
    env = os.environ
    if env.get("NO_COLOR") or env.get("WAVEFRONT_NO_ART"):
        return False
    return env.get("TERM", "") not in ("", "dumb")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _rgb(rgb) -> str:
    """Return the ANSI 24-bit foreground escape for an ``(r, g, b)`` triple."""
    red, green, blue = rgb[0], rgb[1], rgb[2]
    return f"\x1b[38;2;{red};{green};{blue}m"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _surface(width: int, rows: int, phase: float) -> list[float]:
    """Compute the water height, in cells (0..rows), for each of *width* columns.

    The height is the sum of two sine waves advanced by *phase*, clamped to
    the 0..1 range and then scaled by *rows*.
    """

    def column_height(x: int) -> float:
        level = 0.52 + 0.30 * math.sin(x * 0.26 - phase) + 0.14 * math.sin(x * 0.11 + phase * 0.6)
        return max(0.0, min(1.0, level)) * rows

    return [column_height(x) for x in range(width)]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _render_frame(width: int, rows: int, phase: float) -> str:
    """Render one animation frame as *rows* newline-joined lines of ANSI text.

    Each cell is a space (no water), a foam-coloured partial block (the
    topmost filled cell of a column) or a solid block tinted from ``_GRAD``.
    Every line ends with an SGR reset.
    """
    heights = _surface(width, rows, phase)
    rendered = []
    for row in range(rows):  # row 0 is the top line of the frame
        # Solid cells are tinted by row index, clamped to the palette length.
        # NOTE(review): _GRAD is documented as running deep water → crest, so
        # the top row gets the darkest shade — confirm that is the intent.
        solid_color = _GRAD[min(len(_GRAD) - 1, row)]
        cells = []
        for col in range(width):
            # Cells of water reaching into this row (>= 1 means the row is full).
            fill = heights[col] - (rows - 1 - row)
            if fill <= 0:
                cells.append(" ")
            elif fill < 1.0:
                # Partially filled: this is the crest cell of the column.
                glyph = _BLOCKS[min(8, max(1, int(round(fill * 8))))]
                cells.append(_rgb(_FOAM) + glyph)
            else:
                cells.append(_rgb(solid_color) + "█")
        rendered.append("".join(cells) + "\x1b[0m")
    return "\n".join(rendered)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def wave(stream=None, *, duration: float = 1.3, fps: int = 30,
         width: Optional[int] = None, rows: int = 4) -> None:
    """Play the wave flourish on *stream*, then restore the cursor.

    Parameters
    ----------
    stream:
        Text stream to draw on; defaults to ``sys.stderr``. Nothing is written
        when :func:`supported` rejects the stream.
    duration:
        Animation length in seconds; at least one frame is always drawn.
    fps:
        Frames per second. A non-positive value draws nothing.
    width:
        Wave width in columns; defaults to the terminal width minus two. The
        result is capped at 60 either way.
    rows:
        Height of the wave in terminal lines. A value below 1 draws nothing.

    The last frame stays on screen and is followed by a newline. The cursor is
    hidden while animating and always re-shown, even if writing fails.
    """
    stream = stream or sys.stderr
    if not supported(stream):
        return
    if fps <= 0 or rows < 1:
        # Purely cosmetic output: degenerate settings must not raise
        # (fps=0 would otherwise divide by zero).
        return
    cols = shutil.get_terminal_size((80, 24)).columns
    w = min(width or cols - 2, 60)
    frames = max(1, int(duration * fps))
    # A frame spans `rows` lines, so the cursor ends rows-1 lines below its
    # top. For a one-line wave only a carriage return is needed: "ESC[0A" is
    # treated as "up 1" by terminals and would overwrite the line above.
    rewind = f"\x1b[{rows - 1}A\r" if rows > 1 else "\r"
    stream.write("\x1b[?25l")  # hide cursor
    try:
        for f in range(frames):
            stream.write(_render_frame(w, rows, f / fps * 6.5))
            if f < frames - 1:
                stream.write(rewind)  # back to top of the wave
            stream.flush()
            time.sleep(1.0 / fps)
        stream.write("\n")
    finally:
        stream.write("\x1b[?25h\x1b[0m")  # restore cursor + reset
        stream.flush()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def banner(stream=None) -> None:
    """Play the wave once and print the wordmark line (bare ``wavefront``).

    Writes to *stream* (default ``sys.stderr``). When art is unsupported the
    wave is skipped and a plain, uncoloured wordmark is written instead.
    """
    from . import __version__

    out = stream or sys.stderr
    wave(out, duration=1.1)
    if not supported(out):
        out.write(f"wavefront v{__version__} — finwave datasets, one call\n")
        return
    name_part = _rgb(_GRAD[3]) + " wavefront "
    version_part = _rgb(_GRAD[5]) + f"v{__version__}\x1b[0m "
    tagline = "\x1b[2mfinwave datasets, one call\x1b[0m\n"
    out.write(name_part + version_part + tagline)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
if __name__ == "__main__": # quick visual check: `python -m wavefront._art`
|
|
107
|
+
wave(sys.stdout, duration=3.0)
|
wavefront/client.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""The finwave dataset client.
|
|
2
|
+
|
|
3
|
+
The flow mirrors the finwave dataset API exactly:
|
|
4
|
+
|
|
5
|
+
1. ``GET /api/datasets-api/{id}/manifest`` → cheap metadata + available formats
|
|
6
|
+
2. ``GET /api/datasets-api/{id}?format=...`` → a *handshake* that mints a short-
|
|
7
|
+
lived signed download URL (no bytes yet)
|
|
8
|
+
3. download the signed URL → a zip → extract → a :class:`~wavefront.models.Dataset`
|
|
9
|
+
|
|
10
|
+
Authentication is the ``X-API-KEY`` header; the key needs the dataset-download
|
|
11
|
+
scope. Downloads are cached by content fingerprint, so a repeated fetch of the
|
|
12
|
+
same frozen version is a no-op.
|
|
13
|
+
|
|
14
|
+
Every step emits an ``INFO`` log line on the ``wavefront`` logger so a caller
|
|
15
|
+
can see exactly what happened; the library installs a ``NullHandler`` and never
|
|
16
|
+
configures logging itself.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import shutil
|
|
23
|
+
import tempfile
|
|
24
|
+
import time
|
|
25
|
+
import zipfile
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Callable, Optional
|
|
28
|
+
|
|
29
|
+
import httpx
|
|
30
|
+
|
|
31
|
+
from .exceptions import (
|
|
32
|
+
APIError,
|
|
33
|
+
AuthError,
|
|
34
|
+
DatasetNotFoundError,
|
|
35
|
+
FormatNotAvailableError,
|
|
36
|
+
)
|
|
37
|
+
from .models import Dataset, Manifest
|
|
38
|
+
|
|
39
|
+
log = logging.getLogger("wavefront")
|
|
40
|
+
|
|
41
|
+
DEFAULT_BASE_URL = "https://finwave.io"
|
|
42
|
+
#: Environment variables consulted for the API key, in order. ``FW_API_TOKEN``
|
|
43
|
+
#: is the canonical name; the rest are accepted for compatibility.
|
|
44
|
+
API_KEY_ENV = ("FW_API_TOKEN", "WAVEFRONT_API_KEY", "FINWAVE_DATASET_API_KEY", "DATASET_API_KEY")
|
|
45
|
+
_FORMAT_ALIASES = {"yolo": "Yolo", "coco": "Coco", "pascalvoc": "PascalVoc", "voc": "PascalVoc"}
|
|
46
|
+
_COMPLETE_MARKER = ".wavefront-complete"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _mask(key: str) -> str:
    """Return a safe-to-log hint for a secret, never the secret itself.

    Keys of at least 12 characters are shown as their first three and last two
    characters plus the total length. Shorter keys yield just ``"set"``:
    revealing five characters of a short key would expose most of it.
    """
    if len(key) < 12:
        return "set"
    return f"{key[:3]}…{key[-2:]} ({len(key)} chars)"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _resolve_key(api_key: Optional[str]) -> tuple[str, str]:
    """Pick the API key to use and say where it came from.

    Returns ``(key, source)``. ``source`` is ``"argument"`` when *api_key* is
    non-empty, otherwise the name of the first variable in ``API_KEY_ENV``
    that holds a non-empty value. Raises :class:`AuthError` when neither
    yields a key.
    """
    if api_key:
        return api_key, "argument"
    found = next(
        ((os.environ[name], name) for name in API_KEY_ENV if os.environ.get(name)),
        None,
    )
    if found is not None:
        return found
    alternates = ", ".join(API_KEY_ENV[1:])
    raise AuthError(
        "No API key provided. Pass api_key=... or set the FW_API_TOKEN "
        "environment variable (also accepted: " + alternates + ")."
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _canonical_format(fmt: str) -> str:
    """Map a format alias, matched case-insensitively, to its canonical name.

    Names with no entry in ``_FORMAT_ALIASES`` are returned exactly as given.
    """
    lowered = fmt.lower()
    if lowered in _FORMAT_ALIASES:
        return _FORMAT_ALIASES[lowered]
    return fmt
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _default_cache_root() -> Path:
    """Return the directory under which fetched datasets are cached.

    ``$WAVEFRONT_CACHE`` is used verbatim when set. Otherwise the result is
    ``<base>/wavefront``, where ``<base>`` is ``$XDG_CACHE_HOME`` or, failing
    that, ``~/.cache``.
    """
    explicit = os.environ.get("WAVEFRONT_CACHE")
    if explicit:
        return Path(explicit)
    xdg = os.environ.get("XDG_CACHE_HOME")
    if not xdg:
        xdg = os.path.join(os.path.expanduser("~"), ".cache")
    return Path(xdg) / "wavefront"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _human_bytes(n: Optional[int]) -> str:
    """Format a byte count for log output, e.g. ``"1.5 KB"``.

    ``None`` and ``0`` both yield ``"?"``. Plain bytes are shown as whole
    numbers; larger units carry one decimal and stop at TB.
    """
    if not n:
        return "?"
    size = float(n)
    if size < 1024:
        return f"{size:.0f} B"
    for unit in ("KB", "MB", "GB"):
        size /= 1024
        if size < 1024:
            return f"{size:.1f} {unit}"
    # TB is the largest unit, so values beyond it are not scaled further.
    size /= 1024
    return f"{size:.1f} TB"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class Client:
    """A reusable finwave dataset client.

    Parameters
    ----------
    api_key:
        Dataset-download-scoped key. If omitted, the ``FW_API_TOKEN`` environment
        variable is used (also accepted: ``WAVEFRONT_API_KEY``,
        ``FINWAVE_DATASET_API_KEY``, ``DATASET_API_KEY``).
    base_url:
        finwave base URL (default ``https://finwave.io``).
    timeout:
        Per-request timeout in seconds for the API calls (the large artifact
        download uses a longer, separate timeout).

    Raises
    ------
    AuthError
        From the constructor, when no key is passed and none of the
        environment variables is set.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        *,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = 30.0,
    ) -> None:
        self.api_key, source = _resolve_key(api_key)
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        # Only a masked hint of the key is ever logged.
        log.info("wavefront client ready: base_url=%s, key from %s [%s]",
                 self.base_url, source, _mask(self.api_key))

    # ── low-level ────────────────────────────────────────────────────────────
    def _get(self, path: str, **kwargs) -> httpx.Response:
        """GET ``/api/datasets-api/{path}`` with the ``X-API-KEY`` header.

        Extra keyword arguments are passed to ``httpx.get``. Transport
        failures are re-raised as :class:`APIError` and HTTP 401/403 as
        :class:`AuthError`; any other status is returned for the caller to
        interpret.
        """
        url = f"{self.base_url}/api/datasets-api/{path}"
        log.debug("GET %s %s", url, kwargs.get("params", ""))
        try:
            resp = httpx.get(url, headers={"X-API-KEY": self.api_key},
                             timeout=self.timeout, **kwargs)
        except httpx.HTTPError as e:  # network-level
            log.error("request to %s failed: %s", url, e)
            raise APIError(f"request to {url} failed: {e}") from e
        log.debug("→ HTTP %d (%s)", resp.status_code, _human_bytes(len(resp.content)))
        if resp.status_code in (401, 403):
            raise AuthError(
                "API key rejected (HTTP %d) — check the key and that it has the "
                "dataset-download scope." % resp.status_code
            )
        return resp

    @staticmethod
    def _error_payload(resp: httpx.Response) -> dict:
        """Best-effort parse of an error body; ``{}`` unless it is a JSON object."""
        try:
            body = resp.json()
        except ValueError:  # invalid JSON or undecodable bytes
            return {}
        # Callers use dict methods on the result, so a JSON list/string/number
        # is treated the same as "no payload".
        return body if isinstance(body, dict) else {}

    @staticmethod
    def _json_body(resp: httpx.Response, what: str):
        """Decode a success body, raising :class:`APIError` if it is not JSON."""
        try:
            return resp.json()
        except ValueError as e:
            raise APIError(f"{what} response was not valid JSON",
                           status_code=resp.status_code) from e

    # ── public API ───────────────────────────────────────────────────────────
    def manifest(self, dataset_version_id: str) -> Manifest:
        """Return version metadata + available export formats (no download).

        Raises :class:`DatasetNotFoundError` on HTTP 404 and :class:`APIError`
        on any other non-200 status or an undecodable body.
        """
        log.info("manifest: requesting %s", dataset_version_id)
        resp = self._get(f"{dataset_version_id}/manifest")
        if resp.status_code == 404:
            raise DatasetNotFoundError(
                f"dataset version {dataset_version_id!r} not found (or not visible to this key)"
            )
        if resp.status_code != 200:
            raise APIError("manifest request failed", status_code=resp.status_code,
                           payload=self._error_payload(resp))
        m = Manifest.from_response(self._json_body(resp, "manifest"))
        log.info("manifest: '%s' v%d — %d samples, %d annotations, formats=%s",
                 m.name, m.version_number, m.sample_count, m.annotation_count,
                 m.available_formats or "none yet")
        return m

    def fetch(
        self,
        dataset_version_id: str,
        *,
        format: str = "yolo",
        dest: Optional[os.PathLike] = None,
        cache: bool = True,
        force: bool = False,
        progress: Optional[Callable[[int, Optional[int]], None]] = None,
    ) -> Dataset:
        """Fetch + extract a dataset version, returning a :class:`Dataset`.

        Parameters
        ----------
        format:
            Export format, case-insensitive (``"yolo"`` by default).
        dest:
            Directory to extract into. Defaults to the fingerprint-keyed cache.
            Existing entries in this directory are deleted before extraction.
        cache:
            Reuse a previously-completed download of the same frozen fingerprint.
        force:
            Re-download even if a cached copy exists.
        progress:
            Optional callback ``(bytes_downloaded, total_or_None)`` for the
            artifact download.

        Raises
        ------
        FormatNotAvailableError
            The manifest does not list the requested format.
        DatasetNotFoundError, AuthError, APIError
            Propagated from the manifest, handshake or download steps;
            :class:`APIError` is also raised when the downloaded file is not a
            valid zip archive.
        """
        fmt = _canonical_format(format)
        log.info("fetch: %s (format=%s)", dataset_version_id, fmt)
        m = self.manifest(dataset_version_id)
        if not m.has_format(fmt):
            raise FormatNotAvailableError(
                f"format {fmt!r} is not available for '{m.name}'. "
                f"Available: {m.available_formats or 'none yet — an export must be generated'}.",
                available=m.available_formats,
            )

        if dest is not None:
            out = Path(dest)
        else:
            out = _default_cache_root() / f"{dataset_version_id}" / f"{fmt}-{m.fingerprint[:12]}"

        marker = out / _COMPLETE_MARKER
        if cache and not force and marker.exists() and marker.read_text().strip() == m.fingerprint:
            ds = Dataset.from_extracted(root=out, manifest=m, fmt=fmt)
            log.info("fetch: cache hit (fingerprint %s) → %s [%d images]",
                     m.fingerprint[:12], out, ds.num_images)
            return ds

        log.info("fetch: requesting download handshake…")
        download_url = self._handshake(dataset_version_id, fmt)
        out.mkdir(parents=True, exist_ok=True)
        # Reserve a temp path; the handle is closed so _download can reopen it.
        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp:
            tmp_path = Path(tmp.name)
        try:
            self._download(download_url, tmp_path, progress=progress)
            log.info("fetch: extracting to %s", out)
            try:
                # Open (and so validate) the archive BEFORE touching `out`: a
                # corrupt download must not destroy an existing good copy.
                with zipfile.ZipFile(tmp_path) as zf:
                    # Drop the completion marker first. If anything below
                    # fails, a stale marker would otherwise make the next call
                    # report a cache hit on a half-populated directory.
                    marker.unlink(missing_ok=True)
                    # NOTE(review): this removes every entry in `out`,
                    # including unrelated files when the caller passes an
                    # existing directory as `dest`.
                    for child in out.iterdir():
                        if child.is_dir() and not child.is_symlink():
                            shutil.rmtree(child)
                        else:
                            child.unlink()
                    zf.extractall(out)
            except zipfile.BadZipFile as e:
                raise APIError(f"downloaded artifact is not a valid zip archive: {e}") from e
        finally:
            tmp_path.unlink(missing_ok=True)
        # Written last, so its presence means extraction fully completed.
        marker.write_text(m.fingerprint)
        ds = Dataset.from_extracted(root=out, manifest=m, fmt=fmt)
        log.info("fetch: ready → %s [%d images, %d labels, classes=%s]",
                 out, ds.num_images, ds.num_labels, ds.classes)
        return ds

    # ── internals ────────────────────────────────────────────────────────────
    def _handshake(self, dataset_version_id: str, fmt: str) -> str:
        """Request a signed download URL for *fmt* and return it.

        A 404 whose ``detail`` mentions "format" becomes
        :class:`FormatNotAvailableError`; any other 404 becomes
        :class:`DatasetNotFoundError`. Other non-200 statuses, or a body
        without ``downloadUrl``, raise :class:`APIError`.
        """
        resp = self._get(dataset_version_id, params={"format": fmt})
        if resp.status_code == 404:
            payload = self._error_payload(resp)
            # `detail` is not guaranteed to be a string; coerce before matching.
            detail = str(payload.get("detail") or "").lower()
            if "format" in detail:
                raise FormatNotAvailableError(
                    f"format {fmt!r} has not been produced for this version yet "
                    "(exports are generated separately from freezing)."
                )
            raise DatasetNotFoundError(f"dataset version {dataset_version_id!r} not found")
        if resp.status_code != 200:
            raise APIError("handshake failed", status_code=resp.status_code,
                           payload=self._error_payload(resp))
        body = self._json_body(resp, "handshake")
        if not isinstance(body, dict):
            raise APIError("handshake response was not a JSON object",
                           status_code=resp.status_code)
        url = body.get("downloadUrl")
        if not url:
            raise APIError("handshake response had no downloadUrl",
                           status_code=resp.status_code, payload=body)
        log.info("handshake: signed URL minted (expires %s)",
                 body.get("sasExpiresAt", "soon"))
        return url

    def _download(self, url: str, dest: Path, *,
                  progress: Optional[Callable[[int, Optional[int]], None]] = None) -> None:
        """Stream *url* into the file *dest*, reporting progress per chunk.

        Raises :class:`APIError` on a non-200 status or a transport failure,
        matching how :meth:`_get` reports network errors.
        """
        # The download URL is a pre-signed object URL — no API key, long timeout.
        t0 = time.monotonic()
        got = 0
        try:
            with httpx.stream("GET", url, timeout=httpx.Timeout(None, connect=30.0),
                              follow_redirects=True) as resp:
                if resp.status_code != 200:
                    raise APIError(f"artifact download failed (HTTP {resp.status_code})",
                                   status_code=resp.status_code)
                try:
                    total = int(resp.headers.get("Content-Length", 0)) or None
                except ValueError:
                    # A malformed header only costs us the percentage display.
                    total = None
                log.info("download: %s …", _human_bytes(total))
                with open(dest, "wb") as f:
                    for chunk in resp.iter_bytes(1 << 20):
                        f.write(chunk)
                        got += len(chunk)
                        if progress is not None:
                            progress(got, total)
        except httpx.HTTPError as e:  # network-level
            # The signed URL is deliberately kept out of the message and log.
            log.error("artifact download failed: %s", e)
            raise APIError(f"artifact download failed: {e}") from e
        dt = time.monotonic() - t0
        rate = got / dt / (1 << 20) if dt > 0 else 0
        log.info("download: %s in %.1fs (%.0f MB/s)", _human_bytes(got), dt, rate)
|
wavefront/exceptions.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Exceptions raised by wavefront.
|
|
2
|
+
|
|
3
|
+
All inherit from :class:`WavefrontError`, so callers can catch the whole family
|
|
4
|
+
with a single ``except WavefrontError``.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class WavefrontError(Exception):
    """Base class for all wavefront errors.

    Every other exception in this module derives from it, so a single
    ``except WavefrontError`` catches the whole family.
    """
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AuthError(WavefrontError):
    """Raised when the API key is missing, malformed, or rejected (HTTP 401/403)."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DatasetNotFoundError(WavefrontError):
    """Raised when no dataset version with the given id is visible to this key (HTTP 404)."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FormatNotAvailableError(WavefrontError):
    """Raised when the requested export format is not ready for this version.

    The dataset itself exists, but its ``availableFormats`` does not list the
    requested format. Exports are not produced on freeze, so an admin has to
    generate one first. :attr:`available` holds the formats that *are* ready.
    """

    def __init__(self, message: str, *, available: list[str] | None = None) -> None:
        super().__init__(message)
        # Normalise "not given" (and an empty list) to a fresh empty list so
        # callers can always iterate the attribute.
        self.available = available if available else []
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class IntegrityError(WavefrontError):
    """Raised when a downloaded artifact does not match the server-declared fingerprint."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class APIError(WavefrontError):
    """Raised for an unexpected, non-success response from the finwave API.

    :attr:`status_code` carries the HTTP status (or ``None`` when not given);
    :attr:`payload` carries the parsed error body when the server returned
    one, and is an empty dict otherwise.
    """

    def __init__(self, message: str, *, status_code: int | None = None,
                 payload: dict | None = None) -> None:
        super().__init__(message)
        # A missing (or empty) payload becomes a fresh dict so callers can
        # use ``err.payload.get(...)`` without a ``None`` check.
        self.payload = payload if payload else {}
        self.status_code = status_code
|
wavefront/models.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Typed views over the finwave dataset-API responses and the local result.
|
|
2
|
+
|
|
3
|
+
These are thin, read-only dataclasses; the wire shapes they mirror are the
|
|
4
|
+
``/manifest`` and handshake responses of ``/api/datasets-api/{id}``.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_dt(value: Optional[str]) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp from an API response, or return ``None``.

    Best-effort by design: anything that is missing, not a string, or not
    parseable yields ``None`` instead of raising.

    :param value: timestamp text such as ``"2024-05-01T12:34:56.1234567Z"``.
    :returns: the parsed :class:`~datetime.datetime` (timezone-aware when the
        text carries ``Z`` or an offset), or ``None``.
    """
    if not isinstance(value, str) or not value:
        return None

    text = value
    # Only a *trailing* Z/z is the UTC designator; rewrite it as an explicit
    # offset, which ``fromisoformat`` understands on every supported Python.
    if text[-1] in "Zz":
        text = text[:-1] + "+00:00"

    # Before Python 3.11 ``fromisoformat`` accepts only 3 or 6 fractional
    # digits. Normalise whatever precision was sent to exactly 6 digits
    # (truncating extras, zero-padding shorter runs) so such timestamps are
    # not silently dropped.
    head, dot, tail = text.partition(".")
    if dot:
        rest = tail.lstrip("0123456789")
        digits = tail[:len(tail) - len(rest)]
        if digits:
            text = f"{head}.{digits[:6]:0<6}{rest}"

    try:
        return datetime.fromisoformat(text)
    except ValueError:
        return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class Manifest:
    """Cheap pre-flight metadata for a dataset version (no download minted).

    Build instances with :meth:`from_response`. The decoded response body is
    kept on :attr:`raw` (excluded from ``repr``) for any field this typed view
    does not surface.
    """

    dataset_version_id: str
    parent_dataset_id: str
    name: str
    version_number: int
    fingerprint: str
    sample_count: int
    annotation_count: int
    includes_negatives: bool
    available_formats: list[str]
    frozen_at: Optional[datetime] = None
    raw: dict = field(default_factory=dict, repr=False)

    @classmethod
    def from_response(cls, d: dict) -> "Manifest":
        """Build a manifest from a decoded response body.

        Absent keys and JSON ``null`` values both fall back to an empty value
        (``""``, ``0``, ``False``, ``[]`` or ``None`` for ``frozen_at``), so
        the typed attributes never carry a stray ``None``.

        :param d: the decoded JSON object; stored as-is on :attr:`raw`.
        """

        def text(key: str) -> str:
            # ``dict.get``'s default only covers *absent* keys; a JSON null
            # arrives as ``None`` and has to be mapped explicitly, matching
            # the ``or 0`` / ``or []`` handling of the other fields.
            value = d.get(key)
            return "" if value is None else value

        return cls(
            dataset_version_id=text("datasetVersionId"),
            parent_dataset_id=text("parentDatasetId"),
            name=text("name"),
            version_number=int(d.get("versionNumber", 0) or 0),
            fingerprint=text("fingerprint"),
            sample_count=int(d.get("sampleCount", 0) or 0),
            annotation_count=int(d.get("annotationCount", 0) or 0),
            includes_negatives=bool(d.get("includesNegatives", False)),
            available_formats=list(d.get("availableFormats", []) or []),
            frozen_at=_parse_dt(d.get("frozenAt")),
            raw=d,
        )

    def has_format(self, fmt: str) -> bool:
        """Return whether *fmt* is listed in ``available_formats``, ignoring case."""
        wanted = fmt.lower()
        return any(candidate.lower() == wanted for candidate in self.available_formats)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(frozen=True)
class Dataset:
    """A fetched, extracted dataset on local disk.

    ``fingerprint`` is the server's content hash for this exact version — record
    it alongside any model you train so the data is traceable.

    ``str(ds)`` and ``os.fspath(ds)`` both give the extraction directory.
    """

    id: str
    name: str
    version: int
    fmt: str
    fingerprint: str
    path: Path
    classes: list[str] = field(default_factory=list)
    num_images: int = 0
    num_labels: int = 0

    def __fspath__(self) -> str:  # usable directly as a path
        return str(self.path)

    def __str__(self) -> str:
        return str(self.path)

    @property
    def images_dir(self) -> Path:
        """The ``images`` sub-directory of the extraction root."""
        return self.path / "images"

    @property
    def labels_dir(self) -> Path:
        """The ``labels`` sub-directory of the extraction root."""
        return self.path / "labels"

    @classmethod
    def from_extracted(cls, *, root: Path, manifest: Manifest, fmt: str) -> "Dataset":
        """Describe an already-extracted dataset by scanning *root*.

        :param root: directory the artifact was extracted into.
        :param manifest: supplies the id, name, version number and fingerprint.
        :param fmt: export format label recorded on the result.
        :returns: a :class:`Dataset` whose counts come from a recursive scan:
            images are paths with a known image suffix, labels are ``.txt``
            files under a directory whose path *below root* contains
            ``"label"``, and classes are the non-blank lines of the first
            ``classes.txt`` found.
        """
        exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")
        imgs = [p for p in root.rglob("*") if p.suffix.lower() in exts]

        # Match "label" against the path *relative to root* only. Testing the
        # absolute path would count every .txt file (classes.txt included)
        # whenever the extraction directory itself sits under a folder whose
        # name happens to contain "label".
        labels = [
            p for p in root.rglob("*.txt")
            if "label" in str(p.parent.relative_to(root)).lower()
        ]

        classes: list[str] = []
        cf = next(iter(root.rglob("classes.txt")), None)
        if cf is not None:
            # Explicit UTF-8 so class names decode the same on every platform
            # instead of depending on the locale's default encoding.
            # NOTE(review): assumes exports are written as UTF-8 — confirm.
            lines = cf.read_text(encoding="utf-8").splitlines()
            classes = [ln.strip() for ln in lines if ln.strip()]

        return cls(
            id=manifest.dataset_version_id,
            name=manifest.name,
            version=manifest.version_number,
            fmt=fmt,
            fingerprint=manifest.fingerprint,
            path=root,
            classes=classes,
            num_images=len(imgs),
            num_labels=len(labels),
        )
|