dataerai-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataerai/__init__.py +28 -0
- dataerai/_daemon.py +57 -0
- dataerai/_errors.py +17 -0
- dataerai/_framing.py +58 -0
- dataerai/_types.py +112 -0
- dataerai/_version.py +12 -0
- dataerai/client.py +686 -0
- dataerai/ml/__init__.py +10 -0
- dataerai/ml/access.py +161 -0
- dataerai/ml/lineage.py +182 -0
- dataerai/ml/zarr_dataset.py +326 -0
- dataerai_sdk-0.1.0.dist-info/METADATA +498 -0
- dataerai_sdk-0.1.0.dist-info/RECORD +16 -0
- dataerai_sdk-0.1.0.dist-info/WHEEL +5 -0
- dataerai_sdk-0.1.0.dist-info/licenses/LICENSE +348 -0
- dataerai_sdk-0.1.0.dist-info/top_level.txt +1 -0
dataerai/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Dataerai Python SDK — public API surface."""
|
|
2
|
+
|
|
3
|
+
from ._version import __version__
|
|
4
|
+
from .client import DataeraiClient
|
|
5
|
+
from ._errors import DaemonError, DaemonTimeoutError
|
|
6
|
+
from ._types import (
|
|
7
|
+
AssetMetadata,
|
|
8
|
+
AuthStatus,
|
|
9
|
+
CollectionDownloadResult,
|
|
10
|
+
DownloadedFile,
|
|
11
|
+
DownloadResult,
|
|
12
|
+
ProgressEvent,
|
|
13
|
+
UploadResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"__version__",
|
|
18
|
+
"DataeraiClient",
|
|
19
|
+
"DaemonError",
|
|
20
|
+
"DaemonTimeoutError",
|
|
21
|
+
"AuthStatus",
|
|
22
|
+
"UploadResult",
|
|
23
|
+
"DownloadedFile",
|
|
24
|
+
"DownloadResult",
|
|
25
|
+
"CollectionDownloadResult",
|
|
26
|
+
"ProgressEvent",
|
|
27
|
+
"AssetMetadata",
|
|
28
|
+
]
|
dataerai/_daemon.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Daemon process management utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import tempfile
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def default_socket_path() -> str:
    """Resolve the Unix socket path used when the caller does not supply one.

    Candidates are tried in order and the first one that is set wins:

    1. the ``$DATAERAI_SOCKET`` environment variable, returned verbatim;
    2. ``dataerai-transfer.sock`` inside ``$XDG_RUNTIME_DIR``;
    3. ``dataerai-transfer.sock`` inside :func:`tempfile.gettempdir`.

    A variable that is set to the empty string is treated as unset.
    """
    socket_name = "dataerai-transfer.sock"

    override = os.environ.get("DATAERAI_SOCKET")
    if override:
        return override

    # The temp directory is only consulted when no runtime dir is configured.
    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
    base_dir = Path(runtime_dir) if runtime_dir else Path(tempfile.gettempdir())
    return str(base_dir / socket_name)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def spawn_daemon(binary_path: str, socket_path: str) -> subprocess.Popen:
    """Launch the Dataerai daemon binary as a child process.

    The child is started as ``<binary_path> --socket <socket_path>`` with a
    copy of the current environment in which ``DATAERAI_SOCKET`` is set to
    *socket_path*. stdin is attached to the null device; stdout and stderr
    are captured through pipes so the daemon does not write into the
    caller's own output streams.

    Returns the :class:`subprocess.Popen` handle. This function neither
    waits for nor terminates the child; the caller must eventually kill it.
    """
    # NOTE(review): nothing in this function reads the two pipes. If callers
    # never drain them, a verbose daemon could block once the OS pipe buffer
    # fills -- confirm how the Popen handle is consumed.
    child_env = dict(os.environ)
    child_env["DATAERAI_SOCKET"] = socket_path
    argv = [binary_path, "--socket", socket_path]
    return subprocess.Popen(
        argv,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=child_env,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def wait_for_socket(socket_path: str, timeout_s: float = 10.0) -> None:
    """Poll until *socket_path* exists or *timeout_s* elapses.

    The path is checked immediately and then at most every 0.2 seconds.
    Only the existence of the filesystem entry is tested; no connection is
    attempted, so a leftover file from an earlier run also satisfies the
    wait.

    Args:
        socket_path: Filesystem path the daemon is expected to create.
        timeout_s: Maximum number of seconds to wait.

    Raises:
        TimeoutError: If the path has not appeared once *timeout_s* seconds
            have passed.
    """
    poll_interval_s = 0.2
    target = Path(socket_path)
    deadline = time.monotonic() + timeout_s
    while not target.exists():
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(
                f"Daemon socket '{socket_path}' not available after {timeout_s:.0f}s. "
                "Ensure the Dataerai binary is installed and runnable."
            )
        # Never sleep past the deadline, so short timeouts are honoured
        # instead of always costing a full polling interval.
        time.sleep(min(poll_interval_s, remaining))
|
dataerai/_errors.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DaemonError(Exception):
    """Raised when the daemon returns a coded error response.

    Attributes:
        code: Error code supplied with the response.
        message: Human-readable error text; this is also what
            ``str(exc)`` returns.
    """

    def __init__(self, code: str, message: str) -> None:
        # Only the message is forwarded to Exception so that str(exc) stays
        # the plain error text without the code.
        super().__init__(message)
        self.code = code
        self.message = message

    def __reduce__(self) -> tuple:
        # The inherited implementation rebuilds the exception from
        # ``self.args`` (just the message), which does not satisfy the
        # two-argument constructor and makes copy/pickle fail. Supply both
        # constructor arguments explicitly and carry the instance dict along.
        return (type(self), (self.code, self.message), self.__dict__)

    def __repr__(self) -> str:  # pragma: no cover
        return f"{type(self).__name__}(code={self.code!r}, message={self.message!r})"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DaemonTimeoutError(TimeoutError):
    """Signals that a daemon request, or a transfer, ran past its timeout.

    Derives from the built-in :class:`TimeoutError`, so handlers written for
    ``TimeoutError`` also catch this exception.
    """
|
dataerai/_framing.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
IPC wire framing for the Dataerai transfer daemon.
|
|
3
|
+
|
|
4
|
+
Wire format per frame:
|
|
5
|
+
[length: uint32 LE] [direction: uint8] [JSON envelope: <length> bytes]
|
|
6
|
+
|
|
7
|
+
direction = 0x01 Python SDK → Daemon (command)
|
|
8
|
+
direction = 0x02 Daemon → Python SDK (message / event)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import struct
|
|
15
|
+
|
|
16
|
+
# Direction byte values carried in every frame header.
DIR_COMMAND: int = 0x01
DIR_MESSAGE: int = 0x02
# Upper bound on the JSON envelope size of a single frame.
MAX_FRAME_BYTES: int = 32 * 1024 * 1024  # 32 MiB


def encode_frame(payload: dict) -> bytes:
    """Serialize *payload* into one command frame.

    The payload is dumped as compact JSON (no whitespace after separators),
    encoded as UTF-8 and prefixed with a 5-byte header: the body length as a
    little-endian uint32 followed by the ``DIR_COMMAND`` direction byte.

    Raises:
        ValueError: If the encoded JSON exceeds ``MAX_FRAME_BYTES``.
    """
    body = json.dumps(payload, separators=(",", ":")).encode("utf-8")
    size = len(body)
    if size > MAX_FRAME_BYTES:
        raise ValueError(
            f"payload too large: {size:,} bytes (max {MAX_FRAME_BYTES:,})"
        )
    header = struct.pack("<IB", size, DIR_COMMAND)
    return header + body
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FrameReader:
    """Stateful streaming frame parser.

    Feed incoming socket data via :meth:`push`. *on_frame* is called once,
    with the decoded JSON value, for each complete frame whose body is valid
    UTF-8 JSON. Bytes of an incomplete frame are buffered until a later
    :meth:`push` completes it.

    The direction byte of the header is skipped and never inspected.
    """

    # Header layout: uint32 little-endian body length + uint8 direction.
    _HEADER_BYTES = 5

    def __init__(self, on_frame) -> None:
        self._buf = bytearray()
        self._on_frame = on_frame

    def push(self, chunk: bytes) -> None:
        """Append *chunk* to the buffer and dispatch every complete frame.

        A frame whose body is not valid UTF-8 JSON is dropped silently. A
        header announcing more than ``MAX_FRAME_BYTES`` is treated as stream
        corruption: everything buffered is discarded and parsing resumes
        with the next call.

        Exceptions raised by *on_frame* propagate to the caller. The frame
        that triggered the callback has already been removed from the
        buffer at that point.
        """
        self._buf += chunk
        header = self._HEADER_BYTES
        while len(self._buf) >= header:
            (length,) = struct.unpack_from("<I", self._buf, 0)
            if length > MAX_FRAME_BYTES:
                # Corrupt stream: reset and wait for re-sync.
                self._buf.clear()
                return
            end = header + length
            if len(self._buf) < end:
                break
            raw = bytes(self._buf[header:end])
            del self._buf[:end]
            # Guard only the decoding step. Wrapping the callback as well
            # would hide JSON/Unicode errors raised by the handler itself.
            try:
                envelope = json.loads(raw.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Silently discard frames with unparseable JSON.
                continue
            self._on_frame(envelope)
|
dataerai/_types.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Public data-transfer types for the Dataerai Python SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any, Callable, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# ─── Auth ─────────────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class AuthStatus:
    """Value object returned by :meth:`DataeraiClient.auth_status`."""

    # presumably the e-mail of the authenticated account -- verify against client.py
    user_email: str
    # presumably when the current credentials stop being valid -- verify against client.py
    expires_at: datetime
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ─── Upload ───────────────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class UploadResult:
    """Summary handed back by :meth:`DataeraiClient.upload` after the transfer completes."""

    # Identifiers; their exact semantics are defined by the daemon (not visible here).
    transfer_id: str
    asset_id: str
    content_id: str
    # Size counters for the finished transfer.
    total_bytes: int
    chunk_count: int
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ─── Download ─────────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class DownloadedFile:
    """One file entry listed in a :class:`DownloadResult`."""

    filename: str
    # presumably the size in bytes -- TODO confirm the unit
    size: int
    # presumably where the file was written on the local filesystem -- verify
    local_path: str
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class DownloadResult:
    """Summary handed back by :meth:`DataeraiClient.download` after the transfer completes."""

    # Identifiers; their exact semantics are defined by the daemon (not visible here).
    transfer_id: str
    asset_id: str
    content_id: str
    # Size counters for the finished transfer.
    total_bytes: int
    chunk_count: int
    # File entries of the download; every instance gets its own fresh list.
    files: list[DownloadedFile] = field(default_factory=list)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ─── Progress ─────────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class ProgressEvent:
    """Progress snapshot emitted periodically while an upload or download runs."""

    transfer_id: str
    # Byte counters that drive :attr:`percent`.
    bytes_done: int
    bytes_total: int
    chunk_index: int
    chunk_count: int
    # Raw transfer rate; :attr:`rate_mbps` is this value divided by one million.
    rate_bps: float
    file_index: int
    file_name: str

    @property
    def percent(self) -> float:
        """Share of the transfer that is done, as a percentage (0–100).

        Yields ``0.0`` when ``bytes_total`` is zero, avoiding a division by
        zero.
        """
        total = self.bytes_total
        if total == 0:
            return 0.0
        return self.bytes_done / total * 100

    @property
    def rate_mbps(self) -> float:
        """Transfer rate in MB/s (megabytes per second, not megabits)."""
        raw_rate = self.rate_bps
        return raw_rate / 1_000_000
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ─── Metadata ─────────────────────────────────────────────────────────────────
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class AssetMetadata:
    """Metadata record for an asset, in the form the daemon returns it."""

    # Required fields.
    asset_id: str
    title: str
    # Optional fields; each one defaults to None when not supplied.
    description: Optional[str] = None
    alias: Optional[str] = None
    tags: Optional[list[str]] = None
    metadata: Optional[dict[str, Any]] = None
    # Timestamps are kept as plain strings; the format is not visible here.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ─── Collection download ───────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class CollectionDownloadResult:
    """Summary handed back by :meth:`DataeraiClient.download_collection` once all transfers are queued."""

    collection_id: str
    asset_count: int
    # Per-transfer results; every instance gets its own fresh list.
    transfers: list[DownloadResult] = field(default_factory=list)
|
dataerai/_version.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Single source of truth for the SDK's release *floor*.
|
|
2
|
+
|
|
3
|
+
Hand-set ``MAJOR.MINOR`` here to start a new release line (a minor/major
|
|
4
|
+
release) — that is the "easy to set" knob. The *patch* digit is the floor for
|
|
5
|
+
a fresh line; CI auto-promotes the patch from the published git tags on every
|
|
6
|
+
release to the stable channel, so you normally never touch it.
|
|
7
|
+
|
|
8
|
+
See ``packaging/version.py`` (the ``compute_version`` brain) and
|
|
9
|
+
``doc/runbooks/20260616_SDK_CLI_PyPI_Publish_Runbook.md``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.0"
|