localml 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- localml/__init__.py +62 -0
- localml/_state.py +31 -0
- localml/adapters/__init__.py +6 -0
- localml/adapters/base.py +54 -0
- localml/cli.py +62 -0
- localml/client.py +208 -0
- localml/config.py +97 -0
- localml/exceptions.py +35 -0
- localml/huggingface.py +33 -0
- localml/jax.py +48 -0
- localml/mlx.py +32 -0
- localml/ops.py +75 -0
- localml/py.typed +0 -0
- localml/run.py +39 -0
- localml/torch.py +44 -0
- localml/types.py +98 -0
- localml-0.1.0.dist-info/METADATA +180 -0
- localml-0.1.0.dist-info/RECORD +21 -0
- localml-0.1.0.dist-info/WHEEL +4 -0
- localml-0.1.0.dist-info/entry_points.txt +2 -0
- localml-0.1.0.dist-info/licenses/LICENSE +21 -0
localml/__init__.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""localml — Local ML experimentation platform SDK.
|
|
2
|
+
|
|
3
|
+
Public API::
|
|
4
|
+
|
|
5
|
+
import localml as ml
|
|
6
|
+
|
|
7
|
+
ml.configure(api_url="http://localhost:8000", token="local-dev-token")
|
|
8
|
+
|
|
9
|
+
with ml.start_run(project="demo", config={"lr": 0.001}) as run:
|
|
10
|
+
ml.log_metrics({"accuracy": 0.91})
|
|
11
|
+
ml.log_artifact("outputs/model.safetensors")
|
|
12
|
+
version = ml.mlx.log_model(name="assistant", model_dir="./model")
|
|
13
|
+
job = ml.evaluate(model=version, dataset="datasets/eval.jsonl", metrics=["accuracy"])
|
|
14
|
+
job.wait()
|
|
15
|
+
ml.deploy(model=version, target="local")
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from . import huggingface, jax, mlx, torch
|
|
21
|
+
from .config import Config, configure
|
|
22
|
+
from .exceptions import (
|
|
23
|
+
ArtifactUploadError,
|
|
24
|
+
AuthenticationError,
|
|
25
|
+
DeploymentError,
|
|
26
|
+
EvaluationFailedError,
|
|
27
|
+
LocalMLError,
|
|
28
|
+
ModelRegistrationError,
|
|
29
|
+
ValidationError,
|
|
30
|
+
)
|
|
31
|
+
from .ops import deploy, evaluate, log_artifact, log_metrics, log_params, register_model
|
|
32
|
+
from .run import start_run
|
|
33
|
+
from .types import Deployment, EvaluationJob, ModelVersion, Run
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
"ArtifactUploadError",
|
|
37
|
+
"AuthenticationError",
|
|
38
|
+
"Config",
|
|
39
|
+
"Deployment",
|
|
40
|
+
"DeploymentError",
|
|
41
|
+
"EvaluationFailedError",
|
|
42
|
+
"EvaluationJob",
|
|
43
|
+
"LocalMLError",
|
|
44
|
+
"ModelRegistrationError",
|
|
45
|
+
"ModelVersion",
|
|
46
|
+
"Run",
|
|
47
|
+
"ValidationError",
|
|
48
|
+
"configure",
|
|
49
|
+
"deploy",
|
|
50
|
+
"evaluate",
|
|
51
|
+
"huggingface",
|
|
52
|
+
"jax",
|
|
53
|
+
"log_artifact",
|
|
54
|
+
"log_metrics",
|
|
55
|
+
"log_params",
|
|
56
|
+
"mlx",
|
|
57
|
+
"register_model",
|
|
58
|
+
"start_run",
|
|
59
|
+
"torch",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
__version__ = "0.1.0"
|
localml/_state.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Context-local tracking of the active run.
|
|
2
|
+
|
|
3
|
+
Kept separate from the public API so adapters and ops modules can discover the current
|
|
4
|
+
run without import cycles. Uses :class:`contextvars.ContextVar` so concurrent runs (e.g.
|
|
5
|
+
in async or threaded notebooks) don't clobber each other.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from contextvars import ContextVar
|
|
11
|
+
|
|
12
|
+
from .types import Run
|
|
13
|
+
|
|
14
|
+
_current_run: ContextVar[Run | None] = ContextVar("localml_current_run", default=None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def set_current_run(run: Run | None) -> None:
    """Bind ``run`` as the active run for the current context.

    Args:
        run: The run to make active, or ``None`` to clear the active run.
    """
    _current_run.set(run)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_current_run() -> Run:
    """Return the run bound to the current context.

    Returns:
        The active :class:`Run`.

    Raises:
        RuntimeError: If no run is currently active.
    """
    active = _current_run.get()
    if active is not None:
        return active
    raise RuntimeError(
        "No active run. Use `with localml.start_run(...) as run:` before logging."
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def maybe_current_run() -> Run | None:
    """Return the run bound to the current context, or ``None`` when there is none.

    Unlike :func:`get_current_run`, this never raises.
    """
    active = _current_run.get()
    return active
|
localml/adapters/base.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Shared logic for framework adapters.
|
|
2
|
+
|
|
3
|
+
Adapters are **stateless**: they validate inputs, package framework-specific artifacts,
|
|
4
|
+
normalize metadata into the shared schema, and then call the common model-registration
|
|
5
|
+
path. Each framework module (``torch``, ``jax``, ``mlx``, ``huggingface``) builds on these
|
|
6
|
+
helpers so the platform core stays framework-neutral.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from ..client import get_client
|
|
15
|
+
from ..exceptions import ValidationError
|
|
16
|
+
from ..types import ModelVersion
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def require_dir(model_dir: str | Path, *, required_files: list[str] | None = None) -> Path:
    """Check that ``model_dir`` is a directory holding every required file.

    Args:
        model_dir: Directory to validate.
        required_files: Names (relative to ``model_dir``) that must exist; ``None``
            or an empty list skips the file check.

    Returns:
        ``model_dir`` converted to a :class:`~pathlib.Path`.

    Raises:
        ValidationError: If ``model_dir`` is not a directory, or the first missing
            required file is encountered.
    """
    root = Path(model_dir)
    if not root.is_dir():
        raise ValidationError(f"model_dir is not a directory: {root}")

    # Report only the first missing entry, in the order the caller listed them.
    missing = next(
        (entry for entry in (required_files or []) if not (root / entry).exists()),
        None,
    )
    if missing is not None:
        raise ValidationError(f"missing required file '{missing}' in {root}")
    return root
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def stage_artifact(path: Path) -> str:
    """Produce the URI under which a local artifact is registered.

    Scaffold behavior: nothing is uploaded; the result is simply the ``file://`` URI
    of the resolved (absolute) path. Phase 2 replaces this with a real MinIO upload
    (direct or pre-signed) plus checksum.

    Args:
        path: Local artifact location.

    Returns:
        A ``file://`` URI string for the resolved path.
    """
    absolute = path.resolve()
    return absolute.as_uri()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def register(
    *,
    name: str,
    framework: str,
    artifact_uri: str,
    metadata: dict[str, Any],
) -> ModelVersion:
    """Register a model version through the control-plane client.

    The metadata sent to the server starts with a ``"framework"`` entry and is then
    overlaid with ``metadata``, so a caller-supplied ``"framework"`` key replaces the
    seeded one.

    Args:
        name: Model name to register the version under.
        framework: Framework identifier passed to the client and seeded into metadata.
        artifact_uri: URI of the staged artifact.
        metadata: Adapter-specific metadata merged over the base entry.

    Returns:
        The :class:`ModelVersion` returned by the client.
    """
    merged: dict[str, Any] = {"framework": framework}
    merged |= metadata
    client = get_client()
    return client.register_model_version(
        name=name,
        framework=framework,
        artifact_uri=artifact_uri,
        metadata=merged,
    )
|
localml/cli.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Typer CLI for localml.
|
|
2
|
+
|
|
3
|
+
Thin wrapper over the control-plane client. Run ``localml --help`` for usage.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import typer
|
|
12
|
+
|
|
13
|
+
from .client import get_client
|
|
14
|
+
from .config import configure, get_config
|
|
15
|
+
|
|
16
|
+
app = typer.Typer(help="localml — local ML experimentation platform CLI", no_args_is_help=True)
|
|
17
|
+
|
|
18
|
+
projects_app = typer.Typer(help="Manage projects")
|
|
19
|
+
runs_app = typer.Typer(help="Inspect runs")
|
|
20
|
+
models_app = typer.Typer(help="Inspect model versions")
|
|
21
|
+
app.add_typer(projects_app, name="projects")
|
|
22
|
+
app.add_typer(runs_app, name="runs")
|
|
23
|
+
app.add_typer(models_app, name="models")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _echo(obj: Any) -> None:
    # Render as indented JSON; anything json cannot encode natively falls back to str().
    rendered = json.dumps(obj, indent=2, default=str)
    typer.echo(rendered)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@app.command()
def config(
    api_url: str = typer.Option(None, help="Control plane URL"),
    token: str = typer.Option(None, help="Bearer token"),
) -> None:
    """Show or update the active SDK configuration."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    # Only reconfigure when at least one override was actually supplied.
    if any((api_url, token)):
        configure(api_url=api_url, token=token)
    active = get_config()
    # Never print the token itself, only whether one is set.
    summary = {
        "api_url": active.api_url,
        "token_set": bool(active.token),
        "timeout": active.timeout,
    }
    _echo(summary)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@app.command()
def health() -> None:
    """Check control-plane health."""
    # Issues GET /health through the shared client and prints the decoded response.
    payload = get_client()._request("GET", "/health")
    _echo(payload)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@runs_app.command("get")
def runs_get(run_id: str) -> None:
    """Fetch a run by id."""
    # The id is interpolated into the request path as given.
    endpoint = f"/runs/{run_id}"
    payload = get_client()._request("GET", endpoint)
    _echo(payload)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@models_app.command("get")
def models_get(name: str) -> None:
    """Fetch a model and its versions by name."""
    # The name is interpolated into the request path as given.
    endpoint = f"/models/{name}"
    payload = get_client()._request("GET", endpoint)
    _echo(payload)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
if __name__ == "__main__":
|
|
62
|
+
app()
|
localml/client.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""HTTPX client for the FastAPI control plane.
|
|
2
|
+
|
|
3
|
+
Thin wrapper that handles auth headers, retries on transient HTTP failures, and maps
|
|
4
|
+
non-2xx responses to typed SDK exceptions. Create-style calls accept an idempotency key so
|
|
5
|
+
they can be safely retried.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import httpx
|
|
15
|
+
|
|
16
|
+
from .config import Config, get_config
|
|
17
|
+
from .exceptions import (
|
|
18
|
+
AuthenticationError,
|
|
19
|
+
DeploymentError,
|
|
20
|
+
LocalMLError,
|
|
21
|
+
ModelRegistrationError,
|
|
22
|
+
ValidationError,
|
|
23
|
+
)
|
|
24
|
+
from .types import Deployment, EvaluationJob, ModelVersion, Run
|
|
25
|
+
|
|
26
|
+
_RETRYABLE_STATUS = {429, 500, 502, 503, 504}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Client:
    """Synchronous control-plane client.

    Wraps an ``httpx.Client`` bound to the configured base URL, attaches the bearer
    token when one is configured, retries GET and idempotent requests, and converts
    non-2xx responses into typed SDK exceptions.
    """

    def __init__(self, config: Config | None = None) -> None:
        """Build the underlying HTTP client.

        Args:
            config: Configuration to use; when omitted (or falsy) the active SDK
                configuration from :func:`get_config` is used.
        """
        self.config = config or get_config()
        headers = {}
        if self.config.token:
            headers["Authorization"] = f"Bearer {self.config.token}"
        self._http = httpx.Client(
            base_url=self.config.api_url,
            headers=headers,
            timeout=self.config.timeout,
        )

    # -- low-level ---------------------------------------------------------------

    def _request(self, method: str, path: str, *, idempotent: bool = False, **kwargs: Any) -> Any:
        """Send one request, retrying when it is safe to do so.

        GET requests and requests flagged ``idempotent`` are attempted up to
        ``config.max_retries`` times (always at least once); everything else is
        attempted exactly once. Between attempts the client backs off for
        ``min(2**attempt, 5)`` seconds. Idempotent requests carry an
        ``Idempotency-Key`` header that stays the same across attempts.

        Args:
            method: HTTP method.
            path: Path relative to the configured base URL.
            idempotent: Whether the request may be retried and should carry an
                idempotency key.
            **kwargs: Forwarded to ``httpx.Client.request``.

        Returns:
            The decoded JSON body, or ``None`` for an empty successful response.

        Raises:
            AuthenticationError: On HTTP 401.
            ValidationError: On HTTP 400 or 422.
            LocalMLError: On any other non-2xx response, or when every attempt
                failed at the transport level.
        """
        retryable = method == "GET" or idempotent
        # A configured value of 0 (or less) must still send the request once.
        attempts = max(1, self.config.max_retries) if retryable else 1
        if idempotent:
            # Work on a copy so the caller's headers mapping is never mutated.
            headers = dict(kwargs.get("headers") or {})
            headers.setdefault("Idempotency-Key", str(uuid.uuid4()))
            kwargs["headers"] = headers

        last_exc: Exception | None = None
        final_attempt = attempts - 1
        for attempt in range(attempts):
            try:
                resp = self._http.request(method, path, **kwargs)
            except httpx.TransportError as exc:  # network-level, retry
                last_exc = exc
            else:
                # Hand the response over unless it is retryable and attempts remain.
                if attempt >= final_attempt or resp.status_code not in _RETRYABLE_STATUS:
                    return self._handle(resp)
            # Back off only when another attempt will actually follow.
            if attempt < final_attempt:
                time.sleep(min(2**attempt, 5))
        # Only reachable when the last attempt ended in a transport error.
        raise LocalMLError(f"request failed after {attempts} attempts: {last_exc}") from last_exc

    @staticmethod
    def _handle(resp: httpx.Response) -> Any:
        """Decode a successful response or raise the matching SDK exception.

        Returns:
            ``resp.json()`` for a 2xx response with a body, ``None`` for an empty one.

        Raises:
            AuthenticationError: On HTTP 401.
            ValidationError: On HTTP 400 or 422.
            LocalMLError: On any other non-2xx status.
        """
        if resp.is_success:
            return resp.json() if resp.content else None
        detail = resp.text
        if resp.status_code == 401:
            raise AuthenticationError(detail)
        if resp.status_code in (400, 422):
            raise ValidationError(detail)
        raise LocalMLError(f"HTTP {resp.status_code}: {detail}")

    # -- runs --------------------------------------------------------------------

    def create_run(self, project: str, config: dict[str, Any]) -> Run:
        """Create a run via ``POST /runs`` and return it as a :class:`Run`.

        The status defaults to ``"running"`` when the response omits it.
        """
        data = self._request(
            "POST", "/runs", idempotent=True, json={"project": project, "config": config}
        )
        return Run(id=data["id"], project=data["project"], status=data.get("status", "running"))

    def log_metrics(self, run_id: str, metrics: dict[str, float], step: int | None = None) -> None:
        """Post ``metrics`` (and the optional ``step``) to the run's metrics endpoint."""
        self._request("POST", f"/runs/{run_id}/metrics", json={"metrics": metrics, "step": step})

    def log_params(self, run_id: str, params: dict[str, Any]) -> None:
        """Post ``params`` to the run's params endpoint."""
        self._request("POST", f"/runs/{run_id}/params", json={"params": params})

    def log_artifact(self, run_id: str, uri: str, artifact_type: str) -> None:
        """Record an artifact URI and its type against the run."""
        self._request(
            "POST",
            f"/runs/{run_id}/artifacts",
            json={"uri": uri, "artifact_type": artifact_type},
        )

    def complete_run(self, run_id: str, status: str) -> None:
        """Report the final ``status`` of a run.

        NOTE(review): this posts to the ``/metrics`` endpoint with an empty metrics
        payload plus ``status`` — confirm the server treats that as run completion.
        """
        self._request("POST", f"/runs/{run_id}/metrics", json={"metrics": {}, "status": status})

    # -- models ------------------------------------------------------------------

    def register_model_version(
        self,
        name: str,
        framework: str,
        artifact_uri: str,
        metadata: dict[str, Any],
    ) -> ModelVersion:
        """Register a new version of model ``name``.

        Returns:
            The created :class:`ModelVersion`; status defaults to ``"created"`` when
            the response omits it.

        Raises:
            ModelRegistrationError: When the server rejects the request with a
                validation error (HTTP 400/422).
        """
        try:
            data = self._request(
                "POST",
                f"/models/{name}/versions",
                idempotent=True,
                json={
                    "model_name": name,
                    "framework": framework,
                    "artifact_uri": artifact_uri,
                    "metadata": metadata,
                },
            )
        except ValidationError as exc:
            raise ModelRegistrationError(str(exc)) from exc
        return ModelVersion(
            id=data["id"],
            model_name=data["model_name"],
            version=data["version"],
            framework=data["framework"],
            artifact_uri=data["artifact_uri"],
            status=data.get("status", "created"),
        )

    # -- evaluations -------------------------------------------------------------

    def create_evaluation(
        self, model_version_id: str, dataset_uri: str, metrics: list[str]
    ) -> EvaluationJob:
        """Queue an evaluation job via ``POST /evaluations``.

        The returned job keeps a reference to this client; status defaults to
        ``"queued"`` when the response omits it.
        """
        data = self._request(
            "POST",
            "/evaluations",
            idempotent=True,
            json={
                "model_version_id": model_version_id,
                "dataset_uri": dataset_uri,
                "metrics": metrics,
            },
        )
        return EvaluationJob(
            id=data["id"],
            model_version_id=data["model_version_id"],
            status=data.get("status", "queued"),
            metrics=data.get("metrics"),
            _client=self,
        )

    def get_evaluation(self, job_id: str) -> EvaluationJob:
        """Fetch the current state of evaluation job ``job_id``."""
        data = self._request("GET", f"/evaluations/{job_id}")
        return EvaluationJob(
            id=data["id"],
            model_version_id=data["model_version_id"],
            status=data["status"],
            metrics=data.get("metrics"),
            _client=self,
        )

    # -- deployments -------------------------------------------------------------

    def create_deployment(self, model_version_id: str, target: str) -> Deployment:
        """Create a deployment of a model version on ``target``.

        Returns:
            The created :class:`Deployment`; status defaults to ``"active"`` when the
            response omits it.

        Raises:
            DeploymentError: When the server rejects the request with a validation
                error (HTTP 400/422).
        """
        try:
            data = self._request(
                "POST",
                "/deployments",
                idempotent=True,
                json={"model_version_id": model_version_id, "target": target},
            )
        except ValidationError as exc:
            raise DeploymentError(str(exc)) from exc
        return Deployment(
            id=data["id"],
            model_version_id=data["model_version_id"],
            target=data["target"],
            status=data.get("status", "active"),
            endpoint_url=data.get("endpoint_url"),
            _client=self,
        )

    def predict(self, deployment_id: str, payload: dict[str, Any]) -> dict[str, Any]:
        """Send ``payload`` to the deployment's predict endpoint (single attempt, no retry)."""
        return self._request("POST", f"/deployments/{deployment_id}/predict", json=payload)

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._http.close()
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
_default_client: Client | None = None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def get_client() -> Client:
    """Return the shared module-level client, constructing it lazily on first call."""
    global _default_client
    client = _default_client
    if client is None:
        client = _default_client = Client()
    return client
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def reset_client() -> None:
    """Close and forget the cached default client, if any (e.g. after reconfiguring).

    The next :func:`get_client` call builds a fresh client.
    """
    global _default_client
    cached = _default_client
    if cached is None:
        return
    cached.close()
    _default_client = None
|
localml/config.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""SDK configuration.
|
|
2
|
+
|
|
3
|
+
Configuration precedence (highest first):
|
|
4
|
+
|
|
5
|
+
1. Explicit arguments to :func:`configure`.
|
|
6
|
+
2. Environment variables (``LOCALML_API_URL``, ``LOCALML_API_TOKEN``).
|
|
7
|
+
3. ``~/.localml/config.toml``.
|
|
8
|
+
4. Built-in defaults.
|
|
9
|
+
|
|
10
|
+
The active config is process-global; the control plane remains the source of truth for
|
|
11
|
+
all platform state.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import tomllib
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import SupportsFloat, SupportsIndex
|
|
21
|
+
|
|
22
|
+
DEFAULT_API_URL = "http://localhost:8000"
|
|
23
|
+
CONFIG_PATH = Path.home() / ".localml" / "config.toml"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class Config:
    """Settings the SDK uses to talk to the control plane, after resolution."""

    # Base URL of the control plane API.
    api_url: str = DEFAULT_API_URL
    # Bearer token; ``None`` means no token is configured.
    token: str | None = None
    # Timeout handed to the HTTP client.
    timeout: float = 30.0
    # Attempt budget the client uses for retryable requests.
    max_retries: int = 3
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
_active: Config | None = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _load_file(path: Path = CONFIG_PATH) -> dict[str, object]:
    """Read the TOML config file at ``path``.

    Args:
        path: File to read; defaults to ``CONFIG_PATH``.

    Returns:
        The parsed TOML table, or an empty dict when the file does not exist.

    Raises:
        tomllib.TOMLDecodeError: If the file exists but is not valid TOML.
    """
    # Open directly instead of checking ``exists()`` first: the file may vanish
    # between the check and the open. A missing file (or a path whose parent is not
    # a directory) still means "no file config".
    try:
        with path.open("rb") as fh:
            return tomllib.load(fh)
    except (FileNotFoundError, NotADirectoryError):
        return {}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _float_config(value: object, default: float) -> float:
|
|
47
|
+
if isinstance(value, str | bytes | bytearray | SupportsFloat | SupportsIndex):
|
|
48
|
+
return float(value)
|
|
49
|
+
return default
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _int_config(value: object, default: int) -> int:
|
|
53
|
+
if isinstance(value, str | bytes | bytearray | SupportsIndex):
|
|
54
|
+
return int(value)
|
|
55
|
+
return default
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def configure(
    api_url: str | None = None,
    token: str | None = None,
    *,
    timeout: float | None = None,
    max_retries: int | None = None,
) -> Config:
    """Resolve, activate and return the SDK configuration.

    ``api_url`` and ``token`` are taken from the argument, then the
    ``LOCALML_API_URL`` / ``LOCALML_API_TOKEN`` environment variable, then the config
    file, then the built-in default. ``timeout`` and ``max_retries`` are taken from
    the argument, then the config file, then the built-in default.

    Returns:
        The newly activated :class:`Config`.
    """
    global _active
    from_file = _load_file()

    # Empty strings count as "not provided" at every level of the chain.
    resolved_url = api_url or os.environ.get("LOCALML_API_URL")
    if not resolved_url:
        resolved_url = str(from_file.get("api_url", DEFAULT_API_URL))

    resolved_token = token or os.environ.get("LOCALML_API_TOKEN")
    if not resolved_token:
        resolved_token = str(from_file["token"]) if "token" in from_file else None

    if timeout is None:
        resolved_timeout = _float_config(from_file.get("timeout"), 30.0)
    else:
        resolved_timeout = timeout

    if max_retries is None:
        resolved_retries = _int_config(from_file.get("max_retries"), 3)
    else:
        resolved_retries = max_retries

    _active = Config(
        api_url=resolved_url,
        token=resolved_token,
        timeout=resolved_timeout,
        max_retries=resolved_retries,
    )
    return _active
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_config() -> Config:
    """Return the active config; on first use, build it via :func:`configure` with no overrides."""
    return _active if _active is not None else configure()
|
localml/exceptions.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Typed SDK exceptions.
|
|
2
|
+
|
|
3
|
+
All SDK errors derive from :class:`LocalMLError` so callers can catch the whole family
|
|
4
|
+
with a single ``except`` while still being able to discriminate specific failures.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LocalMLError(Exception):
    """Root of the localml exception hierarchy; catch this to handle any SDK error."""


class AuthenticationError(LocalMLError):
    """The API refused the configured token (HTTP 401)."""


class ValidationError(LocalMLError):
    """Arguments were invalid or the API rejected the request (HTTP 400/422)."""


class ArtifactUploadError(LocalMLError):
    """An artifact upload failed or did not complete."""


class ModelRegistrationError(LocalMLError):
    """A model version could not be registered."""


class EvaluationFailedError(LocalMLError):
    """An evaluation job finished in the ``failed`` state."""


class DeploymentError(LocalMLError):
    """A deployment could not be created or activated."""
|
localml/huggingface.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Hugging Face framework adapter — ``localml.huggingface``.
|
|
2
|
+
|
|
3
|
+
Captures ``config.json``, tokenizer files, safetensors/bin weights, generation config, and
|
|
4
|
+
Hugging Face model metadata.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .adapters import base
|
|
12
|
+
from .types import ModelVersion
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def log_pretrained(
    name: str,
    model_dir: str,
    *,
    metadata: dict[str, Any] | None = None,
) -> ModelVersion:
    """Register a Hugging Face model directory as a new model version.

    ``model_dir`` must be a directory containing ``config.json``. The registered
    metadata starts with ``source="huggingface"`` and is overlaid with ``metadata``.

    Example::

        ml.huggingface.log_pretrained(name="hf-assistant", model_dir="./model")
    """
    model_path = base.require_dir(model_dir, required_files=["config.json"])
    merged: dict[str, Any] = {"source": "huggingface"}
    merged |= (metadata or {})
    staged_uri = base.stage_artifact(model_path)
    return base.register(
        name=name,
        framework="huggingface",
        artifact_uri=staged_uri,
        metadata=merged,
    )
|
localml/jax.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""JAX framework adapter — ``localml.jax``.
|
|
2
|
+
|
|
3
|
+
Captures PyTree parameters, training state, an Orbax checkpoint directory, shape/dtype
|
|
4
|
+
metadata, the JAX version, and optional sharding metadata.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .adapters import base
|
|
13
|
+
from .types import ModelVersion
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def log_checkpoint(
    name: str,
    *,
    params: Any | None = None,
    state: Any | None = None,
    config: dict[str, Any] | None = None,
    checkpoint_format: str = "orbax",
    checkpoint_dir: str | None = None,
    metadata: dict[str, Any] | None = None,
) -> ModelVersion:
    """Register a JAX checkpoint directory as a new model version.

    The checkpoint directory (``checkpoint_dir``, or ``./.localml/jax/<name>`` when
    omitted) is created if missing and its URI is registered. ``params`` and
    ``state`` are accepted but not yet serialized.

    Example::

        ml.jax.log_checkpoint(
            name="ranker",
            params=params,
            state=train_state,
            config=config,
            checkpoint_format="orbax",
        )

    Scaffold note: real Orbax serialization + sharding capture land in Phase 2.
    """
    merged: dict[str, Any] = {"checkpoint_format": checkpoint_format, "config": config or {}}
    merged |= (metadata or {})

    if checkpoint_dir:
        destination = Path(checkpoint_dir)
    else:
        destination = Path(f"./.localml/jax/{name}")
    destination.mkdir(parents=True, exist_ok=True)
    # TODO(phase2): orbax.checkpoint save of params/state; capture shape/dtype + jax version.
    staged_uri = base.stage_artifact(destination)

    return base.register(name=name, framework="jax", artifact_uri=staged_uri, metadata=merged)
|
localml/mlx.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""MLX framework adapter — ``localml.mlx``.
|
|
2
|
+
|
|
3
|
+
Captures MLX model files, tokenizer/config, quantization metadata, the MLX version, and
|
|
4
|
+
Apple Silicon runtime metadata.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .adapters import base
|
|
12
|
+
from .types import ModelVersion
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def log_model(
    name: str,
    model_dir: str,
    *,
    quantization: str | None = None,
    metadata: dict[str, Any] | None = None,
) -> ModelVersion:
    """Register an MLX model directory as a new model version.

    ``model_dir`` must be an existing directory. The registered metadata starts with
    ``runtime="mlx"`` and the given ``quantization`` and is overlaid with ``metadata``.

    Example::

        ml.mlx.log_model(name="assistant", model_dir="./mlx_model", quantization="4bit")
    """
    model_path = base.require_dir(model_dir)
    merged: dict[str, Any] = {"runtime": "mlx", "quantization": quantization}
    merged |= (metadata or {})
    staged_uri = base.stage_artifact(model_path)
    return base.register(name=name, framework="mlx", artifact_uri=staged_uri, metadata=merged)
|
localml/ops.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Top-level lifecycle operations exposed on the ``localml`` namespace.
|
|
2
|
+
|
|
3
|
+
These are thin convenience wrappers that resolve the active run/client and delegate to the
|
|
4
|
+
control plane. Framework-specific model logging lives in :mod:`localml.adapters`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ._state import get_current_run
|
|
13
|
+
from .client import get_client
|
|
14
|
+
from .types import Deployment, EvaluationJob, ModelVersion
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def log_metrics(metrics: dict[str, float], *, step: int | None = None) -> None:
    """Send scalar ``metrics`` (optionally tagged with ``step``) for the active run.

    Raises:
        RuntimeError: If no run is active.
    """
    active = get_current_run()
    client = get_client()
    client.log_metrics(active.id, metrics, step=step)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def log_params(params: dict[str, Any]) -> None:
    """Send hyperparameters / config values for the active run.

    Raises:
        RuntimeError: If no run is active.
    """
    active = get_current_run()
    client = get_client()
    client.log_params(active.id, params)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def log_artifact(path: str, *, artifact_type: str = "file") -> None:
    """Attach a local artifact to the active run.

    Note: in this scaffold nothing is uploaded; the resolved absolute path is what
    gets recorded. Real uploads to MinIO (direct or pre-signed URL) land in Phase 2 —
    see ROADMAP.md.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        RuntimeError: If no run is active.
    """
    local = Path(path)
    # The existence check happens before the active run is looked up.
    if not local.exists():
        raise FileNotFoundError(f"artifact path does not exist: {path}")
    active = get_current_run()
    recorded = str(local.resolve())
    get_client().log_artifact(active.id, uri=recorded, artifact_type=artifact_type)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def register_model(
    name: str,
    artifact_uri: str,
    *,
    framework: str = "generic",
    metadata: dict[str, Any] | None = None,
) -> ModelVersion:
    """Create a model version for an artifact that has already been staged.

    A missing (or empty) ``metadata`` is sent as an empty dict.
    """
    client = get_client()
    payload_meta = metadata or {}
    return client.register_model_version(
        name=name,
        framework=framework,
        artifact_uri=artifact_uri,
        metadata=payload_meta,
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def evaluate(
    model: ModelVersion | str,
    dataset: str,
    metrics: list[str],
) -> EvaluationJob:
    """Queue an evaluation of a model version against ``dataset``.

    Args:
        model: A :class:`ModelVersion`, or a model version id given directly.
        dataset: Dataset URI passed through to the control plane.
        metrics: Names of the metrics to compute.
    """
    if isinstance(model, ModelVersion):
        version_id = model.id
    else:
        version_id = model
    client = get_client()
    return client.create_evaluation(
        model_version_id=version_id,
        dataset_uri=dataset,
        metrics=metrics,
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def deploy(model: ModelVersion | str, target: str = "local") -> Deployment:
    """Create a deployment of a model version on ``target`` (currently ``local``).

    ``model`` may be a :class:`ModelVersion` or a model version id given directly.
    """
    if isinstance(model, ModelVersion):
        version_id = model.id
    else:
        version_id = model
    client = get_client()
    return client.create_deployment(model_version_id=version_id, target=target)
|
localml/py.typed
ADDED
|
File without changes
|
localml/run.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Run lifecycle context manager."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterator
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ._state import set_current_run
|
|
10
|
+
from .client import get_client
|
|
11
|
+
from .types import Run
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@contextmanager
def start_run(project: str, config: dict[str, Any] | None = None) -> Iterator[Run]:
    """Start a tracked run and make it the active run for the duration of the block.

    On a clean exit the run is marked ``completed``. If the block raises — including
    ``KeyboardInterrupt`` or ``SystemExit`` — the run is marked ``failed`` and the
    exception propagates. On exit the run that was active before this one (if any)
    becomes active again, so nested ``start_run`` blocks do not clear the outer run.

    Args:
        project: Project the run belongs to.
        config: Run configuration; ``None`` is sent as an empty dict.

    Yields:
        The newly created :class:`Run`.

    Example::

        with localml.start_run(project="demo", config={"lr": 1e-3}) as run:
            localml.log_metrics({"accuracy": 0.91})
    """
    # Imported here because this module only imports ``set_current_run`` at the top.
    from ._state import maybe_current_run

    client = get_client()
    run = client.create_run(project=project, config=config or {})
    # Remember whatever was active so it can be restored when this run ends.
    previous = maybe_current_run()
    set_current_run(run)
    try:
        yield run
    except (Exception, KeyboardInterrupt, SystemExit):
        # An interrupted or exiting process must not leave the run looking "running".
        run.status = "failed"
        client.complete_run(run.id, status="failed")
        raise
    else:
        run.status = "completed"
        client.complete_run(run.id, status="completed")
    finally:
        set_current_run(previous)
|
localml/torch.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""PyTorch framework adapter — ``localml.torch``.
|
|
2
|
+
|
|
3
|
+
Captures ``state_dict``, model config, optional input/output schema, the PyTorch version,
|
|
4
|
+
and Python dependencies, then registers the result as a shared model version.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .adapters import base
|
|
13
|
+
from .types import ModelVersion
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def log_model(
    model: Any,
    name: str,
    *,
    example_input: Any | None = None,
    metadata: dict[str, Any] | None = None,
    save_dir: str | None = None,
) -> ModelVersion:
    """Register a PyTorch model as a new model version.

    The save directory (``save_dir``, or ``./.localml/torch/<name>`` when omitted) is
    created if missing and its URI is registered. ``model`` is accepted but not yet
    serialized; ``example_input`` only sets a ``has_example_input`` metadata flag.

    Example::

        ml.torch.log_model(
            model=model, name="classifier", example_input=batch, metadata={"architecture": "resnet"}
        )

    Scaffold note: real ``state_dict`` serialization and schema inference land in Phase 2.
    """
    merged: dict[str, Any] = {"task": None}
    merged |= (metadata or {})
    if example_input is not None:
        merged["has_example_input"] = True

    if save_dir:
        destination = Path(save_dir)
    else:
        destination = Path(f"./.localml/torch/{name}")
    destination.mkdir(parents=True, exist_ok=True)
    # TODO(phase2): torch.save(model.state_dict(), target / "model.pt"); capture versions.
    staged_uri = base.stage_artifact(destination)

    return base.register(name=name, framework="pytorch", artifact_uri=staged_uri, metadata=merged)
|
localml/types.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Shared platform primitives returned by the SDK.
|
|
2
|
+
|
|
3
|
+
These mirror the control-plane resources. They are intentionally thin data holders; the
|
|
4
|
+
server remains the source of truth. ``EvaluationJob`` and ``Deployment`` carry helper
|
|
5
|
+
methods that round-trip back through the API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import time
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
from .exceptions import EvaluationFailedError
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from .client import Client
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class Run:
    """A single tracked execution of an experiment.

    Plain data holder: ``id`` and ``project`` are required, and ``status`` starts out as
    ``"running"`` unless another value is supplied.
    """

    id: str  # identifier of the run
    project: str  # project the run is recorded under
    status: str = "running"  # lifecycle state; defaults to "running"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class ModelVersion:
    """Record of one specific model artifact together with its metadata.

    Meant to be treated as immutable. The dataclass is not frozen, so this is a
    convention rather than something enforced at runtime.
    """

    id: str  # identifier of this model version
    model_name: str  # name of the model this version belongs to
    version: int  # version number
    framework: str  # framework label supplied at registration, e.g. "pytorch"
    artifact_uri: str  # location of the stored artifact
    status: str = "created"  # lifecycle state; defaults to "created"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class EvaluationJob:
    """Background job that evaluates a model version against a dataset.

    The job is a thin local snapshot. ``refresh`` and ``wait`` go through the attached
    client to update ``status`` and ``metrics`` in place.
    """

    id: str
    model_version_id: str
    status: str = "queued"
    metrics: dict[str, float] | None = None
    # Kept out of repr() and equality so jobs compare on their data fields only.
    _client: Client | None = field(default=None, repr=False, compare=False)

    # Unannotated on purpose: a class-level constant, not a dataclass field.
    _TERMINAL = frozenset({"completed", "failed"})

    def refresh(self) -> EvaluationJob:
        """Fetch the latest job state from the control plane.

        Copies ``status`` and ``metrics`` from the client's view of this job onto
        ``self``. When no client is attached this is a no-op.

        Returns:
            ``self``, to allow chaining.
        """
        if self._client is None:
            return self
        latest = self._client.get_evaluation(self.id)
        self.status = latest.status
        self.metrics = latest.metrics
        return self

    def wait(self, *, timeout: float = 600.0, poll_interval: float = 1.0) -> EvaluationJob:
        """Poll until the job reaches a terminal state (``completed`` or ``failed``).

        The delay between polls starts at ``poll_interval`` and doubles after each poll,
        with subsequent delays capped at 10s. Each sleep is also shortened so it never
        runs past the deadline.

        Args:
            timeout: Maximum number of seconds to wait before giving up.
            poll_interval: Initial delay, in seconds, before the first poll.

        Returns:
            ``self`` once the job has completed.

        Raises:
            EvaluationFailedError: If the job ends in ``failed``, if the timeout expires
                first, or if the job is not terminal and has no client to poll with.
        """
        if self._client is None and self.status not in self._TERMINAL:
            # Without a client refresh() can never change the status, so polling would
            # only sleep until the timeout. Fail immediately instead.
            raise EvaluationFailedError(
                f"evaluation {self.id} is not bound to a client and cannot be polled "
                f"(status={self.status})"
            )

        deadline = time.monotonic() + timeout
        interval = poll_interval
        while self.status not in self._TERMINAL:
            remaining = deadline - time.monotonic()
            if remaining < 0:
                raise EvaluationFailedError(
                    f"evaluation {self.id} timed out (status={self.status})"
                )
            # Never sleep beyond the deadline, even when the backoff interval is longer.
            time.sleep(min(interval, remaining))
            interval = min(interval * 2, 10.0)
            self.refresh()
        if self.status == "failed":
            raise EvaluationFailedError(f"evaluation {self.id} failed")
        return self
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class Deployment:
    """Serving configuration, active or historical, for one model version.

    ``predict`` forwards requests through the attached client; a deployment created
    without a client can be inspected but not queried.
    """

    id: str
    model_version_id: str
    target: str = "local"
    status: str = "active"
    endpoint_url: str | None = None
    # Kept out of repr() and equality so deployments compare on their data fields only.
    _client: Client | None = field(default=None, repr=False, compare=False)

    def predict(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Run a prediction against this deployment via the attached client.

        Args:
            payload: Request body handed to the client unchanged.

        Returns:
            The client's response for this deployment id.

        Raises:
            RuntimeError: If the deployment has no client attached.
        """
        client = self._client
        if client is not None:
            return client.predict(self.id, payload)
        raise RuntimeError("deployment is not bound to a client")
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: localml
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local ML experimentation platform demo SDK and control plane
|
|
5
|
+
Project-URL: Homepage, https://github.com/guenp/localml
|
|
6
|
+
Project-URL: Documentation, https://guenp.github.io/localml/
|
|
7
|
+
Project-URL: Repository, https://github.com/guenp/localml
|
|
8
|
+
Project-URL: Changelog, https://github.com/guenp/localml/blob/main/CHANGELOG.md
|
|
9
|
+
Project-URL: Issues, https://github.com/guenp/localml/issues
|
|
10
|
+
Author: Guenevere Prawiroatmodjo
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: local-development,machine-learning,mlops,sdk
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.11
|
|
25
|
+
Requires-Dist: httpx>=0.27
|
|
26
|
+
Requires-Dist: pydantic>=2.6
|
|
27
|
+
Requires-Dist: typer>=0.12
|
|
28
|
+
Provides-Extra: api
|
|
29
|
+
Requires-Dist: alembic>=1.13; extra == 'api'
|
|
30
|
+
Requires-Dist: boto3>=1.34; extra == 'api'
|
|
31
|
+
Requires-Dist: fastapi>=0.110; extra == 'api'
|
|
32
|
+
Requires-Dist: mlflow>=2.12; extra == 'api'
|
|
33
|
+
Requires-Dist: psycopg[binary]>=3.1; extra == 'api'
|
|
34
|
+
Requires-Dist: redis>=5.0; extra == 'api'
|
|
35
|
+
Requires-Dist: sqlalchemy>=2.0; extra == 'api'
|
|
36
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == 'api'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# localml
|
|
40
|
+
|
|
41
|
+
[![CI](https://github.com/guenp/localml/actions/workflows/ci.yml/badge.svg)](https://github.com/guenp/localml/actions/workflows/ci.yml)
|
|
42
|
+
|
|
43
|
+
A **local ML experimentation platform demo** that runs entirely on an Apple Silicon
|
|
44
|
+
workstation. It demonstrates the core architecture of a production ML platform at local
|
|
45
|
+
scale: a Python SDK, framework adapters, experiment tracking, a model registry, artifact
|
|
46
|
+
storage, evaluation jobs, and local model serving.
|
|
47
|
+
|
|
48
|
+
> Status: **early scaffold.** Most components are stubs with coherent interfaces. See
|
|
49
|
+
> [`ROADMAP.md`](./ROADMAP.md) for what's planned and [`docs/design.md`](./docs/design.md)
|
|
50
|
+
> for the full software design document.
|
|
51
|
+
|
|
52
|
+
## What's here
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
localml/
|
|
56
|
+
├── src/localml/ # Python SDK (`import localml as ml`)
|
|
57
|
+
│ ├── adapters/ # torch / jax / mlx / huggingface framework adapters
|
|
58
|
+
│ ├── client.py # HTTPX client for the control plane
|
|
59
|
+
│ ├── config.py # ~/.localml/config.toml handling
|
|
60
|
+
│ ├── exceptions.py # typed SDK errors
|
|
61
|
+
│ ├── run.py # run context manager
|
|
62
|
+
│ ├── types.py # Run / ModelVersion / EvaluationJob / Deployment
|
|
63
|
+
│ └── cli.py # Typer CLI
|
|
64
|
+
├── services/
|
|
65
|
+
│ ├── api/ # FastAPI control plane
|
|
66
|
+
│ ├── worker/ # Redis-backed evaluation worker
|
|
67
|
+
│ └── mlflow/ # MLflow tracking + registry image
|
|
68
|
+
├── docs/ # Zensical documentation site and design document
|
|
69
|
+
├── docker-compose.yml # Local stack: api, worker, postgres, redis, minio, mlflow, serving
|
|
70
|
+
└── tests/
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Architecture (at a glance)
|
|
74
|
+
|
|
75
|
+
```mermaid
|
|
76
|
+
flowchart LR
|
|
77
|
+
User[SDK / CLI / Notebook] --> API[FastAPI control plane]
|
|
78
|
+
API --> MLflow[MLflow<br/>tracking + registry]
|
|
79
|
+
API --> DB[(Postgres<br/>metadata)]
|
|
80
|
+
API --> Store[(MinIO<br/>artifacts)]
|
|
81
|
+
API --> Queue[Redis<br/>job queue]
|
|
82
|
+
API --> Serving[Local inference<br/>Ollama / MLX]
|
|
83
|
+
Queue --> Worker[Worker]
|
|
84
|
+
Worker --> Store
|
|
85
|
+
Worker --> DB
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The control plane (Postgres) is the source of truth for platform metadata. MLflow holds
|
|
89
|
+
experiment tracking state, MinIO holds artifacts, and Redis holds transient job state.
|
|
90
|
+
|
|
91
|
+
## Quick start
|
|
92
|
+
|
|
93
|
+
### 1. Bring up the stack
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
cp .env.example .env
|
|
97
|
+
docker compose up -d
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
This starts Postgres, Redis, MinIO, MLflow, the FastAPI control plane, the worker, and a
|
|
101
|
+
local serving runtime.
|
|
102
|
+
|
|
103
|
+
| Service | URL |
|
|
104
|
+
| ------------- | ----------------------- |
|
|
105
|
+
| Control plane | http://localhost:8000 |
|
|
106
|
+
| API docs | http://localhost:8000/docs |
|
|
107
|
+
| MLflow UI | http://localhost:5000 |
|
|
108
|
+
| MinIO console | http://localhost:9001 |
|
|
109
|
+
|
|
110
|
+
### 2. Install the SDK
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
uv sync # or: pip install -e .
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 3. Run the example workflow
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import localml as ml
|
|
120
|
+
|
|
121
|
+
ml.configure(api_url="http://localhost:8000", token="local-dev-token")
|
|
122
|
+
|
|
123
|
+
with ml.start_run(project="local-demo", config={"model": "tiny-llm"}) as run:
|
|
124
|
+
ml.log_params({"batch_size": 4, "quantization": "4bit"})
|
|
125
|
+
ml.log_metrics({"baseline_accuracy": 0.82})
|
|
126
|
+
|
|
127
|
+
version = ml.huggingface.log_pretrained(
|
|
128
|
+
name="tiny-assistant",
|
|
129
|
+
model_dir="./models/tiny-assistant",
|
|
130
|
+
metadata={"task": "chat", "runtime": "mlx"},
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
eval_job = ml.evaluate(
|
|
134
|
+
model=version,
|
|
135
|
+
dataset="datasets/eval.jsonl",
|
|
136
|
+
metrics=["exact_match", "latency_p95"],
|
|
137
|
+
)
|
|
138
|
+
eval_job.wait()
|
|
139
|
+
|
|
140
|
+
deployment = ml.deploy(model=version, target="local")
|
|
141
|
+
print(deployment.predict({"prompt": "Explain model registries simply."}))
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### CLI
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
localml --help
|
|
148
|
+
localml projects list
|
|
149
|
+
localml runs get <run_id>
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Development
|
|
153
|
+
|
|
154
|
+
Uses [`uv`](https://docs.astral.sh/uv/) for Python and dependency management; `uv.lock` is
|
|
155
|
+
canonical and CI runs with `UV_FROZEN=true`.
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
uv sync
|
|
159
|
+
pre-commit install
|
|
160
|
+
|
|
161
|
+
uv run pytest # tests with coverage
|
|
162
|
+
uv run ruff check # lint
|
|
163
|
+
uv run ruff format --check # format check
|
|
164
|
+
uv run ty check src/ # type check
|
|
165
|
+
uv run zensical serve # live-preview the docs
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Docs are authored in `docs/` and built with [Zensical](https://zensical.org);
|
|
169
|
+
`docs.yml` deploys them to GitHub Pages on every push to `main`.
|
|
170
|
+
|
|
171
|
+
## Model lifecycle
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
created → candidate → staging → production → deprecated → archived
|
|
175
|
+
↘ failed (from candidate/staging) ↘ archived (terminal)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
localml/__init__.py,sha256=qCJbPjMEAiafeI9yE6A5bpkprJudTwAy6geIdAu8Ga8,1564
|
|
2
|
+
localml/_state.py,sha256=2Klfw468iQVkVz8NQvOMlU1r6Js3M2zfd56zCEt7HjE,834
|
|
3
|
+
localml/cli.py,sha256=5-x4GX8O4eJi6X_Guj914PPc7Xil59Le39MVt1mWKks,1643
|
|
4
|
+
localml/client.py,sha256=YgmY7avfUexfsc_F_gHgqDkJ81WzQQT5VT7h-dYdEJE,7255
|
|
5
|
+
localml/config.py,sha256=xMvsczgau0AeloYXPZRGdNjGkpOPRaWm7oXp9k-_Sro,2547
|
|
6
|
+
localml/exceptions.py,sha256=EsDxXQ5pkTP5gA_wOrO0N420ugH6Fxogp9rXSHm93B8,977
|
|
7
|
+
localml/huggingface.py,sha256=VdOp-rWV25HBGhKtvPXY6Cmj4geExxeJwzVd_BYuP6Y,935
|
|
8
|
+
localml/jax.py,sha256=srGhJ5aSvIMDV8PrGveAcvr4WO5Qa8C76YWzL18z_XA,1475
|
|
9
|
+
localml/mlx.py,sha256=jZjA3CfHFRtgn6UlPQrjZitCWfDO2-pGHtHZFp_Cjso,909
|
|
10
|
+
localml/ops.py,sha256=tK063QX3DBiR9sVOnTko78kKN3CKTd2qb_up6MXJh5M,2512
|
|
11
|
+
localml/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
localml/run.py,sha256=BJNqYn2VcjHgZ2vpfjVcrb-ZO_x-Dax1ni4_4Y4MfJE,1137
|
|
13
|
+
localml/torch.py,sha256=CCzwP3BAAMN3IhszsgK7v3a5t105JH15JKOcGpRiq8c,1395
|
|
14
|
+
localml/types.py,sha256=BZN_6GuOFeME2g35vll_sXGU368yBPaREGGqPEpZ5dQ,2952
|
|
15
|
+
localml/adapters/__init__.py,sha256=8Vi0Cw7Ie99fzp3P2qCu1T2elDh7-aGCFiv3WiWUOn8,247
|
|
16
|
+
localml/adapters/base.py,sha256=7xzhJd6wJ0i36buaeXFDbNcNHH4hf3MFb2nhCf3oX-I,1777
|
|
17
|
+
localml-0.1.0.dist-info/METADATA,sha256=n2H7KixwkCiNC5HO-G7avo1m7rkzV6IT-o3GT9Vv27I,6060
|
|
18
|
+
localml-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
19
|
+
localml-0.1.0.dist-info/entry_points.txt,sha256=xgFy7ist_6zSy97ermiBJwaIFdQ170Z03LglaKz42XU,44
|
|
20
|
+
localml-0.1.0.dist-info/licenses/LICENSE,sha256=SBXDdJAtgrn8Y3ZseXAIE6WsLLcdGPhstydvQ01HDa4,1081
|
|
21
|
+
localml-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Guenevere Prawiroatmodjo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|