methodic-research 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- methodic/__init__.py +79 -0
- methodic/assets.py +143 -0
- methodic/chronicle.py +88 -0
- methodic/errors.py +70 -0
- methodic/experiments.py +342 -0
- methodic/reports.py +294 -0
- methodic/runs.py +306 -0
- methodic/search.py +78 -0
- methodic/transport.py +91 -0
- methodic/types.py +344 -0
- methodic/upload_tracker.py +181 -0
- methodic/variations.py +166 -0
- methodic_research-0.1.2.dist-info/METADATA +19 -0
- methodic_research-0.1.2.dist-info/RECORD +16 -0
- methodic_research-0.1.2.dist-info/WHEEL +5 -0
- methodic_research-0.1.2.dist-info/top_level.txt +1 -0
methodic/types.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""Dataclasses for Chronicle response payloads.
|
|
2
|
+
|
|
3
|
+
Wire types use plain strings for UUIDs and ISO 8601 datetimes — same as
|
|
4
|
+
the JSON they originate from. Callers that want strong typing can parse
|
|
5
|
+
on their side; we don't bake `uuid.UUID` / `datetime.datetime` parsing
|
|
6
|
+
into the SDK to keep dependencies minimal and round-tripping explicit.
|
|
7
|
+
|
|
8
|
+
Each dataclass exposes a `from_dict` classmethod that ignores unknown
|
|
9
|
+
keys, so server-side additions don't break older clients.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import dataclasses
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any, TypeVar
|
|
17
|
+
|
|
18
|
+
T = TypeVar("T")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _from_dict(cls: type[T], data: dict[str, Any]) -> T:
    """Instantiate dataclass `cls` from `data`, ignoring undeclared keys.

    Only keys matching a field declared on `cls` are forwarded to the
    constructor, so payloads carrying extra keys still deserialize. A
    declared field that is absent from `data` falls back to its default;
    if it has none, the constructor raises `TypeError`.
    """
    kwargs: dict[str, Any] = {}
    for field in dataclasses.fields(cls):  # type: ignore[arg-type]
        if field.name in data:
            kwargs[field.name] = data[field.name]
    return cls(**kwargs)  # type: ignore[arg-type]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Experiment:
    """Client-side mirror of the server's `Experiment` struct.

    Values are kept exactly as they appear in the JSON payload (plain
    strings, no parsing). `git_repo_state` falls back to `"pending"` so
    that payloads from older servers, which may omit the field, still
    deserialize cleanly.
    """

    # Required fields: the constructor raises TypeError when one is missing.
    id: str
    owner_subject: str
    hypothesis_summary: str
    created_at: str
    created_by: str
    state: str

    # Optional fields: left as None when the payload does not carry them.
    rationale: str | None = None
    description: str | None = None
    committed_at: str | None = None
    concluded_at: str | None = None
    retracted_at: str | None = None
    retraction_reason: str | None = None

    # Git repository fields; only the state has a non-None default.
    git_repo_state: str = "pending"
    git_repo_url: str | None = None
    git_repo_failure_reason: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Experiment:
        """Build an `Experiment` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class VariationSummary:
    """Summary of a single variation, as listed in `ExperimentDetail.variations`."""

    # Required fields.
    variation: int
    created_at: str
    run_count: int
    state: str

    # Optional fields: None when the payload does not carry them.
    description: str | None = None
    latest_status: str | None = None
    committed_at: str | None = None
    retracted_at: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> VariationSummary:
        """Build a `VariationSummary` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class Variation:
    """Client-side mirror of the server's `Variation` struct.

    The three configuration payloads (`config_json`,
    `accelerate_config_json`, `launch_config`) are open-schema JSON, so
    they are stored untouched as `dict[str, Any]`.
    """

    # Required fields.
    experiment_id: str
    variation: int
    config_json: dict[str, Any]
    config_yaml: str
    created_at: str
    created_by: str
    state: str

    # Optional configuration payloads.
    accelerate_config_json: dict[str, Any] | None = None
    accelerate_config_yaml: str | None = None
    launch_config: dict[str, Any] | None = None

    # Optional descriptive and lifecycle fields.
    description: str | None = None
    committed_at: str | None = None
    retracted_at: str | None = None
    retraction_reason: str | None = None

    # Optional git association.
    git_ref: str | None = None
    git_sha: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Variation:
        """Build a `Variation` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class ExperimentDetail:
    """`GET /experiments/{id}` response: experiment + parents + variation summaries."""

    experiment: Experiment
    parent_ids: list[str]
    variations: list[VariationSummary]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ExperimentDetail:
        """Build an `ExperimentDetail` from a decoded JSON object.

        `parent_ids` and `variations` become empty lists when the key is
        missing or its value is JSON `null`.

        Raises:
            KeyError: if `data` has no `"experiment"` entry.
        """
        # `or []` (rather than a `.get` default) also covers an explicit
        # null, which would otherwise fail in `list(None)` / iteration.
        parent_ids = data.get("parent_ids") or []
        summaries = data.get("variations") or []
        return cls(
            experiment=Experiment.from_dict(data["experiment"]),
            parent_ids=list(parent_ids),
            variations=[VariationSummary.from_dict(v) for v in summaries],
        )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dataclass
class ExperimentSummary:
    """A single row of the experiments list."""

    # Required fields.
    id: str
    hypothesis_summary: str
    variation_count: int
    created_at: str
    created_by: str
    state: str

    # Optional fields: None when the payload does not carry them.
    status: str | None = None
    committed_at: str | None = None
    concluded_at: str | None = None
    retracted_at: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ExperimentSummary:
        """Build an `ExperimentSummary` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass
class LineageResponse:
    """Ancestor and descendant experiments of one experiment."""

    experiment_id: str
    ancestors: list[Experiment]
    descendants: list[Experiment]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> LineageResponse:
        """Build a `LineageResponse` from a decoded JSON object.

        `ancestors` and `descendants` become empty lists when the key is
        missing or its value is JSON `null`.

        Raises:
            KeyError: if `data` has no `"experiment_id"` entry.
        """
        # `or []` also covers an explicit null, which a `.get` default
        # would pass through and then fail to iterate.
        ancestors = data.get("ancestors") or []
        descendants = data.get("descendants") or []
        return cls(
            experiment_id=data["experiment_id"],
            ancestors=[Experiment.from_dict(e) for e in ancestors],
            descendants=[Experiment.from_dict(e) for e in descendants],
        )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass
class UpstreamRetraction:
    """One entry of `UpstreamRetractionsResponse.retractions`."""

    # Required fields.
    experiment_id: str
    retracted_at: str
    reason: str
    depth: int

    # Optional fields: None when the payload does not carry them.
    variation: int | None = None
    document_asset_id: str | None = None
    chain: list[str] | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> UpstreamRetraction:
        """Build an `UpstreamRetraction` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclass
class UpstreamRetractionsResponse:
    """Upstream retractions payload: a flag plus the individual entries."""

    has_retractions: bool
    retractions: list[UpstreamRetraction]

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> UpstreamRetractionsResponse:
        """Build an `UpstreamRetractionsResponse` from a decoded JSON object.

        A missing or null `has_retractions` becomes `False`; a missing or
        null `retractions` becomes an empty list.
        """
        # `or []` also covers an explicit null, which a `.get` default
        # would pass through and then fail to iterate.
        entries = data.get("retractions") or []
        return cls(
            has_retractions=bool(data.get("has_retractions", False)),
            retractions=[UpstreamRetraction.from_dict(r) for r in entries],
        )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass
class CreateExperimentResponse:
    """`POST /experiments` response.

    Carries the id of the new experiment together with the variation 0 /
    run 0 pair that is always created alongside it.
    """

    experiment_id: str
    variation: int
    run: int

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CreateExperimentResponse:
        """Build a `CreateExperimentResponse` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@dataclass
class GitBranch:
    """A single branch of the experiment repo, as returned by `experiments.git_status`."""

    name: str
    head_sha: str
    # None when the payload does not carry a variation for this branch.
    variation: int | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> GitBranch:
        """Build a `GitBranch` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
@dataclass
class GitToken:
    """1-hour GitHub installation access token from `experiments.mint_git_token`.

    Callers use `token` in a git remote URL of the form
    `https://x:<token>@github.com/<org>/<repo>`. Per the design, the
    server strips the Administration permission from the token, so it
    cannot push to `agent/*` branches or modify branch protection — only
    the Chronicle App can do those.
    """

    token: str
    expires_at: str
    repo_url: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> GitToken:
        """Build a `GitToken` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
@dataclass
class GitStatus:
    """Response from `GET /experiments/{id}/git`.

    `state` mirrors `Experiment.git_repo_state`. When `state == "ready"`,
    `repo_url` is populated; when `state == "failed"`, `failure_reason` is
    populated. `branches` is empty until branch enumeration is wired up
    server-side (Phase 2 follow-up).
    """

    state: str
    default_branch: str = "main"
    repo_url: str | None = None
    failure_reason: str | None = None
    branches: list[GitBranch] = dataclasses.field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> GitStatus:
        """Build a `GitStatus` from a decoded JSON object.

        A missing or null `default_branch` becomes `"main"`; a missing or
        null `branches` becomes an empty list.

        Raises:
            KeyError: if `data` has no `"state"` entry.
        """
        # `or` (rather than a `.get` default) also covers an explicit null,
        # which would otherwise put None in a `str` field or fail to iterate.
        branches = data.get("branches") or []
        return cls(
            state=data["state"],
            default_branch=data.get("default_branch") or "main",
            repo_url=data.get("repo_url"),
            failure_reason=data.get("failure_reason"),
            branches=[GitBranch.from_dict(b) for b in branches],
        )
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
@dataclass
class SearchFilters:
    """Caller-supplied filters, applied on top of the RBAC + namespace filters the server adds."""

    asset_types: list[str] | None = None
    organization_id: str | None = None
    team_id: str | None = None
    created_after: str | None = None
    created_before: str | None = None
    created_by: str | None = None
    source_type: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the filters as a dict, leaving out every field that is None.

        Keys appear in field-declaration order and values are passed
        through as-is (no copying).
        """
        names = (
            "asset_types",
            "organization_id",
            "team_id",
            "created_after",
            "created_before",
            "created_by",
            "source_type",
        )
        serialized: dict[str, Any] = {}
        for name in names:
            value = getattr(self, name)
            if value is None:
                continue
            serialized[name] = value
        return serialized
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
@dataclass
class SearchResult:
    """A single hit returned by the Vertex-backed search."""

    # Required fields.
    document_id: str
    source_type: str
    relevance_score: float
    lineage_boost: bool

    # Optional fields: None (or a fresh empty list) when not supplied.
    asset_type: str | None = None
    title: str | None = None
    snippet: str | None = None
    experiment_ids: list[str] = dataclasses.field(default_factory=list)
    created_at: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SearchResult:
        """Build a `SearchResult` from a decoded JSON object."""
        return _from_dict(cls, data)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@dataclass
class SearchResponse:
    """One page of search hits plus paging information."""

    results: list[SearchResult]
    total_size: int
    next_page_token: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SearchResponse:
        """Build a `SearchResponse` from a decoded JSON object.

        A missing or null `results` becomes an empty list and a missing or
        null `total_size` becomes 0.
        """
        # `or` (rather than a `.get` default) also covers an explicit null,
        # which would otherwise fail in iteration / `int(None)`.
        hits = data.get("results") or []
        return cls(
            results=[SearchResult.from_dict(r) for r in hits],
            total_size=int(data.get("total_size") or 0),
            next_page_token=data.get("next_page_token"),
        )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
@dataclass
class ExperimentListPage:
    """One page of `experiments.list` results plus a cursor for the next page.

    The current server returns a flat array; we normalize that into a
    single-page response with `next_page_token=None`. When the server
    grows pagination, the same dataclass keeps working.
    """

    results: list[ExperimentSummary]
    next_page_token: str | None = None

    @classmethod
    def from_dict(cls, data: Any) -> ExperimentListPage:
        """Build a page from either response shape.

        Args:
            data: A JSON array of experiment summaries (flat form), or a
                JSON object with `results` and optionally
                `next_page_token` (paginated form).

        In the object form, a missing or null `results` becomes an empty
        list.
        """
        if isinstance(data, list):
            rows, token = data, None
        else:
            # `or []` also covers an explicit null, which a `.get` default
            # would pass through and then fail to iterate.
            rows = data.get("results") or []
            token = data.get("next_page_token")
        return cls(
            results=[ExperimentSummary.from_dict(e) for e in rows],
            next_page_token=token,
        )
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Local SQLite tracker for binary asset upload state.
|
|
2
|
+
|
|
3
|
+
Shared by all producers (checkpoint manager, renderer). Enables:
|
|
4
|
+
1. Crash recovery — on restart, find the latest checkpoint for training resume
|
|
5
|
+
2. Upload coordination — background thread drains the queue
|
|
6
|
+
Uses WAL mode for safe concurrent access. See docs/design.md for full design.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import sqlite3
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
_CREATE_TABLE = """
|
|
18
|
+
CREATE TABLE IF NOT EXISTS upload_state (
|
|
19
|
+
asset_id TEXT NOT NULL,
|
|
20
|
+
asset_type TEXT NOT NULL,
|
|
21
|
+
component TEXT NOT NULL,
|
|
22
|
+
local_path TEXT NOT NULL,
|
|
23
|
+
size INTEGER,
|
|
24
|
+
state TEXT NOT NULL,
|
|
25
|
+
created_at TEXT NOT NULL,
|
|
26
|
+
PRIMARY KEY (asset_id, component)
|
|
27
|
+
)
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class PendingUpload:
    """One component row that still needs uploading.

    Each attribute corresponds to the `upload_state` column of the same
    name.
    """

    asset_id: str
    asset_type: str
    component: str
    local_path: str
    state: str
    created_at: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class UploadTracker:
    """Tracks upload state in a local SQLite database.

    Multiple producers can register files concurrently; the background
    upload thread drains the queue. All threads share one connection
    (opened with `check_same_thread=False`), so every statement runs under
    an internal lock. WAL journaling alone is not enough here: transaction
    boundaries on a shared connection are common to all threads, so
    without the lock a `commit()` from one thread could land in the middle
    of another thread's batch.

    The tracker is passive storage — it doesn't gate work. Atomicity at
    the asset level (an asset is invisible until all components ship) is
    enforced by the Chronicle backend's `pending → ready` finalize state
    machine, not here. The tracker provides two things: batch-atomic
    registration via SQLite transactions (a failed write is rolled back),
    and persistent state so a restarted process can resume incomplete
    uploads.

    Caller requirement: register all components for a given asset_id in a
    SINGLE `register_components()` batch. Splitting registration across
    calls races against `all_components_completed()` — that check returns
    true once every row currently in the table is COMPLETED, with no
    notion of "components still to come". A second batch registered after
    the first has already finalized lands too late: the asset is already
    `ready` with only the first batch's components. `register_and_upload_async()`
    on `Client` handles this for you by registering the full component
    list in one shot.
    """

    _INSERT_PENDING = (
        "INSERT OR REPLACE INTO upload_state "
        "(asset_id, asset_type, component, local_path, size, state, created_at) "
        "VALUES (?, ?, ?, ?, ?, 'PENDING', ?)"
    )

    def __init__(self, db_path: str | Path = "upload_state.db") -> None:
        """Open (or create) the tracker database at `db_path`."""
        # Function-scope import keeps this class self-contained without
        # changing the module's import block.
        import threading

        self._lock = threading.Lock()
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        self._conn.execute("PRAGMA journal_mode=WAL")
        with self._conn:
            self._conn.execute(_CREATE_TABLE)

    def _now(self) -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _write(self, sql: str, rows: list[tuple]) -> None:
        """Run `sql` once per parameter tuple in `rows` as one transaction.

        The connection context manager commits on success and rolls back
        if any row fails, so a partial batch is never left in the open
        transaction for a later commit to persist.
        """
        with self._lock, self._conn:
            self._conn.executemany(sql, rows)

    def _read(self, sql: str, params: tuple = ()) -> list[tuple]:
        """Run a query under the lock and return all result rows."""
        with self._lock:
            return self._conn.execute(sql, params).fetchall()

    def register_components(
        self,
        asset_id: str,
        asset_type: str,
        components: list[tuple[str, str, int | None]],
    ) -> None:
        """Register all components for an asset as PENDING in a single batch.

        The batch is all-or-nothing: if any row is rejected, none of the
        batch's rows is stored. An existing row with the same
        (asset_id, component) is replaced and reset to PENDING.

        Args:
            asset_id: Chronicle asset UUID.
            asset_type: 'checkpoint', 'snapshot', 'visualization', etc.
            components: List of (component_name, local_path, size) tuples.
        """
        now = self._now()
        self._write(
            self._INSERT_PENDING,
            [(asset_id, asset_type, name, path, size, now) for name, path, size in components],
        )

    def mark_pending(
        self,
        asset_id: str,
        component: str,
        local_path: str,
        size: int | None = None,
        asset_type: str = "unknown",
    ) -> None:
        """Register a single component as PENDING.

        Replaces any existing row for the same (asset_id, component).
        """
        self._write(
            self._INSERT_PENDING,
            [(asset_id, asset_type, component, local_path, size, self._now())],
        )

    def mark_uploading(self, asset_id: str, component: str) -> None:
        """Set the component's state to UPLOADING; a no-op if no such row exists."""
        self._write(
            "UPDATE upload_state SET state = 'UPLOADING' "
            "WHERE asset_id = ? AND component = ?",
            [(asset_id, component)],
        )

    def mark_completed(self, asset_id: str, component: str) -> None:
        """Set the component's state to COMPLETED; a no-op if no such row exists."""
        self._write(
            "UPDATE upload_state SET state = 'COMPLETED' "
            "WHERE asset_id = ? AND component = ?",
            [(asset_id, component)],
        )

    def get_incomplete(self) -> list[PendingUpload]:
        """Return all rows that are not COMPLETED."""
        rows = self._read(
            "SELECT asset_id, asset_type, component, local_path, state, created_at "
            "FROM upload_state WHERE state != 'COMPLETED'"
        )
        return [
            PendingUpload(
                asset_id=r[0], asset_type=r[1], component=r[2],
                local_path=r[3], state=r[4], created_at=r[5],
            )
            for r in rows
        ]

    def get_latest_checkpoint_path(self) -> str | None:
        """Get the directory of the most recently registered checkpoint asset.

        Picks any component row of the latest checkpoint asset (by
        created_at) and returns the parent directory of its local_path.
        Returns None if no checkpoints are registered.

        Used for in-container crash recovery — the latest registered checkpoint
        is the last one fully flushed to disk.
        """
        rows = self._read(
            "SELECT local_path FROM upload_state "
            "WHERE asset_type = 'checkpoint' "
            "ORDER BY created_at DESC LIMIT 1"
        )
        if not rows:
            return None
        # The checkpoint directory is the parent of the component file
        return str(Path(rows[0][0]).parent)

    def has_registered_assets(self) -> bool:
        """Check if there are any registered assets (indicates a restart)."""
        return self._read("SELECT COUNT(*) FROM upload_state")[0][0] > 0

    def all_components_completed(self, asset_id: str) -> bool:
        """Check if all components for an asset are in COMPLETED state.

        Also returns True when no rows exist for `asset_id`, because the
        check counts rows that are not COMPLETED.
        """
        rows = self._read(
            "SELECT COUNT(*) FROM upload_state "
            "WHERE asset_id = ? AND state != 'COMPLETED'",
            (asset_id,),
        )
        return rows[0][0] == 0

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        with self._lock:
            self._conn.close()
|
methodic/variations.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Variations namespace and resource handle."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
from methodic.transport import Transport
|
|
9
|
+
from methodic.types import Variation as VariationData
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from methodic.chronicle import Chronicle
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VariationsAPI:
    """Variations namespace; every operation is addressed by (experiment_id, variation)."""

    def __init__(self, transport: Transport, chronicle: Chronicle) -> None:
        self._chronicle = chronicle
        self._t = transport

    @staticmethod
    def _path(experiment_id: str, variation: int | None = None) -> str:
        """Return the collection path, or the item path when `variation` is given."""
        collection = f"/experiments/{experiment_id}/variations"
        if variation is None:
            return collection
        return f"{collection}/{variation}"

    def create(
        self,
        experiment_id: str,
        *,
        config_yaml: str,
        accelerate_config_yaml: str | None = None,
        launch_config: dict[str, Any] | None = None,
        description: str | None = None,
        input_asset_ids: list[str] | None = None,
        git_ref: str | None = None,
    ) -> Variation:
        """Create a new variation under `experiment_id` and return a `Variation` handle.

        Optional arguments left as None are omitted from the request body.

        `git_ref` optionally associates the variation with a branch on the
        experiment's GitHub repo. Server captures the branch name now;
        SHA resolution + the branch-rename-to-`agent/...` flow happens at
        variation commit (Phase 3). Pre-Phase-3, registering with `git_ref`
        is informational only.
        """
        optional_fields: dict[str, Any] = {
            "accelerate_config_yaml": accelerate_config_yaml,
            "launch_config": launch_config,
            "description": description,
            "input_asset_ids": input_asset_ids,
            "git_ref": git_ref,
        }
        payload: dict[str, Any] = {"config_yaml": config_yaml}
        payload.update(
            (key, value) for key, value in optional_fields.items() if value is not None
        )

        created = self._t.post(self._path(experiment_id), json=payload)
        return Variation(self._chronicle, experiment_id, created["variation"])

    def get(self, experiment_id: str, variation: int) -> VariationData:
        """Fetch one variation record and decode it into `VariationData`."""
        raw = self._t.get(self._path(experiment_id, variation))
        return VariationData.from_dict(raw)

    def commit(self, experiment_id: str, variation: int) -> dict[str, Any]:
        """PUT to the variation's `/commit` endpoint and return the response."""
        item = self._path(experiment_id, variation)
        return self._t.put(f"{item}/commit")

    def retract(
        self,
        experiment_id: str,
        variation: int,
        *,
        reason: str,
        document_asset_id: str | None = None,
    ) -> dict[str, Any]:
        """PUT to the variation's `/retract` endpoint and return the response.

        `document_asset_id` is sent only when it is not None.
        """
        body: dict[str, Any] = {"reason": reason}
        if document_asset_id is not None:
            body["document_asset_id"] = document_asset_id
        item = self._path(experiment_id, variation)
        return self._t.put(f"{item}/retract", json=body)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class _BoundVariations:
    """`exp.variations` — a `VariationsAPI` with the experiment id already filled in.

    Every method forwards to the same-named `VariationsAPI` method,
    passing the bound experiment id as the first argument.
    """

    def __init__(self, api: VariationsAPI, experiment_id: str) -> None:
        self._experiment_id = experiment_id
        self._api = api

    def create(self, **kwargs: Any) -> Variation:
        """Create a variation under the bound experiment; kwargs are passed through."""
        bound_id = self._experiment_id
        return self._api.create(bound_id, **kwargs)

    def get(self, variation: int) -> VariationData:
        """Fetch variation `variation` of the bound experiment."""
        bound_id = self._experiment_id
        return self._api.get(bound_id, variation)

    def commit(self, variation: int) -> dict[str, Any]:
        """Commit variation `variation` of the bound experiment."""
        bound_id = self._experiment_id
        return self._api.commit(bound_id, variation)

    def retract(
        self, variation: int, *, reason: str, document_asset_id: str | None = None
    ) -> dict[str, Any]:
        """Retract variation `variation` of the bound experiment."""
        bound_id = self._experiment_id
        return self._api.retract(
            bound_id,
            variation,
            reason=reason,
            document_asset_id=document_asset_id,
        )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class Variation:
    """Handle for a single variation.

    Stores the (experiment_id, variation) key and caches the server-side
    record, which is loaded on first access and dropped after `commit()`
    or `retract()` so that the next access fetches it again.
    """

    def __init__(
        self,
        chronicle: Chronicle,
        experiment_id: str,
        variation: int,
        *,
        _data: VariationData | None = None,
    ) -> None:
        self.experiment_id = experiment_id
        self.variation = variation
        self._chronicle = chronicle
        # Cached record; None means "not loaded yet" or "invalidated".
        self._data = _data

    @property
    def data(self) -> VariationData:
        """Server-side variation record, fetched on demand and then cached."""
        record = self._data
        if record is not None:
            return record
        record = self._chronicle.variations.get(self.experiment_id, self.variation)
        self._data = record
        return record

    @property
    def state(self) -> str:
        """`state` of the server-side record."""
        return self.data.state

    @property
    def committed_at(self) -> str | None:
        """`committed_at` of the server-side record."""
        return self.data.committed_at

    @property
    def retracted_at(self) -> str | None:
        """`retracted_at` of the server-side record."""
        return self.data.retracted_at

    @property
    def config_yaml(self) -> str:
        """`config_yaml` of the server-side record."""
        return self.data.config_yaml

    @property
    def launch_config(self) -> dict[str, Any] | None:
        """`launch_config` of the server-side record."""
        return self.data.launch_config

    def commit(self) -> Variation:
        """Commit this variation, drop the cached record, and return `self`."""
        api = self._chronicle.variations
        api.commit(self.experiment_id, self.variation)
        # Invalidate only after the call succeeded; the next `data` access refetches.
        self._data = None
        return self

    def retract(self, *, reason: str, document_asset_id: str | None = None) -> Variation:
        """Retract this variation, drop the cached record, and return `self`."""
        api = self._chronicle.variations
        api.retract(
            self.experiment_id,
            self.variation,
            reason=reason,
            document_asset_id=document_asset_id,
        )
        # Invalidate only after the call succeeded; the next `data` access refetches.
        self._data = None
        return self
|