methodic-research 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
methodic/runs.py ADDED
@@ -0,0 +1,306 @@
1
+ """Run lifecycle and worker-side asset uploads.
2
+
3
+ `RunsAPI` is the stateless namespace; `Run` is a resource handle bound to a
4
+ specific `(experiment_id, variation, run)` triple. Workers should use the
5
+ handle (`chronicle.run(exp_id, v, r)`) — the convenience methods auto-populate
6
+ the `output_of` field on uploaded assets and track in-flight async uploads.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from concurrent.futures import Future, ThreadPoolExecutor
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ from methodic.assets import AssetsAPI, AssetUploadInfo
17
+ from methodic.transport import Transport
18
+
19
+ if TYPE_CHECKING:
20
+ from methodic.upload_tracker import UploadTracker
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class RunsAPI:
    """Stateless namespace for the run-lifecycle endpoints.

    Nothing about a particular run is remembered between calls: every method
    receives the `(experiment_id, variation, run)` identifiers it needs.
    """

    def __init__(
        self, transport: Transport, assets: AssetsAPI, executor: ThreadPoolExecutor
    ) -> None:
        self._t, self._assets, self._executor = transport, assets, executor

    # --- Path helpers ---

    @staticmethod
    def _variation_path(experiment_id: str, variation: int) -> str:
        """Return the REST path of a variation."""
        return "/experiments/{}/variations/{}".format(experiment_id, variation)

    def _run_path(self, experiment_id: str, variation: int, run: int) -> str:
        """Return the REST path of a run, nested under its variation path."""
        variation_path = self._variation_path(experiment_id, variation)
        return f"{variation_path}/runs/{run}"

    def _post_run_action(
        self, experiment_id: str, variation: int, run: int, action: str, **post_kwargs: Any
    ) -> None:
        """POST to `<run path>/<action>`, forwarding any extra keyword arguments."""
        endpoint = f"{self._run_path(experiment_id, variation, run)}/{action}"
        self._t.post(endpoint, **post_kwargs)

    # --- Reads ---

    def get_variation_config(self, experiment_id: str, variation: int) -> dict[str, Any]:
        """GET the variation path and return the decoded response."""
        config_path = self._variation_path(experiment_id, variation)
        logger.info("Pulling variation config from %s", config_path)
        return self._t.get(config_path)

    def get_status(self, experiment_id: str, variation: int, run: int) -> Any:
        """GET `<run path>/status` and return the decoded response."""
        status_path = f"{self._run_path(experiment_id, variation, run)}/status"
        return self._t.get(status_path)

    # --- Lifecycle transitions ---

    def start(self, experiment_id: str, variation: int, run: int) -> None:
        """Log the transition, then POST `<run path>/start`."""
        logger.info("Starting run %s/v%s/r%s", experiment_id, variation, run)
        self._post_run_action(experiment_id, variation, run, "start")

    def succeed(self, experiment_id: str, variation: int, run: int) -> None:
        """Log the transition, then POST `<run path>/succeed`."""
        logger.info("Run %s/v%s/r%s succeeded", experiment_id, variation, run)
        self._post_run_action(experiment_id, variation, run, "succeed")

    def fail(
        self, experiment_id: str, variation: int, run: int, *, reason: str = "crash"
    ) -> None:
        """Mark a run failed. `reason` is `crash` (worker error) or `abandoned` (cancel)."""
        logger.info(
            "Run %s/v%s/r%s failed (reason=%s)", experiment_id, variation, run, reason
        )
        body = {"reason": reason}
        self._post_run_action(experiment_id, variation, run, "fail", json=body)

    def heartbeat(self, experiment_id: str, variation: int, run: int) -> None:
        """POST the run triple to `/heartbeat` with a 10-second timeout."""
        triple = {"experiment_id": experiment_id, "variation": variation, "run": run}
        self._t.post("/heartbeat", json=triple, timeout=10)
77
+
78
+
79
class Run:
    """Handle for a specific `(experiment_id, variation, run)` run.

    Mutators return `self` so worker code can chain (`run.start().heartbeat()`).
    Asset-upload helpers auto-populate `output_of` from the bound triple, and
    background uploads submitted through the handle are tracked so that
    `succeed()` can wait for them.
    """

    def __init__(self, api: RunsAPI, experiment_id: str, variation: int, run: int) -> None:
        self._api = api
        self.experiment_id = experiment_id
        self.variation = variation
        self.run = run
        # Futures of background uploads submitted through this handle.
        self._pending_uploads: list[Future] = []

    @property
    def _output_of(self) -> dict[str, Any]:
        """The `output_of` payload that links an uploaded asset to this run."""
        return {
            "experiment_id": self.experiment_id,
            "variation": self.variation,
            "run": self.run,
        }

    @staticmethod
    def _list_components(local_dir: Path) -> list[str]:
        """Return the relative path of every file under `local_dir`.

        Names use forward slashes on every platform and are sorted, so the same
        directory always produces the same component list.
        """
        return sorted(
            entry.relative_to(local_dir).as_posix()
            for entry in local_dir.rglob("*")
            if entry.is_file()
        )

    # --- Lifecycle ---

    def get_variation_config(self) -> dict[str, Any]:
        """Fetch the config of the variation this run belongs to."""
        return self._api.get_variation_config(self.experiment_id, self.variation)

    def get_status(self) -> Any:
        """Fetch the status of this run."""
        return self._api.get_status(self.experiment_id, self.variation, self.run)

    def start(self) -> Run:
        """Mark the run started."""
        self._api.start(self.experiment_id, self.variation, self.run)
        return self

    def succeed(self) -> Run:
        """Mark the run succeeded after waiting for any pending async uploads.

        If an upload failed, its exception propagates and the run is not
        marked succeeded.
        """
        self.wait_for_uploads()
        self._api.succeed(self.experiment_id, self.variation, self.run)
        return self

    def fail(self, reason: str = "crash") -> Run:
        """Mark the run failed with the given `reason`. Does not wait for uploads."""
        self._api.fail(self.experiment_id, self.variation, self.run, reason=reason)
        return self

    def heartbeat(self) -> Run:
        """Send a heartbeat for this run."""
        self._api.heartbeat(self.experiment_id, self.variation, self.run)
        return self

    # --- Asset uploads (output_of bound to this run) ---

    def upload_asset(
        self,
        asset_type: str,
        content: Any,
        name: str | None = None,
        content_type: str = "application/json",
        asset_config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Upload a small inline asset (auto-finalized). Linked to this run as output."""
        return self._api._assets.create_inline(
            asset_type=asset_type,
            content=content,
            name=name,
            content_type=content_type,
            output_of=self._output_of,
            asset_config=asset_config,
        )

    def upload_environment(self, environment: dict[str, Any]) -> None:
        """Upload `environment` as an inline asset of type `environment`."""
        self.upload_asset(asset_type="environment", content=environment)

    def create_asset_presigned(
        self,
        asset_type: str,
        components: list[str],
        name: str | None = None,
        content_type: str = "application/octet-stream",
    ) -> AssetUploadInfo:
        """Register a new asset for component upload via presigned URLs."""
        return self._api._assets.create_with_presigned(
            asset_type=asset_type,
            components=components,
            name=name,
            content_type=content_type,
            output_of=self._output_of,
        )

    def upload_component(
        self, upload_url: str, local_path: Path, content_type: str
    ) -> None:
        """Upload one local file to a presigned `upload_url`."""
        self._api._assets.upload_component(upload_url, local_path, content_type)

    def finalize_asset(self, asset_id: str) -> None:
        """Finalize a previously registered asset."""
        self._api._assets.finalize(asset_id)

    def get_asset(self, asset_id: str, include_presigned: bool = False) -> dict[str, Any]:
        """Fetch an asset record, optionally including presigned URLs."""
        return self._api._assets.get(asset_id, include_presigned=include_presigned)

    def presign_asset(
        self, asset_id: str, operation: str = "read", components: list[str] | None = None
    ) -> dict[str, Any]:
        """Request presigned URLs for `operation` on an asset's components."""
        return self._api._assets.presign(asset_id, operation=operation, components=components)

    def download_asset(self, asset_id: str, local_dir: Path) -> Path:
        """Download an asset into `local_dir` and return the resulting path."""
        return self._api._assets.download(asset_id, local_dir)

    def upload_file(
        self,
        local_path: Path,
        asset_type: str,
        content_type: str = "application/octet-stream",
    ) -> str:
        """Register, upload and finalize a single file; return the new asset id.

        The asset has one component named after the file. If the server
        returned no upload URL for it, a warning is logged and the asset is
        still finalized (matching the previous behavior, minus the silence).
        """
        component = local_path.name
        info = self.create_asset_presigned(
            asset_type=asset_type,
            components=[component],
            content_type=content_type,
        )
        upload_url = info.upload_urls.get(component)
        if upload_url:
            self.upload_component(upload_url, local_path, content_type)
        else:
            logger.warning("No upload URL for component %s, skipping", component)
        self.finalize_asset(info.asset_id)
        return info.asset_id

    # --- Async directory upload ---

    def register_and_upload_async(
        self,
        local_dir: Path,
        asset_type: str,
        upload_tracker: UploadTracker,
        content_type: str = "application/octet-stream",
    ) -> str:
        """Register every file in a directory, then upload + finalize on a background thread.

        Returns the new asset id, or `""` when the directory contains no files
        (in which case nothing is registered or submitted).
        """
        components = self._list_components(local_dir)
        if not components:
            return ""

        info = self.create_asset_presigned(
            asset_type=asset_type, components=components, content_type=content_type
        )

        # Record (name, local path, size) for each component before any bytes
        # move, so the tracker knows about the upload from the start.
        upload_tracker.register_components(
            info.asset_id,
            asset_type,
            [
                (comp, str(local_dir / comp), (local_dir / comp).stat().st_size)
                for comp in components
            ],
        )

        future = self._api._executor.submit(
            self._upload_registered_components,
            info.asset_id,
            info.upload_urls,
            local_dir,
            components,
            content_type,
            upload_tracker,
        )
        self._pending_uploads.append(future)
        return info.asset_id

    def _upload_registered_components(
        self,
        asset_id: str,
        upload_urls: dict[str, str],
        local_dir: Path,
        components: list[str],
        content_type: str,
        tracker: UploadTracker,
    ) -> None:
        """Upload already-registered components, updating `tracker` per component.

        Components without an upload URL are skipped with a warning. The asset
        is finalized only if the tracker reports every component completed.
        """
        for comp in components:
            upload_url = upload_urls.get(comp)
            if not upload_url:
                logger.warning("No upload URL for component %s, skipping", comp)
                continue
            tracker.mark_uploading(asset_id, comp)
            self.upload_component(upload_url, local_dir / comp, content_type)
            tracker.mark_completed(asset_id, comp)

        if tracker.all_components_completed(asset_id):
            self.finalize_asset(asset_id)
            logger.info("Finalized asset %s (%d components)", asset_id, len(components))

    def upload_directory_async(
        self,
        local_dir: Path,
        asset_type: str,
        content_type: str = "application/octet-stream",
        upload_tracker: UploadTracker | None = None,
    ) -> None:
        """Upload every file in a directory on a background thread.

        With `upload_tracker`, uses the register-then-upload flow for crash
        recovery. Without, a thinner path that uploads and finalizes inline.
        """
        # Compare against None explicitly: a tracker object that happens to be
        # falsy must still select the tracked flow.
        if upload_tracker is not None:
            self.register_and_upload_async(
                local_dir, asset_type, upload_tracker, content_type
            )
            return

        future = self._api._executor.submit(
            self._upload_directory_no_tracking, local_dir, asset_type, content_type
        )
        self._pending_uploads.append(future)

    def _upload_directory_no_tracking(
        self, local_dir: Path, asset_type: str, content_type: str
    ) -> None:
        """Register, upload and finalize a directory's files without a tracker.

        Does nothing for a directory with no files. Components without an
        upload URL are skipped with a warning; the asset is finalized either way.
        """
        components = self._list_components(local_dir)
        if not components:
            return
        info = self.create_asset_presigned(
            asset_type=asset_type, components=components, content_type=content_type
        )
        for comp in components:
            upload_url = info.upload_urls.get(comp)
            if not upload_url:
                logger.warning("No upload URL for component %s, skipping", comp)
                continue
            self.upload_component(upload_url, local_dir / comp, content_type)
        self.finalize_asset(info.asset_id)

    def wait_for_uploads(self) -> None:
        """Block until every pending background upload has finished.

        All pending futures are awaited and the pending list is emptied even
        when some uploads failed, so a failure is reported exactly once instead
        of being re-raised by every later call. The first failure is re-raised
        after the rest have settled; any further failures are logged.

        Raises:
            Exception: the first exception raised by a pending upload.
        """
        pending = list(self._pending_uploads)
        self._pending_uploads.clear()

        first_error: Exception | None = None
        for future in pending:
            try:
                future.result()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
                else:
                    logger.error("Additional async upload failed: %s", exc)
        if first_error is not None:
            raise first_error
methodic/search.py ADDED
@@ -0,0 +1,78 @@
1
+ """Search namespace.
2
+
3
+ Wraps `POST /search`, which is backed by Vertex AI Search server-side. The
4
+ endpoint returns 503 in environments without Vertex configured (local dev,
5
+ CI without credentials) — callers that want to gate against this should
6
+ catch `methodic.errors.ServerError` and check `status_code == 503`.
7
+
8
+ RBAC and storage-prefix isolation are applied server-side: callers don't
9
+ need to filter for them. `SearchFilters` is layered on top of those server
10
+ filters for narrowing by asset type, time window, source, etc.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from typing import TYPE_CHECKING, Any, Iterator
17
+
18
+ from methodic.transport import Transport
19
+ from methodic.types import SearchFilters, SearchResponse, SearchResult
20
+
21
+ if TYPE_CHECKING:
22
+ from methodic.chronicle import Chronicle
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class SearchAPI:
    """Vertex-backed search across research docs, experiment metadata, and arxiv assets."""

    def __init__(self, transport: Transport, chronicle: Chronicle) -> None:
        self._t = transport
        self._chronicle = chronicle

    def query(
        self,
        query: str,
        *,
        filters: SearchFilters | dict[str, Any] | None = None,
        experiment_context: list[str] | None = None,
        page_size: int | None = None,
        page_token: str | None = None,
    ) -> SearchResponse:
        """Run a single search request. Returns one page; use `iter` to walk pages.

        Only arguments that are not `None` are included in the request body.
        `filters` may be a `SearchFilters` (serialized with `to_dict()`) or an
        already-built dict, which is sent as-is.
        """
        payload: dict[str, Any] = {"query": query}
        if filters is not None:
            payload["filters"] = (
                filters.to_dict() if isinstance(filters, SearchFilters) else filters
            )
        if experiment_context is not None:
            payload["experiment_context"] = experiment_context
        if page_size is not None:
            payload["page_size"] = page_size
        if page_token is not None:
            payload["page_token"] = page_token
        return SearchResponse.from_dict(self._t.post("/search", json=payload))

    def iter(
        self,
        query: str,
        *,
        filters: SearchFilters | dict[str, Any] | None = None,
        experiment_context: list[str] | None = None,
        page_size: int | None = None,
    ) -> Iterator[SearchResult]:
        """Yield every search hit, paging server-side as needed.

        Pages are fetched lazily, one request per page, as the iterator is
        consumed. Iteration ends at the first page without a next-page token.
        """
        token: str | None = None
        while True:
            page = self.query(
                query,
                filters=filters,
                experiment_context=experiment_context,
                page_size=page_size,
                page_token=token,
            )
            yield from page.results
            # An empty-string token is treated like None. `query` would send it
            # back as `page_token`, and it cannot identify a further page, so
            # continuing risks paging forever.
            if not page.next_page_token:
                return
            token = page.next_page_token
methodic/transport.py ADDED
@@ -0,0 +1,91 @@
1
+ """HTTP transport shared by every namespace.
2
+
3
+ Namespaces never call `requests` directly — they go through `Transport`, which
4
+ owns the connection pool, attaches the auth header, and translates non-2xx
5
+ responses into the right `APIError` subclass.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Any
12
+
13
+ import requests
14
+
15
+ from methodic.errors import raise_for_response
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class Transport:
    """Tiny wrapper over `requests.Session` for the Chronicle REST API.

    The REST verbs (`get`/`post`/`put`/`delete`) attach the bearer auth header,
    pass every response through `raise_for_response`, and return the decoded
    JSON body (or `None` for an empty body). The presigned-URL helpers send no
    auth header and use `requests`' own `raise_for_status()` instead.

    Can be used as a context manager; the session is closed on exit.
    """

    def __init__(self, server_url: str, api_key: str, timeout: int = 30) -> None:
        # Strip trailing slashes so `_url` can join with exactly one "/".
        self.base = server_url.rstrip("/")
        self._api_key = api_key
        self._timeout = timeout
        self._session = requests.Session()

    def __enter__(self) -> Transport:
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    @property
    def auth_headers(self) -> dict[str, str]:
        """The bearer-token header sent with every REST call."""
        return {"Authorization": f"Bearer {self._api_key}"}

    def _url(self, path: str) -> str:
        """Join `path` onto the base URL, with or without a leading slash."""
        return f"{self.base}{path}" if path.startswith("/") else f"{self.base}/{path}"

    def _effective_timeout(self, timeout: int | None) -> int:
        """Return the per-call timeout, or the default when it is `None` or 0."""
        return timeout or self._timeout

    @staticmethod
    def _decode(resp: requests.Response) -> Any:
        """Raise for an error response; otherwise return its JSON body or `None`."""
        raise_for_response(resp)
        return resp.json() if resp.content else None

    def get(self, path: str, *, params: dict | None = None, timeout: int | None = None) -> Any:
        """GET `path` with optional query `params`; return the decoded body."""
        resp = self._session.get(
            self._url(path),
            params=params,
            headers=self.auth_headers,
            timeout=self._effective_timeout(timeout),
        )
        return self._decode(resp)

    def post(self, path: str, *, json: Any = None, timeout: int | None = None) -> Any:
        """POST `json` to `path`; return the decoded body."""
        resp = self._session.post(
            self._url(path),
            json=json,
            headers=self.auth_headers,
            timeout=self._effective_timeout(timeout),
        )
        return self._decode(resp)

    def put(self, path: str, *, json: Any = None, timeout: int | None = None) -> Any:
        """PUT `json` to `path`; return the decoded body."""
        resp = self._session.put(
            self._url(path),
            json=json,
            headers=self.auth_headers,
            timeout=self._effective_timeout(timeout),
        )
        return self._decode(resp)

    def delete(self, path: str, *, timeout: int | None = None) -> Any:
        """DELETE `path`; return the decoded body."""
        resp = self._session.delete(
            self._url(path),
            headers=self.auth_headers,
            timeout=self._effective_timeout(timeout),
        )
        return self._decode(resp)

    def put_to_presigned(
        self, url: str, *, data: Any, content_type: str, timeout: int = 600
    ) -> None:
        """PUT to a presigned cloud-storage URL. No auth header — the URL itself carries it."""
        resp = self._session.put(
            url, data=data, headers={"Content-Type": content_type}, timeout=timeout
        )
        resp.raise_for_status()

    def get_streaming(self, url: str, *, timeout: int = 600) -> requests.Response:
        """GET a presigned URL and return the raw streaming response. Caller closes.

        If the status check raises, the response is closed here before the
        exception propagates: the caller never receives it, so nothing else
        could release it.
        """
        resp = self._session.get(url, stream=True, timeout=timeout)
        try:
            resp.raise_for_status()
        except Exception:
            resp.close()
            raise
        return resp

    def close(self) -> None:
        """Close the underlying session."""
        self._session.close()