generic-ml-cache-cli 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/PKG-INFO +20 -8
  2. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/README.md +18 -6
  3. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/pyproject.toml +2 -2
  4. generic_ml_cache_cli-0.8.0/src/generic_ml_cache_cli/async_jobs.py +244 -0
  5. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/src/generic_ml_cache_cli/cli.py +510 -36
  6. generic_ml_cache_cli-0.8.0/tests/test_async_jobs.py +323 -0
  7. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_cli.py +43 -0
  8. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/.gitignore +0 -0
  9. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/LICENSE +0 -0
  10. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/NOTICE +0 -0
  11. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/src/generic_ml_cache_cli/__init__.py +0 -0
  12. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/src/generic_ml_cache_cli/__main__.py +0 -0
  13. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/src/generic_ml_cache_cli/config.py +0 -0
  14. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/conftest.py +0 -0
  15. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/fake_client.py +0 -0
  16. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_config.py +0 -0
  17. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_discover.py +0 -0
  18. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_effort.py +0 -0
  19. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_encrypted_run.py +0 -0
  20. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_encryption_cli.py +0 -0
  21. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_interrupt.py +0 -0
  22. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_models.py +0 -0
  23. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_passthrough.py +0 -0
  24. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_robustness.py +0 -0
  25. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_session_cli.py +0 -0
  26. {generic_ml_cache_cli-0.7.0 → generic_ml_cache_cli-0.8.0}/tests/test_stdin_delivery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: generic-ml-cache-cli
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Terminal UI for generic-ml-cache: the gmlcache command. A thin inbound driver over generic-ml-cache-core -- reads config, provides the data source, maps commands onto the core library.
5
5
  Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
6
6
  Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
@@ -24,7 +24,7 @@ Classifier: Programming Language :: Python :: 3.13
24
24
  Classifier: Topic :: Utilities
25
25
  Requires-Python: >=3.9
26
26
  Requires-Dist: argcomplete<4,>=3
27
- Requires-Dist: generic-ml-cache-core>=0.7.0
27
+ Requires-Dist: generic-ml-cache-core>=0.8.0
28
28
  Provides-Extra: dev
29
29
  Requires-Dist: coverage>=7; extra == 'dev'
30
30
  Requires-Dist: pytest-cov; extra == 'dev'
@@ -57,6 +57,12 @@ API) call once, replay it forever by its content key, offline and byte-for-byte.
57
57
  <img src="https://raw.githubusercontent.com/danielslobozian/generic-ml-cache/main/docs/images/gmlcache-demo.gif" alt="gmlcache: a miss records the real client call; the same command again is served instantly from cache, byte-identical" width="760">
58
58
  </p>
59
59
 
60
+ <p align="center"><sub><b>Detached + live streaming</b> — <code>run --detach</code> returns an id; <code>execution watch</code> follows the client's live progress to the result</sub></p>
61
+
62
+ <p align="center">
63
+ <img src="https://raw.githubusercontent.com/danielslobozian/generic-ml-cache/main/docs/images/gmlcache-async.gif" alt="gmlcache run --detach, then execution watch streaming the client's live thinking and tool calls to the result" width="760">
64
+ </p>
65
+
60
66
  ## Install
61
67
 
62
68
  ```bash
@@ -69,12 +75,15 @@ This installs the `gmlcache` command and pulls in the engine,
69
75
  ## Use
70
76
 
71
77
  ```bash
72
- gmlcache run --client claude --model sonnet --prompt "…" # record on a miss, replay on a hit
73
- gmlcache check --client claude --model sonnet --prompt "…" # is this exact call already cached?
74
- gmlcache list # stored executions, grouped by client/model
75
- gmlcache stats # totals, hit counts, token usage & cost
76
- gmlcache inspect <key> # pretty-print one stored execution
77
- gmlcache doctor | models | status | init # environment & configuration helpers
78
+ gmlcache run --client claude --model sonnet --prompt "…" # record on a miss, replay on a hit
79
+ gmlcache check --client claude --model sonnet --prompt "…" # forecast: is this exact call cached?
80
+ gmlcache run --client claude --model sonnet --prompt "…" --detach # run detached; prints an execution id
81
+ gmlcache execution watch <id> # follow a detached run's live progress
82
+ gmlcache session report <id> # token usage by provider/model for a workflow
83
+ gmlcache encrypt # encrypt the whole store at rest
84
+ gmlcache export --tag eval -o data.jsonl # export the (input, output) dataset corpus
85
+ gmlcache list | tags | stats | inspect <key> # browse stored executions
86
+ gmlcache doctor | models | status | init # environment & configuration helpers
78
87
  ```
79
88
 
80
89
  ## What it does
@@ -84,6 +93,9 @@ gmlcache doctor | models | status | init # environment & con
84
93
  - **Replays** an identical request instantly and offline, **byte-for-byte** — gmlcache
85
94
  adds nothing to the client's output, so it is a transparent drop-in.
86
95
  - **Reports** — list, group, inspect, and measure stored executions and their savings.
96
+ - **And more** — group a workflow's runs into **sessions** with per-provider/model usage
97
+ reports, **encrypt** the whole store at rest, run **detached** (`--detach`) with a live
98
+ progress stream, and **export** an `(input, output)` dataset.
87
99
 
88
100
  ## Built on a reusable engine
89
101
 
@@ -22,6 +22,12 @@ API) call once, replay it forever by its content key, offline and byte-for-byte.
22
22
  <img src="https://raw.githubusercontent.com/danielslobozian/generic-ml-cache/main/docs/images/gmlcache-demo.gif" alt="gmlcache: a miss records the real client call; the same command again is served instantly from cache, byte-identical" width="760">
23
23
  </p>
24
24
 
25
+ <p align="center"><sub><b>Detached + live streaming</b> — <code>run --detach</code> returns an id; <code>execution watch</code> follows the client's live progress to the result</sub></p>
26
+
27
+ <p align="center">
28
+ <img src="https://raw.githubusercontent.com/danielslobozian/generic-ml-cache/main/docs/images/gmlcache-async.gif" alt="gmlcache run --detach, then execution watch streaming the client's live thinking and tool calls to the result" width="760">
29
+ </p>
30
+
25
31
  ## Install
26
32
 
27
33
  ```bash
@@ -34,12 +40,15 @@ This installs the `gmlcache` command and pulls in the engine,
34
40
  ## Use
35
41
 
36
42
  ```bash
37
- gmlcache run --client claude --model sonnet --prompt "…" # record on a miss, replay on a hit
38
- gmlcache check --client claude --model sonnet --prompt "…" # is this exact call already cached?
39
- gmlcache list # stored executions, grouped by client/model
40
- gmlcache stats # totals, hit counts, token usage & cost
41
- gmlcache inspect <key> # pretty-print one stored execution
42
- gmlcache doctor | models | status | init # environment & configuration helpers
43
+ gmlcache run --client claude --model sonnet --prompt "…" # record on a miss, replay on a hit
44
+ gmlcache check --client claude --model sonnet --prompt "…" # forecast: is this exact call cached?
45
+ gmlcache run --client claude --model sonnet --prompt "…" --detach # run detached; prints an execution id
46
+ gmlcache execution watch <id> # follow a detached run's live progress
47
+ gmlcache session report <id> # token usage by provider/model for a workflow
48
+ gmlcache encrypt # encrypt the whole store at rest
49
+ gmlcache export --tag eval -o data.jsonl # export the (input, output) dataset corpus
50
+ gmlcache list | tags | stats | inspect <key> # browse stored executions
51
+ gmlcache doctor | models | status | init # environment & configuration helpers
43
52
  ```
44
53
 
45
54
  ## What it does
@@ -49,6 +58,9 @@ gmlcache doctor | models | status | init # environment & con
49
58
  - **Replays** an identical request instantly and offline, **byte-for-byte** — gmlcache
50
59
  adds nothing to the client's output, so it is a transparent drop-in.
51
60
  - **Reports** — list, group, inspect, and measure stored executions and their savings.
61
+ - **And more** — group a workflow's runs into **sessions** with per-provider/model usage
62
+ reports, **encrypt** the whole store at rest, run **detached** (`--detach`) with a live
63
+ progress stream, and **export** an `(input, output)` dataset.
52
64
 
53
65
  ## Built on a reusable engine
54
66
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "generic-ml-cache-cli"
7
- version = "0.7.0"
7
+ version = "0.8.0"
8
8
  description = "Terminal UI for generic-ml-cache: the gmlcache command. A thin inbound driver over generic-ml-cache-core -- reads config, provides the data source, maps commands onto the core library."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -25,7 +25,7 @@ classifiers = [
25
25
  "Programming Language :: Python :: 3.13",
26
26
  "Topic :: Utilities",
27
27
  ]
28
- dependencies = ["generic-ml-cache-core>=0.7.0", "argcomplete>=3,<4"]
28
+ dependencies = ["generic-ml-cache-core>=0.8.0", "argcomplete>=3,<4"]
29
29
 
30
30
  [project.urls]
31
31
  Homepage = "https://github.com/danielslobozian/generic-ml-cache"
@@ -0,0 +1,244 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Detached ("async") execution jobs.
4
+
5
+ A detached managed run is a separate, OS-detached ``gmlcache`` worker process that does an
6
+ ordinary managed run and records the result into the normal content-addressed cache. The
7
+ launch command returns immediately with a **job id**; the worker outlives it.
8
+
9
+ State lives under ``<store>/jobs/``:
10
+
11
+ * ``<id>/spec.json`` — the run to perform (the serialized ``run`` arguments).
12
+ * ``<id>/status.json`` — the mutable lifecycle (submitted → running → succeeded | failed),
13
+ timings, exit code, and the resulting cache key once done.
14
+ * ``<id>/events.jsonl`` — the durable, append-only NDJSON progress log (for ``watch``).
15
+ * ``locks/<id>.lock`` — a **liveness lock** the worker holds for its whole run.
16
+
17
+ The liveness lock reuses SQLite's ``BEGIN EXCLUSIVE`` (same trick as the encryption store
18
+ lock): it is released by the OS when the holder's process dies, with no stale locks, on every
19
+ platform. So a reader can tell a *live* worker (lock held) from one that *vanished* mid-run
20
+ (lock free while ``status.json`` still says ``running`` → **interrupted**).
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import os
27
+ import re
28
+ import sqlite3
29
+ import subprocess
30
+ import sys
31
+ from contextlib import contextmanager
32
+ from datetime import datetime, timezone
33
+ from pathlib import Path
34
+ from typing import Iterator, List, Optional
35
+
36
+ from generic_ml_cache_core.common.errors import StoreLocked
37
+ from generic_ml_cache_core.stream import StreamWriter
38
+
39
+ #: A job id is generated by gmlcache (secrets.token_hex). Validating against this
40
+ #: allowlist before it ever builds a filesystem path stops a crafted id (``../…``)
41
+ #: from escaping the jobs directory — a user supplies it to `execution <id>`.
42
+ _JOB_ID = re.compile(r"\A[a-z0-9]{1,64}\Z")
43
+
44
+
45
+ def _safe_job_id(job_id: str) -> str:
46
+ if not isinstance(job_id, str) or not _JOB_ID.match(job_id):
47
+ raise ValueError(f"invalid job id: {job_id!r}")
48
+ return job_id
49
+
50
+
51
# Lifecycle states as they are written to a job's status.json.
SUBMITTED = "submitted"
RUNNING = "running"
SUCCEEDED = "succeeded"
FAILED = "failed"

#: Derived only, never written to disk: the stored state is ``running`` but no
#: worker holds the job's lock.
INTERRUPTED = "interrupted"

#: The stored states that mark a finished job.
TERMINAL = frozenset((SUCCEEDED, FAILED))
59
+
60
+
61
+ def _now() -> str:
62
+ return datetime.now(timezone.utc).isoformat(timespec="seconds")
63
+
64
+
65
class JobStore:
    """The on-disk layout for detached jobs under ``<store>/jobs/``.

    Every per-job path is built from a job id that has passed ``_safe_job_id``
    and the containment check in ``_within_jobs``; path-building methods raise
    ``ValueError`` for an id that fails either check.
    """

    def __init__(self, store_root: Path) -> None:
        """Bind the store to ``<store_root>/jobs`` (nothing is created yet)."""
        self._jobs = Path(store_root) / "jobs"

    def _within_jobs(self, candidate: Path, job_id: str) -> Path:
        """Resolve ``candidate`` and return it, insisting it stays inside the jobs directory.

        Raises:
            ValueError: if the resolved path is neither the jobs directory itself
                nor located beneath it.
        """
        # Containment guard (same shape as the output-file writer): the resolved path
        # must stay inside the jobs directory. Redundant after _safe_job_id, but it is
        # the explicit, recognized way to prove the path is not user-steerable.
        resolved = candidate.resolve()
        base = self._jobs.resolve()
        if base != resolved and base not in resolved.parents:
            raise ValueError(f"job id escapes the jobs directory: {job_id!r}")
        return resolved

    def job_dir(self, job_id: str) -> Path:
        """Return the resolved directory that holds this job's files."""
        return self._within_jobs(self._jobs / _safe_job_id(job_id), job_id)

    def lock_path(self, job_id: str) -> Path:
        """Return the resolved path of this job's liveness lock (``locks/<id>.lock``)."""
        return self._within_jobs(self._jobs / "locks" / f"{_safe_job_id(job_id)}.lock", job_id)

    def events_path(self, job_id: str) -> Path:
        """Return the path of this job's ``events.jsonl`` progress log."""
        return self.job_dir(job_id) / "events.jsonl"

    def _spec_path(self, job_id: str) -> Path:
        return self.job_dir(job_id) / "spec.json"

    def _status_path(self, job_id: str) -> Path:
        return self.job_dir(job_id) / "status.json"

    def exists(self, job_id: str) -> bool:
        """Return True if the job has a status.json or a spec.json on disk."""
        try:
            return self._status_path(job_id).exists() or self._spec_path(job_id).exists()
        except ValueError:
            return False  # an invalid id never names a real job

    def list_ids(self) -> List[str]:
        """Return the sorted ids of all job directories.

        The ``locks`` directory is skipped, and so is any directory whose name is
        not a well-formed job id: such a name could not be passed back to
        ``job_dir`` without raising ``ValueError``, so it is not reported as a job.
        """
        if not self._jobs.exists():
            return []
        return sorted(
            entry.name
            for entry in self._jobs.iterdir()
            if entry.is_dir() and entry.name != "locks" and _JOB_ID.match(entry.name)
        )

    def write_spec(self, job_id: str, spec: dict) -> None:
        """Create the job directory if needed and write ``spec`` to its spec.json."""
        self.job_dir(job_id).mkdir(parents=True, exist_ok=True)
        self._write_json(self._spec_path(job_id), spec)

    def read_spec(self, job_id: str) -> dict:
        """Load and return the job's spec.json (errors propagate to the caller)."""
        return json.loads(self._spec_path(job_id).read_text(encoding="utf-8"))

    def read_status(self, job_id: str) -> Optional[dict]:
        """Return the job's status.json as a dict, or ``None`` if it is unusable.

        ``None`` covers an invalid id, a missing or unreadable file, malformed
        JSON, and JSON whose top-level value is not an object, so callers can
        rely on the ``Optional[dict]`` contract.
        """
        try:
            status = json.loads(self._status_path(job_id).read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        # A top-level list/str/number is valid JSON but not a status record.
        return status if isinstance(status, dict) else None

    def update_status(self, job_id: str, **fields: object) -> dict:
        """Merge ``fields`` into the job's status.json (creating it), and return it."""
        status = self.read_status(job_id) or {"job": job_id}
        status.update(fields)
        self.job_dir(job_id).mkdir(parents=True, exist_ok=True)
        self._write_json(self._status_path(job_id), status)
        return status

    @staticmethod
    def _write_json(path: Path, data: dict) -> None:
        """Write ``data`` as indented JSON to a sibling ``.tmp`` file, then rename it onto ``path``."""
        # ``path`` is built only from a job id validated by _safe_job_id (allowlist
        # ``[a-z0-9]{1,64}``) and confined by _within_jobs, so it cannot escape the jobs
        # directory; ``data`` is gmlcache's own job record (it intentionally stores the run
        # spec). The taint engine cannot follow the validation across the call chain, so this
        # verified false positive is suppressed.
        tmp = path.with_suffix(path.suffix + ".tmp")
        tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")  # NOSONAR(S2083)
        tmp.replace(path)  # NOSONAR(S2083)
138
+
139
+
140
+ # -- liveness lock (SQLite BEGIN EXCLUSIVE; OS-released on process death) ------
141
+
142
+
143
@contextmanager
def hold_job_lock(lock_path: Path) -> Iterator[None]:
    """Own the job's exclusive lock for as long as the ``with`` block runs.

    The lock is an open SQLite ``BEGIN EXCLUSIVE`` transaction on ``lock_path``;
    the parent directory is created if missing. On exit the transaction is
    rolled back and the connection closed.

    Raises:
        StoreLocked: if the exclusive transaction cannot be started, i.e. another
            worker already owns this job.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    # timeout=0: fail immediately instead of waiting for a competing holder.
    conn = sqlite3.connect(lock_path, timeout=0)
    try:
        conn.execute("BEGIN EXCLUSIVE")
    except sqlite3.OperationalError as busy:
        conn.close()
        raise StoreLocked(f"job {lock_path.stem} is already owned by a running worker") from busy
    try:
        try:
            yield
        finally:
            conn.rollback()
    finally:
        # Always close, even when the rollback itself fails.
        conn.close()
161
+
162
+
163
def job_lock_held(lock_path: Path) -> bool:
    """Report whether some worker currently holds this job's lock.

    The probe tries to take the exclusive lock itself and releases it at once.
    Returns ``True`` when that attempt fails with ``sqlite3.OperationalError``
    (a holder exists), and ``False`` when the lock file is missing, cannot be
    opened, or the lock was free.
    """
    if not lock_path.exists():
        return False
    try:
        probe = sqlite3.connect(lock_path, timeout=0)
    except sqlite3.Error:
        return False
    held = True
    try:
        probe.execute("BEGIN EXCLUSIVE")
        probe.rollback()
        held = False  # acquired and released: nobody owns the lock
    except sqlite3.OperationalError:
        pass  # could not acquire: keep held = True
    finally:
        probe.close()
    return held
180
+
181
+
182
def derived_state(status: Optional[dict], lock_held: bool) -> str:
    """Compute the state to report for a job.

    A missing status yields ``"unknown"``. Otherwise the stored ``state`` is
    reported as-is, except that a stored ``running`` with no live worker
    (``lock_held`` is false) is reported as :data:`INTERRUPTED`.
    """
    if status is None:
        return "unknown"
    stored = str(status.get("state", "unknown"))
    worker_vanished = stored == RUNNING and not lock_held
    return INTERRUPTED if worker_vanished else stored
191
+
192
+
193
+ # -- detached spawn -----------------------------------------------------------
194
+
195
+
196
def spawn_worker(store_root: Path, job_id: str, token: Optional[str] = None) -> None:
    """Start a detached ``gmlcache`` worker process for ``job_id`` and return at once.

    The child runs ``python -m generic_ml_cache_cli __worker <store_root> <job_id>``
    with stdin/stdout/stderr connected to devnull. On Windows it is created with
    ``DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP``; elsewhere it is started in a
    new session. The child is not waited on.

    When ``token`` is given it is passed to the worker only through the
    ``GMLCACHE_TOKEN`` variable of a copy of the current environment; otherwise
    the child inherits the environment unchanged.
    """
    command = [sys.executable, "-m", "generic_ml_cache_cli", "__worker", str(store_root), job_id]

    child_env = None
    if token is not None:
        child_env = {**os.environ, "GMLCACHE_TOKEN": token}

    # Only the detachment mechanism differs per platform; everything else is shared.
    if os.name == "nt":
        detach = {
            "creationflags": subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP,  # type: ignore[attr-defined]
        }
    else:
        detach = {"start_new_session": True}

    subprocess.Popen(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        close_fds=True,
        env=child_env,
        **detach,
    )
231
+
232
+
233
def append_event(events_path: Path, kind: str, **fields: object) -> None:
    """Append a single progress event of type ``kind`` to the job's event log.

    The event is written through ``StreamWriter``, the same writer used for the
    run stream, so ``watch`` can read one log format regardless of the producer.
    The writer is always closed, whether or not writing the event succeeds.
    NOTE(review): the original documentation calls this best-effort; no error
    is swallowed here, so any such handling must live in ``StreamWriter``.
    """
    log = StreamWriter(events_path)
    try:
        log.event(kind, **fields)
    finally:
        log.close()
241
+
242
+
243
def now() -> str:
    """Public alias for ``_now``: return whatever timestamp string it produces."""
    stamp = _now()
    return stamp