generic-ml-cache-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- generic_ml_cache_core/__init__.py +64 -0
- generic_ml_cache_core/adapter/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/composition.py +96 -0
- generic_ml_cache_core/adapter/out/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/stub_api_client_adapter.py +30 -0
- generic_ml_cache_core/adapter/out/client/__init__.py +28 -0
- generic_ml_cache_core/adapter/out/client/claude.py +214 -0
- generic_ml_cache_core/adapter/out/client/codex.py +171 -0
- generic_ml_cache_core/adapter/out/client/cursor.py +208 -0
- generic_ml_cache_core/adapter/out/client/discover.py +121 -0
- generic_ml_cache_core/adapter/out/client/isolation.py +396 -0
- generic_ml_cache_core/adapter/out/client/local_client_runner.py +54 -0
- generic_ml_cache_core/adapter/out/client/passthrough_client_runner.py +47 -0
- generic_ml_cache_core/adapter/out/client/prime_directive.py +53 -0
- generic_ml_cache_core/adapter/out/client/registry.py +34 -0
- generic_ml_cache_core/adapter/out/clock/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/clock/system_clock.py +16 -0
- generic_ml_cache_core/adapter/out/fingerprint/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/fingerprint/filesystem_file_fingerprint.py +30 -0
- generic_ml_cache_core/adapter/out/metrics/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/metrics/access_registry.py +147 -0
- generic_ml_cache_core/adapter/out/metrics/journal_metrics.py +45 -0
- generic_ml_cache_core/adapter/out/persistence/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/persistence/call_identity_serialization.py +100 -0
- generic_ml_cache_core/adapter/out/persistence/in_memory_execution_repository.py +69 -0
- generic_ml_cache_core/adapter/out/persistence/sqlite_execution_repository.py +398 -0
- generic_ml_cache_core/adapter/out/storage/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/storage/filesystem_blob_store.py +47 -0
- generic_ml_cache_core/application/__init__.py +1 -0
- generic_ml_cache_core/application/domain/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/client_status.py +17 -0
- generic_ml_cache_core/application/domain/model/execution/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/execution/artifact.py +78 -0
- generic_ml_cache_core/application/domain/model/execution/execution_failure.py +32 -0
- generic_ml_cache_core/application/domain/model/execution/execution_kind.py +26 -0
- generic_ml_cache_core/application/domain/model/execution/execution_state.py +21 -0
- generic_ml_cache_core/application/domain/model/execution/ml_execution.py +41 -0
- generic_ml_cache_core/application/domain/model/identity/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/identity/api_call_identity.py +36 -0
- generic_ml_cache_core/application/domain/model/identity/call_identity.py +25 -0
- generic_ml_cache_core/application/domain/model/identity/managed_call_identity.py +54 -0
- generic_ml_cache_core/application/domain/model/identity/passthrough_call_identity.py +35 -0
- generic_ml_cache_core/application/domain/model/model_info.py +20 -0
- generic_ml_cache_core/application/domain/model/model_listing.py +29 -0
- generic_ml_cache_core/application/domain/model/parsed_output.py +23 -0
- generic_ml_cache_core/application/domain/model/probe/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/probe/probe_report.py +26 -0
- generic_ml_cache_core/application/domain/model/probe/probe_status.py +13 -0
- generic_ml_cache_core/application/domain/model/run/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/run/cache_mode.py +21 -0
- generic_ml_cache_core/application/domain/model/run/client_run_request.py +35 -0
- generic_ml_cache_core/application/domain/model/run/client_run_result.py +65 -0
- generic_ml_cache_core/application/domain/model/run/message.py +20 -0
- generic_ml_cache_core/application/domain/model/usage/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/usage/token_usage.py +53 -0
- generic_ml_cache_core/application/domain/model/usage/usage.py +108 -0
- generic_ml_cache_core/application/domain/service/__init__.py +1 -0
- generic_ml_cache_core/application/domain/service/cacheability.py +19 -0
- generic_ml_cache_core/application/domain/service/message_fingerprinting.py +25 -0
- generic_ml_cache_core/application/port/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/probe_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/probe_use_case.py +19 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_command.py +40 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_command.py +48 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_use_case.py +25 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/out/__init__.py +1 -0
- generic_ml_cache_core/application/port/out/api_client_port.py +26 -0
- generic_ml_cache_core/application/port/out/base.py +272 -0
- generic_ml_cache_core/application/port/out/blob_store_port.py +37 -0
- generic_ml_cache_core/application/port/out/client_runner_port.py +26 -0
- generic_ml_cache_core/application/port/out/clock_port.py +22 -0
- generic_ml_cache_core/application/port/out/execution_repository_port.py +40 -0
- generic_ml_cache_core/application/port/out/file_fingerprint_port.py +25 -0
- generic_ml_cache_core/application/port/out/metrics_port.py +54 -0
- generic_ml_cache_core/application/port/out/passthrough_runner_port.py +25 -0
- generic_ml_cache_core/application/usecase/__init__.py +1 -0
- generic_ml_cache_core/application/usecase/cached_ml_execution_service.py +198 -0
- generic_ml_cache_core/application/usecase/call_identity_building.py +60 -0
- generic_ml_cache_core/application/usecase/journal_events.py +19 -0
- generic_ml_cache_core/application/usecase/probe_service.py +44 -0
- generic_ml_cache_core/application/usecase/run_api_execution_service.py +69 -0
- generic_ml_cache_core/application/usecase/run_managed_local_execution_service.py +84 -0
- generic_ml_cache_core/application/usecase/run_passthrough_execution_service.py +67 -0
- generic_ml_cache_core/common/__init__.py +1 -0
- generic_ml_cache_core/common/checksum.py +82 -0
- generic_ml_cache_core/common/errors.py +76 -0
- generic_ml_cache_core/stream.py +65 -0
- generic_ml_cache_core-0.2.0.dist-info/METADATA +104 -0
- generic_ml_cache_core-0.2.0.dist-info/RECORD +99 -0
- generic_ml_cache_core-0.2.0.dist-info/WHEEL +4 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/LICENSE +201 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/NOTICE +8 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""generic-ml-cache-core: the hexagonal core library.
|
|
4
|
+
|
|
5
|
+
Record a real ML client (or API) call once, replay it forever by its content key.
|
|
6
|
+
|
|
7
|
+
This is a stateless library: it holds the domain model, the use cases, the port
|
|
8
|
+
contracts, AND the default outbound adapters (SQLite execution repository,
|
|
9
|
+
filesystem blob store, local client runner, API client, metrics, clock,
|
|
10
|
+
fingerprint). It bakes in *structure* (table names, blob naming, schema) but no
|
|
11
|
+
*location* -- the data source (store path) and configuration are injected by the
|
|
12
|
+
caller. Wire it with :func:`build_use_cases`, or construct the adapters and use
|
|
13
|
+
cases directly. The CLI / a daemon / an embedding app are inbound drivers that
|
|
14
|
+
supply the data source and map their surface onto this library.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from importlib.metadata import PackageNotFoundError
|
|
20
|
+
from importlib.metadata import version as _pkg_version
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
__version__ = _pkg_version("generic-ml-cache-core")
|
|
24
|
+
except PackageNotFoundError: # running from an uninstalled source tree
|
|
25
|
+
__version__ = "0+unknown"
|
|
26
|
+
|
|
27
|
+
from generic_ml_cache_core.adapter.inbound.composition import ( # noqa: E402 # fmt: skip
|
|
28
|
+
WiredUseCases,
|
|
29
|
+
build_use_cases,
|
|
30
|
+
)
|
|
31
|
+
from generic_ml_cache_core.adapter.out.client import ( # noqa: E402 # fmt: skip
|
|
32
|
+
ClientAdapter,
|
|
33
|
+
get_adapter,
|
|
34
|
+
register,
|
|
35
|
+
)
|
|
36
|
+
from generic_ml_cache_core.common.checksum import ( # noqa: E402 # fmt: skip
|
|
37
|
+
checksum_input_data,
|
|
38
|
+
file_content_fingerprint,
|
|
39
|
+
text_checksum,
|
|
40
|
+
)
|
|
41
|
+
from generic_ml_cache_core.common.errors import ( # noqa: E402 # fmt: skip
|
|
42
|
+
CacheError,
|
|
43
|
+
CacheMiss,
|
|
44
|
+
ClientNotFound,
|
|
45
|
+
RunInterrupted,
|
|
46
|
+
UnknownClient,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
__all__ = [
|
|
50
|
+
"__version__",
|
|
51
|
+
"build_use_cases",
|
|
52
|
+
"WiredUseCases",
|
|
53
|
+
"register",
|
|
54
|
+
"get_adapter",
|
|
55
|
+
"ClientAdapter",
|
|
56
|
+
"checksum_input_data",
|
|
57
|
+
"text_checksum",
|
|
58
|
+
"file_content_fingerprint",
|
|
59
|
+
"CacheError",
|
|
60
|
+
"CacheMiss",
|
|
61
|
+
"ClientNotFound",
|
|
62
|
+
"RunInterrupted",
|
|
63
|
+
"UnknownClient",
|
|
64
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""The composition root: build the real adapters and wire the use cases.
|
|
4
|
+
|
|
5
|
+
This is the *only* place that names every concrete adapter. It reads where to
|
|
6
|
+
store things, constructs the outbound adapters, and hands them to the use-case
|
|
7
|
+
services through their constructors. A driving adapter (the CLI) asks for the
|
|
8
|
+
wired use cases and depends only on the inbound ports.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Callable, Optional
|
|
16
|
+
|
|
17
|
+
from generic_ml_cache_core.adapter.out.api.stub_api_client_adapter import StubApiClientAdapter
|
|
18
|
+
from generic_ml_cache_core.adapter.out.client.local_client_runner import LocalClientRunner
|
|
19
|
+
from generic_ml_cache_core.adapter.out.client.passthrough_client_runner import (
|
|
20
|
+
PassthroughClientRunner,
|
|
21
|
+
)
|
|
22
|
+
from generic_ml_cache_core.adapter.out.clock.system_clock import SystemClock
|
|
23
|
+
from generic_ml_cache_core.adapter.out.fingerprint.filesystem_file_fingerprint import (
|
|
24
|
+
FilesystemFileFingerprint,
|
|
25
|
+
)
|
|
26
|
+
from generic_ml_cache_core.adapter.out.metrics.access_registry import AccessRegistry
|
|
27
|
+
from generic_ml_cache_core.adapter.out.metrics.journal_metrics import JournalMetrics
|
|
28
|
+
from generic_ml_cache_core.adapter.out.persistence.sqlite_execution_repository import (
|
|
29
|
+
SqliteExecutionRepository,
|
|
30
|
+
)
|
|
31
|
+
from generic_ml_cache_core.adapter.out.storage.filesystem_blob_store import FilesystemBlobStore
|
|
32
|
+
from generic_ml_cache_core.application.usecase.probe_service import ProbeService
|
|
33
|
+
from generic_ml_cache_core.application.usecase.run_api_execution_service import (
|
|
34
|
+
RunApiExecutionService,
|
|
35
|
+
)
|
|
36
|
+
from generic_ml_cache_core.application.usecase.run_managed_local_execution_service import (
|
|
37
|
+
RunManagedLocalExecutionService,
|
|
38
|
+
)
|
|
39
|
+
from generic_ml_cache_core.application.usecase.run_passthrough_execution_service import (
|
|
40
|
+
RunPassthroughExecutionService,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
_BLOBS_DIRNAME = "blobs"
|
|
44
|
+
_EXECUTIONS_DB = "executions.sqlite3"
|
|
45
|
+
|
|
46
|
+
#: given a client name, the executable override to use, or None for the default.
|
|
47
|
+
ExecutableOverride = Callable[[str], Optional[str]]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class WiredUseCases:
    """Immutable bundle handed to a driving adapter by the composition root.

    It carries the wired use-case services together with the concrete stores
    that the read-only CLI views (inspect/stats/list) query directly.
    """

    # Use-case services, one per execution style, plus the probe.
    run_managed: RunManagedLocalExecutionService
    run_passthrough: RunPassthroughExecutionService
    run_api: RunApiExecutionService
    probe: ProbeService

    # Concrete outbound adapters exposed for the read-only views.
    blob_store: FilesystemBlobStore
    repository: SqliteExecutionRepository
    metrics: JournalMetrics
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def build_use_cases(
    store_root: Path,
    executable_override: Optional[ExecutableOverride] = None,
    timeout: Optional[float] = None,
) -> WiredUseCases:
    """Create the outbound adapters rooted at ``store_root`` and wire the services.

    On-disk layout: output bytes go under ``store_root/blobs/``, structured
    records live in ``store_root/executions.sqlite3``, and the access-event
    registry is rooted at ``store_root`` itself.

    Args:
        store_root: Directory that holds every store; coerced to ``Path``.
        executable_override: Optional callable mapping a client name to the
            executable to use (``None`` result means "use the default"). It is
            handed to both client runners.
        timeout: Optional timeout value handed to both client runners.

    Returns:
        The wired use cases plus the blob store, repository and metrics adapters.
    """
    root = Path(store_root)

    # Outbound adapters, built in a fixed order.
    clock = SystemClock()
    blobs = FilesystemBlobStore(root / _BLOBS_DIRNAME)
    executions = SqliteExecutionRepository(root / _EXECUTIONS_DB, clock)
    journal = JournalMetrics(AccessRegistry(root))
    fingerprinter = FilesystemFileFingerprint()
    managed_runner = LocalClientRunner(executable_override, timeout)
    raw_runner = PassthroughClientRunner(executable_override, timeout)
    api = StubApiClientAdapter()

    # Use-case services, each receiving its ports through the constructor.
    managed_service = RunManagedLocalExecutionService(
        fingerprinter, managed_runner, blobs, executions, journal
    )
    passthrough_service = RunPassthroughExecutionService(
        raw_runner, blobs, executions, journal
    )
    api_service = RunApiExecutionService(api, blobs, executions, journal)
    probe_service = ProbeService(fingerprinter, executions)

    return WiredUseCases(
        run_managed=managed_service,
        run_passthrough=passthrough_service,
        run_api=api_service,
        probe=probe_service,
        blob_store=blobs,
        repository=executions,
        metrics=journal,
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""StubApiClientAdapter: a deterministic, offline stand-in for a provider API."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
from generic_ml_cache_core.application.domain.model.run.client_run_result import ClientRunResult
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.run.message import Message
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.usage.token_usage import TokenUsage
|
|
12
|
+
from generic_ml_cache_core.application.port.out.api_client_port import ApiClientPort
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StubApiClientAdapter(ApiClientPort):
    """Offline, deterministic substitute for a real provider API.

    No provider has to be reachable (so it also works in CI): the reply is
    synthesised purely from the inputs. ``stdout`` echoes the last message and
    the token usage is derived from the message sizes. Identical inputs yield
    an identical result, so it behaves correctly under caching. A real adapter
    can replace it later; the port contract is the same.
    """

    def run(self, provider: str, model: str, messages: List[Message]) -> ClientRunResult:
        """Return a synthetic successful result for ``messages``.

        The reply is ``[stub:<provider>:<model>] <last message content>``
        (empty content when there are no messages). Input tokens are the total
        character count of all message contents; output tokens are the
        character count of the reply.
        """
        if messages:
            echoed = messages[-1].content
        else:
            echoed = ""
        stdout = f"[stub:{provider}:{model}] {echoed}"

        prompt_chars = 0
        for message in messages:
            prompt_chars += len(message.content)

        usage = TokenUsage(input_tokens=prompt_chars, output_tokens=len(stdout))
        return ClientRunResult(exit_code=0, stdout=stdout, token_usage=usage)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
# SPDX-License-Identifier: Apache-2.0
"""Client adapters.

Importing this package registers the built-in adapters as a side effect. The
Claude adapter registers itself when its module is imported; Codex and Cursor
are registered explicitly below, so all three v0.0.1 clients are available out
of the box. The Codex and Cursor flag mappings are best-effort and documented
as such.
"""

from __future__ import annotations

from generic_ml_cache_core.adapter.out.client.codex import CodexAdapter
from generic_ml_cache_core.adapter.out.client.cursor import CursorAdapter
from generic_ml_cache_core.adapter.out.client.registry import (
    get_adapter,
    register,
    registered_names,
)
from generic_ml_cache_core.application.port.out.base import ClientAdapter

# Built-in adapters are registered eagerly, at import time.
# Importing the claude module is enough: it registers ClaudeAdapter itself.
from . import claude  # noqa: F401  (registers ClaudeAdapter)

# Codex and Cursor do not self-register, so do it here.
register(CodexAdapter())
register(CursorAdapter())

__all__ = ["ClientAdapter", "get_adapter", "register", "registered_names"]
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Adapter for Anthropic's Claude Code CLI (headless / print mode).
|
|
4
|
+
|
|
5
|
+
Treat the exact flags as configuration, not gospel -- override the executable
|
|
6
|
+
with the seam and adjust here if the CLI changes.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import shutil
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
from generic_ml_cache_core.adapter.out.client.registry import register
|
|
17
|
+
from generic_ml_cache_core.application.domain.model.parsed_output import ParsedOutput
|
|
18
|
+
from generic_ml_cache_core.application.domain.model.usage.usage import (
|
|
19
|
+
Usage,
|
|
20
|
+
float_or_none,
|
|
21
|
+
int_or_none,
|
|
22
|
+
)
|
|
23
|
+
from generic_ml_cache_core.application.port.out.base import (
|
|
24
|
+
ClientAdapter,
|
|
25
|
+
ensure_trailing_newline,
|
|
26
|
+
final_result_object,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ClaudeAdapter(ClientAdapter):
    """Client adapter for Anthropic's Claude Code CLI in headless (``-p``) mode.

    ``build_argv`` carries transport only; the prompt travels on stdin
    (``stdin_payload``) and capabilities are enabled through a settings file
    written by ``grant_setup`` into a redirected ``CLAUDE_CONFIG_DIR``.
    """

    name = "claude"
    default_executable = "claude"

    def build_argv(
        self,
        executable,
        run_dir,
        model,
        effort,
        context,
        prompt,
        system_prompt,
        client_args=(),
        grants=(),
    ) -> List[str]:
        """Build the command line for one headless Claude run.

        Args:
            executable: Program to launch (first argv element).
            run_dir: Not used by this adapter's argv.
            model: Value passed to ``--model``.
            effort: Optional value for ``--effort``; omitted when falsy.
            context: Not placed in argv (delivered on stdin).
            prompt: Not placed in argv (delivered on stdin).
            system_prompt: Value passed to ``--append-system-prompt``.
            client_args: Extra arguments appended verbatim at the end.
            grants: Not used here; capabilities are configured by ``grant_setup``.

        Returns:
            The argv list.
        """
        # The prompt and context are delivered on stdin (see stdin_payload), never
        # as an argv argument, so an arbitrarily large prompt cannot hit the OS
        # single-argument size limit. With -p/--print and no prompt argument,
        # Claude reads the prompt from stdin.
        # Capability doors (write/read/shell/net/web-search) live in a config
        # FILE written by grant_setup into CLAUDE_CONFIG_DIR -- not in argv.
        argv = [executable, "-p", "--model", model]
        # Effort is optional: when omitted, let Claude apply its own per-model
        # default rather than passing an empty (and invalid) --effort value.
        if effort:
            argv += ["--effort", effort]
        # Streaming output mode (one NDJSON event per line) so a live consumer can
        # watch progress; the recorded answer + usage are lifted from the final
        # `result` event. --verbose is required for stream-json to emit the full
        # stream; --include-partial-messages adds token-level deltas for the
        # live feed.
        argv += [
            "--append-system-prompt",
            system_prompt,
            "--output-format",
            "stream-json",
            "--verbose",
            "--include-partial-messages",
        ]
        # Passthrough args go last: Claude takes the prompt on stdin, so there is no
        # trailing positional to sit in front of. Appended verbatim, uninterpreted.
        argv += client_args
        return argv

    def parse_output(self, stdout: str) -> ParsedOutput:
        """Extract the answer text and usage from Claude's output.

        The result object is obtained via ``final_result_object(stdout)``
        (defined elsewhere; presumably the final ``result`` event of the
        stream -- confirm against that helper). In that object ``result`` is the
        answer text, ``usage`` holds the headline token counts and
        ``total_cost_usd`` the cost. ``usage``, ``modelUsage`` and
        ``total_cost_usd`` are kept verbatim in ``Usage.raw`` when present.

        When no result object can be obtained, or ``result`` is not a string,
        the raw ``stdout`` is returned as the text with ``usage=None``.
        """
        try:
            doc = final_result_object(stdout)
            if not isinstance(doc, dict):
                raise ValueError("no result object")
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return ParsedOutput(text=stdout, usage=None)

        text = doc.get("result")
        if not isinstance(text, str):
            # Not the shape we expected -- keep the raw output, skip usage.
            return ParsedOutput(text=stdout, usage=None)

        block = doc.get("usage")
        if not isinstance(block, dict):
            block = {}
        raw: Dict[str, Any] = {
            key: doc[key] for key in ("usage", "modelUsage", "total_cost_usd") if key in doc
        }

        usage = Usage(
            input_tokens=int_or_none(block.get("input_tokens")),
            output_tokens=int_or_none(block.get("output_tokens")),
            cache_read_tokens=int_or_none(block.get("cache_read_input_tokens")),
            cache_write_tokens=int_or_none(block.get("cache_creation_input_tokens")),
            # Claude folds reasoning into output_tokens; it is not separable here.
            reasoning_tokens=None,
            cost_usd=float_or_none(doc.get("total_cost_usd")),
            raw=raw,
        )
        return ParsedOutput(text=ensure_trailing_newline(text), usage=usage)

    def stdin_payload(self, context, prompt, system_prompt) -> Optional[str]:
        """Return the text to feed on stdin: context, blank line, prompt.

        With a falsy ``context`` only ``prompt`` is returned. The system prompt
        is not part of the payload; it stays in argv (--append-system-prompt).
        """
        return f"{context}\n\n{prompt}" if context else prompt

    def read_access_argv(self, paths):
        """Return ``--add-dir <path>`` pairs, one pair per entry in ``paths``."""
        # Claude Code grants read access to extra directories via --add-dir.
        argv = []
        for p in paths:
            argv += ["--add-dir", p]
        return argv

    def grant_setup(self, run_dir, config_home, grants):
        """Prepare a redirected Claude config home and return the env to use.

        Writes ``settings.json`` with the permission allow-list into
        ``config_home`` (created if missing), seeds it on a best-effort basis
        from the user's ``~/.claude`` directory and ``~/.claude.json`` file,
        and returns ``{"CLAUDE_CONFIG_DIR": str(config_home)}``.

        Args:
            run_dir: Not used by this method.
            config_home: ``Path`` of the redirected config directory.
            grants: Container of grant names; ``"read"``, ``"shell"``, ``"net"``
                and ``"web-search"`` each add one allow-token.
        """
        # Uniform door: write settings.json into a redirected CLAUDE_CONFIG_DIR so
        # the FILE (not a flag) enables capabilities. acceptEdits + Write/Edit are
        # always on so a file-producing call actually writes (the record-path
        # guarantee); each named grant ADDS its allow-token. The cache enables; it
        # never closes (docs/reference/grants.md).
        allow = ["Write(**)", "Edit(**)"]
        if "read" in grants:
            allow.append("Read(**)")
        if "shell" in grants:
            allow.append("Bash(**)")
        if "net" in grants:
            allow.append("WebFetch")
        if "web-search" in grants:
            allow.append("WebSearch")
        settings = {
            # Claude Code documents defaultMode as a key of the "permissions"
            # object, so it must live here for acceptEdits to take effect.
            "permissions": {"allow": allow, "defaultMode": "acceptEdits"},
            # Kept at the top level as well, matching what was written before.
            "defaultMode": "acceptEdits",
        }
        config_home.mkdir(parents=True, exist_ok=True)
        (config_home / "settings.json").write_text(json.dumps(settings), encoding="utf-8")
        # Seed credentials so the relocated home is authenticated. Subscription
        # login lives in ~/.claude; an API key in the env carries over on its own.
        # Bulk caches (projects/history/todos/shell-snapshots) are skipped; the
        # user's settings.json must not overwrite the one just written, and a
        # stray settings.local.json would outrank ours, so both are dropped.
        src = Path.home() / ".claude"
        if src.is_dir():
            skip = {
                "projects",
                "history",
                "todos",
                "shell-snapshots",
                "settings.json",
                "settings.local.json",
            }
            for child in src.iterdir():
                if child.name in skip:
                    continue
                dest = config_home / child.name
                try:
                    if child.is_dir():
                        shutil.copytree(child, dest, dirs_exist_ok=True)
                    else:
                        shutil.copy2(child, dest)
                except OSError:
                    pass  # best-effort seeding; an env API key still authenticates
        # Claude Code also keeps a top-level ~/.claude.json (account, onboarding,
        # project trust) BESIDE the ~/.claude dir; under a redirected CLAUDE_CONFIG_DIR
        # it is expected at <home>/.claude.json. Seed it too, or Claude Code finds no
        # main config, writes a fresh default and backs it up, and warns once per
        # internal phase (harmless but noisy). Deleted with the run like the rest.
        main_config = Path.home() / ".claude.json"
        if main_config.is_file():
            try:
                shutil.copy2(main_config, config_home / ".claude.json")
            except OSError:
                pass  # best-effort; the run still proceeds without it (just noisy)
        return {"CLAUDE_CONFIG_DIR": str(config_home)}

    def stream_event(self, raw_line):
        """Map one raw output line to a small progress event, or ``None``.

        Returns ``{"kind": "start"}`` for a ``system``/``init`` event,
        ``{"kind": "thinking"}`` for ``system``/``thinking_tokens``,
        ``{"kind": "tool", "name": ...}`` when a ``stream_event`` starts a
        ``tool_use`` content block, and ``{"kind": "result"}`` for a ``result``
        event. Lines that are not JSON objects, and every other event, give
        ``None``.
        """
        try:
            d = json.loads(raw_line)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return None
        if not isinstance(d, dict):
            return None
        t = d.get("type")
        if t == "system":
            sub = d.get("subtype")
            if sub == "init":
                return {"kind": "start"}
            if sub == "thinking_tokens":
                return {"kind": "thinking"}
            return None
        if t == "stream_event":
            ev = d.get("event")
            if isinstance(ev, dict) and ev.get("type") == "content_block_start":
                block = ev.get("content_block")
                if isinstance(block, dict) and block.get("type") == "tool_use":
                    return {"kind": "tool", "name": block.get("name")}
            return None
        if t == "result":
            return {"kind": "result"}
        return None


# Importing this module registers the adapter.
register(ClaudeAdapter())
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Adapter for OpenAI's Codex CLI (non-interactive).
|
|
4
|
+
|
|
5
|
+
Effort maps to ``model_reasoning_effort``. Flags
|
|
6
|
+
are best-effort for v0.0.1 and are easy to correct here; the executable seam
|
|
7
|
+
lets you point at any binary.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import shutil
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from generic_ml_cache_core.application.domain.model.parsed_output import ParsedOutput
|
|
18
|
+
from generic_ml_cache_core.application.domain.model.usage.usage import Usage, int_or_none
|
|
19
|
+
from generic_ml_cache_core.application.port.out.base import (
|
|
20
|
+
ClientAdapter,
|
|
21
|
+
ensure_trailing_newline,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CodexAdapter(ClientAdapter):
|
|
26
|
+
name = "codex"
|
|
27
|
+
default_executable = "codex"
|
|
28
|
+
|
|
29
|
+
def build_argv(
|
|
30
|
+
self,
|
|
31
|
+
executable,
|
|
32
|
+
run_dir,
|
|
33
|
+
model,
|
|
34
|
+
effort,
|
|
35
|
+
context,
|
|
36
|
+
prompt,
|
|
37
|
+
system_prompt,
|
|
38
|
+
client_args=(),
|
|
39
|
+
grants=(),
|
|
40
|
+
) -> List[str]:
|
|
41
|
+
# Capability doors (sandbox write, network, web-search) now live in
|
|
42
|
+
# $CODEX_HOME/config.toml written by grant_setup -- not in argv. build_argv
|
|
43
|
+
# carries only transport: the exec subcommand, JSON events, the no-git-repo
|
|
44
|
+
# escape and the cwd fence, model + effort, the system prompt, passthrough,
|
|
45
|
+
# and the trailing stdin marker.
|
|
46
|
+
argv = [
|
|
47
|
+
executable,
|
|
48
|
+
"exec",
|
|
49
|
+
"--json",
|
|
50
|
+
"--skip-git-repo-check",
|
|
51
|
+
"-C",
|
|
52
|
+
str(run_dir),
|
|
53
|
+
"--model",
|
|
54
|
+
model,
|
|
55
|
+
]
|
|
56
|
+
# Effort is optional: when omitted, leave model_reasoning_effort unset so
|
|
57
|
+
# Codex uses the model's own default instead of an empty override.
|
|
58
|
+
if effort:
|
|
59
|
+
argv += ["-c", f"model_reasoning_effort={effort}"]
|
|
60
|
+
# The prompt + context are delivered on stdin (see stdin_payload), so a
|
|
61
|
+
# large prompt cannot exceed the OS argument-size limit. The trailing "-"
|
|
62
|
+
# tells `codex exec` to read the prompt from stdin instead of an argv
|
|
63
|
+
# argument; communicate() then closes stdin on EOF, which also avoids the
|
|
64
|
+
# "reading additional input from stdin" hang codex shows in a non-TTY child
|
|
65
|
+
# when a prompt argument is given but stdin is left open. The system prompt
|
|
66
|
+
# is a small config value and stays in argv.
|
|
67
|
+
argv += ["-c", f"experimental_instructions={system_prompt}"]
|
|
68
|
+
# Passthrough args before the trailing "-" (the stdin marker, codex's last
|
|
69
|
+
# positional), so they are still parsed as flags. Appended verbatim.
|
|
70
|
+
argv += client_args
|
|
71
|
+
argv.append("-")
|
|
72
|
+
return argv
|
|
73
|
+
|
|
74
|
+
def stdin_payload(self, context, prompt, system_prompt) -> Optional[str]:
|
|
75
|
+
return f"{context}\n\n{prompt}" if context else prompt
|
|
76
|
+
|
|
77
|
+
def parse_output(self, stdout: str) -> ParsedOutput:
    """Extract the answer text and token usage from Codex ``--json`` output.

    The output is read as a JSON-lines event stream, one event per line.
    The answer is the ``text`` of the last ``item.completed`` event whose item
    is an ``agent_message``; the usage is the ``usage`` block of the last
    ``turn.completed`` event. Blank lines, non-JSON lines and non-object
    values are skipped.

    If no answer event is found, the raw *stdout* is returned as the text
    with no usage. Cache-write tokens and cost are always ``None`` (unknown,
    not zero), and reasoning tokens are read from their own field rather
    than folded into the output count.
    """
    final_text: Optional[str] = None
    final_usage: Optional[Dict[str, Any]] = None

    for raw in stdout.splitlines():
        stripped = raw.strip()
        if not stripped:
            continue
        try:
            record = json.loads(stripped)
        except json.JSONDecodeError:
            # A stray non-JSON line in the stream is tolerated, not fatal.
            continue
        if not isinstance(record, dict):
            continue

        kind = record.get("type")
        if kind == "turn.completed":
            candidate = record.get("usage")
            if isinstance(candidate, dict):
                final_usage = candidate
            continue
        if kind != "item.completed":
            continue

        payload = record.get("item")
        if not isinstance(payload, dict) or payload.get("type") != "agent_message":
            continue
        message = payload.get("text")
        if isinstance(message, str):
            # Later messages overwrite earlier ones: the last one is the answer.
            final_text = message

    if final_text is None:
        # No answer event was seen -- hand back the raw stream, no usage.
        return ParsedOutput(text=stdout, usage=None)

    usage = None
    if final_usage is not None:
        read = final_usage.get
        usage = Usage(
            input_tokens=int_or_none(read("input_tokens")),
            output_tokens=int_or_none(read("output_tokens")),
            cache_read_tokens=int_or_none(read("cached_input_tokens")),
            # No cache-write count is read from the usage block: unknown, not zero.
            cache_write_tokens=None,
            reasoning_tokens=int_or_none(read("reasoning_output_tokens")),
            cost_usd=None,
            raw=dict(final_usage),
        )
    return ParsedOutput(text=ensure_trailing_newline(final_text), usage=usage)
|
|
124
|
+
|
|
125
|
+
def grant_setup(self, run_dir, config_home, grants):
    """Write a Codex ``config.toml`` into *config_home* and return its location.

    The file always sets ``approval_policy = "never"`` and
    ``sandbox_mode = "workspace-write"``. A ``"web-search"`` grant adds
    ``web_search = "live"``; a ``"net"`` grant adds a
    ``[sandbox_workspace_write]`` table with ``network_access = true``.
    Top-level keys are emitted before the table header so they stay
    top-level in the TOML.

    If ``~/.codex/auth.json`` exists it is copied next to the config; a failed
    copy is ignored. *run_dir* is not used here.

    Returns a one-entry dict mapping ``CODEX_HOME`` to *config_home* as a
    string (presumably merged into the child's environment by the caller).
    """
    toml_lines = ['approval_policy = "never"', 'sandbox_mode = "workspace-write"']
    if "web-search" in grants:
        toml_lines.append('web_search = "live"')
    if "net" in grants:
        toml_lines.extend(["[sandbox_workspace_write]", "network_access = true"])

    config_home.mkdir(parents=True, exist_ok=True)
    config_path = config_home / "config.toml"
    config_path.write_text(
        "".join(f"{entry}\n" for entry in toml_lines), encoding="utf-8"
    )

    user_auth = Path.home() / ".codex" / "auth.json"
    if user_auth.is_file():
        try:
            shutil.copy2(user_auth, config_home / "auth.json")
        except OSError:
            # Best-effort: an env API key still authenticates the run.
            pass

    return {"CODEX_HOME": str(config_home)}
|
|
148
|
+
|
|
149
|
+
def stream_event(self, raw_line):
    """Classify one raw line of the Codex JSON event stream.

    Returns a small dict describing the event, or ``None`` when the line is
    not valid JSON, is not a JSON object, or is an event that is not reported:

    * ``thread.started``                  -> ``{"kind": "start"}``
    * ``turn.completed``                  -> ``{"kind": "result"}``
    * ``error`` / ``turn.failed``         -> ``{"kind": "error"}``
    * ``item.completed`` (agent_message)  -> ``{"kind": "message"}``
    * ``item.completed`` (any other truthy item type)
                                          -> ``{"kind": "tool", "name": <type>}``
    """
    try:
        event = json.loads(raw_line)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(event, dict):
        return None

    event_type = event.get("type")

    if event_type == "item.completed":
        item = event.get("item")
        if not isinstance(item, dict):
            return None
        item_type = item.get("type")
        if item_type == "agent_message":
            return {"kind": "message"}
        # Any other named item is surfaced as a tool step under its own type.
        return {"kind": "tool", "name": item_type} if item_type else None

    if event_type == "thread.started":
        return {"kind": "start"}
    if event_type == "turn.completed":
        return {"kind": "result"}
    if event_type == "error" or event_type == "turn.failed":
        return {"kind": "error"}
    return None
|