coding-cli-runtime 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coding_cli_runtime-0.4.0/CHANGELOG.md +96 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/PKG-INFO +163 -16
- coding_cli_runtime-0.4.0/README.md +333 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/pyproject.toml +2 -2
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/__init__.py +31 -3
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/codex_cli.py +10 -8
- coding_cli_runtime-0.4.0/src/coding_cli_runtime/headless.py +124 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/provider_contracts.py +220 -39
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/session_logs.py +56 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/PKG-INFO +163 -16
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/SOURCES.txt +5 -1
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_coverage_gaps.py +15 -7
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_packaging.py +4 -3
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_provider_contracts.py +3 -3
- coding_cli_runtime-0.4.0/tests/test_stage2_tier1.py +241 -0
- coding_cli_runtime-0.4.0/tests/test_stage3_io_contracts.py +119 -0
- coding_cli_runtime-0.4.0/tests/test_stage4_helpers.py +105 -0
- coding_cli_runtime-0.2.0/CHANGELOG.md +0 -69
- coding_cli_runtime-0.2.0/README.md +0 -186
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/CONTRIBUTING.md +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/LICENSE +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/MANIFEST.in +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/setup.cfg +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/auth.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/contracts.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/copilot_reasoning_baseline.json +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/copilot_reasoning_logs.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/failure_classification.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/json_io.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/provider_controls.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/provider_specs.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/py.typed +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/reasoning.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/redaction.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/schema_validation.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/session_execution.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/subprocess_runner.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/dependency_links.txt +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/top_level.txt +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_copilot_reasoning_logs.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_package_resources.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_playground_probe_smoke.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_provider_catalog_resolution.py +0 -0
- {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_runtime_parity.py +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [0.4.0] - 2026-04-09
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- `OutputContract`, `IoContract`, `SessionDiscoveryContract`,
|
|
13
|
+
`DiagnosticsContract` sub-contracts on `ProviderContract`, with data
|
|
14
|
+
populated for all four providers.
|
|
15
|
+
- `WorkspaceEnvVar` structured type with `name` + `value_source` semantics
|
|
16
|
+
(replaces bare env-var name strings in `IoContract.workspace_env_vars`).
|
|
17
|
+
- `WorkspaceEnvValueSource` — closed vocabulary (`"execution_dir"` /
|
|
18
|
+
`"workspace_root"`) for `WorkspaceEnvVar.value_source`.
|
|
19
|
+
- `resolve_workspace_env()` — turns `IoContract.workspace_env_vars` into a
|
|
20
|
+
concrete env overlay from an execution directory.
|
|
21
|
+
- `resolve_session_search_paths()` — expands `SessionDiscoveryContract`
|
|
22
|
+
roots into concrete host paths.
|
|
23
|
+
- `is_provider_installed()` — checks whether a provider CLI binary is on
|
|
24
|
+
PATH.
|
|
25
|
+
- README sections: "Query provider I/O conventions" and "Common integration
|
|
26
|
+
tasks" with copy-pasteable examples.
|
|
27
|
+
- `WorkspaceEnvVar` added to key-types table in README.
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
- Gemini `session_glob` tightened from `"*.json"` to `"*/chats/session-*.json"`
|
|
31
|
+
to match the real `tmp/{hash}/chats/session-*.json` layout.
|
|
32
|
+
- Claude `session_glob` tightened from `"*.jsonl"` to
|
|
33
|
+
`"*/conversation.jsonl"` to match per-project subdirectory structure.
|
|
34
|
+
|
|
35
|
+
## [0.3.0] - 2026-04-09
|
|
36
|
+
|
|
37
|
+
### Added
|
|
38
|
+
- Per-provider headless launch helpers: `build_claude_headless_core()`,
|
|
39
|
+
`build_codex_headless_core()`, `build_copilot_headless_core()`,
|
|
40
|
+
`build_gemini_headless_core()`. These emit the standard non-interactive
|
|
41
|
+
flags for each provider; callers append app-specific tails.
|
|
42
|
+
- Session log discovery section in README.
|
|
43
|
+
- API summary table in README.
|
|
44
|
+
|
|
45
|
+
### Changed
|
|
46
|
+
- `build_codex_exec_spec()` now delegates to `build_codex_headless_core()`.
|
|
47
|
+
`full_auto` and `skip_git_repo_check` params preserved.
|
|
48
|
+
- README rewritten with task-oriented examples, `run_interactive_session`
|
|
49
|
+
usage, `uv add` install, and API summary.
|
|
50
|
+
|
|
51
|
+
## [0.2.0] - 2026-04-08
|
|
52
|
+
|
|
53
|
+
### Added
|
|
54
|
+
- `ProviderContract` API — structured, nested metadata for all four provider
|
|
55
|
+
CLIs (Claude, Codex, Gemini, Copilot). Composed of `AuthContract`,
|
|
56
|
+
`PathContract`, `HeadlessContract`, `PromptTransport`, `ApprovalContract`,
|
|
57
|
+
`SandboxContract`.
|
|
58
|
+
- `get_provider_contract(provider_id)` — returns structured contract for a
|
|
59
|
+
provider.
|
|
60
|
+
- `build_env_overlay(contract, api_key, base_url)` — builds provider-specific
|
|
61
|
+
env var overlay from contract metadata.
|
|
62
|
+
- `resolve_config_paths(contract, containerized)` — resolves host and container
|
|
63
|
+
config directory paths.
|
|
64
|
+
- `render_prompt(transport, prompt)` — resolves prompt delivery into argv args +
|
|
65
|
+
stdin text based on provider transport mode.
|
|
66
|
+
- `PromptPayload` dataclass for resolved prompt delivery.
|
|
67
|
+
- `resolve_auth()` — resolves provider auth status from environment.
|
|
68
|
+
- `__version__` attribute.
|
|
69
|
+
- `CONTRIBUTING.md` with development setup and quality checks.
|
|
70
|
+
|
|
71
|
+
### Changed
|
|
72
|
+
- `run_interactive_session()` observability kwargs (`job_name`, `phase_tag`)
|
|
73
|
+
now have sensible defaults so callers don't need to supply them.
|
|
74
|
+
- `CliRunResult.command` type widened from `tuple[str, ...]` to `Sequence[str]`.
|
|
75
|
+
- Provider model catalogs resolved with three-tier fallback: user override
|
|
76
|
+
file > live CLI discovery > hardcoded fallback.
|
|
77
|
+
|
|
78
|
+
### Fixed
|
|
79
|
+
- Copilot BYOK (`COPILOT_PROVIDER_API_KEY`) now discoverable via contract
|
|
80
|
+
but not reported as "required" in `resolve_auth()` — BYOK is opt-in.
|
|
81
|
+
|
|
82
|
+
## [0.1.0] - 2026-04-07
|
|
83
|
+
|
|
84
|
+
### Added
|
|
85
|
+
- Provider metadata and controls for Claude, Codex, Copilot, and Gemini CLIs.
|
|
86
|
+
- Shared request/result contracts (`CliRunRequest`, `CliRunResult`, `CliLaunchSpec`).
|
|
87
|
+
- Schema loading and payload validation (`load_schema`, `validate_payload`).
|
|
88
|
+
- Synchronous and asynchronous subprocess execution helpers.
|
|
89
|
+
- Interactive session execution with transcript mirroring.
|
|
90
|
+
- Session log discovery and parsing utilities.
|
|
91
|
+
- Claude reasoning policy resolution.
|
|
92
|
+
- Log redaction helpers.
|
|
93
|
+
- Copilot reasoning log parsing and classification.
|
|
94
|
+
- PEP 561 `py.typed` markers for both `coding_cli_runtime` and `shared_cli_runtime`.
|
|
95
|
+
- Packaged JSON schemas and Copilot reasoning baseline data.
|
|
96
|
+
- Playground knowledge base with probing guides and experiment templates.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coding-cli-runtime
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Reusable CLI runtime primitives for provider-backed automation workflows
|
|
5
5
|
Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,17 +40,21 @@ code doesn't need provider-specific subprocess handling.
|
|
|
40
40
|
|
|
41
41
|
**What it does (and why not just `subprocess.run`):**
|
|
42
42
|
|
|
43
|
-
-
|
|
44
|
-
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
43
|
+
- Run any provider CLI with unified request/result types and timeout enforcement
|
|
44
|
+
- Query the model catalog (with user-override and live-cache fallback)
|
|
45
|
+
- Classify failures as retryable vs fatal per provider
|
|
46
|
+
- Look up provider auth, config dirs, and headless launch flags
|
|
47
|
+
- Build non-interactive launch commands without hardcoding provider flags
|
|
48
|
+
- Find session logs after a run (Codex, Claude)
|
|
49
|
+
- Run long-lived sessions with process-group cleanup and transcript mirroring
|
|
50
|
+
- No Python package dependencies — only requires the provider CLIs themselves
|
|
49
51
|
|
|
50
52
|
## Installation
|
|
51
53
|
|
|
52
54
|
```bash
|
|
53
55
|
pip install coding-cli-runtime
|
|
56
|
+
# or
|
|
57
|
+
uv add coding-cli-runtime
|
|
54
58
|
```
|
|
55
59
|
|
|
56
60
|
Requires Python 3.10+.
|
|
@@ -65,7 +69,7 @@ from pathlib import Path
|
|
|
65
69
|
from coding_cli_runtime import CliRunRequest, run_cli_command
|
|
66
70
|
|
|
67
71
|
request = CliRunRequest(
|
|
68
|
-
cmd_parts=("codex", "--model", "
|
|
72
|
+
cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
|
|
69
73
|
cwd=Path("/tmp/my-project"),
|
|
70
74
|
timeout_seconds=120,
|
|
71
75
|
)
|
|
@@ -148,6 +152,44 @@ else:
|
|
|
148
152
|
Works for all four providers. Recognizes auth failures, rate limits,
|
|
149
153
|
network transients, and other provider-specific error patterns.
|
|
150
154
|
|
|
155
|
+
### Common integration tasks
|
|
156
|
+
|
|
157
|
+
#### Check whether a provider CLI is installed
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from coding_cli_runtime import is_provider_installed
|
|
161
|
+
|
|
162
|
+
if not is_provider_installed("claude"):
|
|
163
|
+
raise RuntimeError("Claude Code is not available on PATH")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
This is intentionally minimal: it checks whether the provider binary exists on
|
|
167
|
+
PATH. Deeper CLI drift validation belongs in maintainer tooling, not the
|
|
168
|
+
runtime API.
|
|
169
|
+
|
|
170
|
+
#### Resolve workspace env vars and session search paths
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from coding_cli_runtime import (
|
|
174
|
+
get_provider_contract,
|
|
175
|
+
resolve_session_search_paths,
|
|
176
|
+
resolve_workspace_env,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
gemini = get_provider_contract("gemini")
|
|
180
|
+
|
|
181
|
+
# Derive provider-specific workspace env vars from contract metadata
|
|
182
|
+
env = resolve_workspace_env(gemini, "/tmp/run-dir")
|
|
183
|
+
# {"GEMINI_CLI_IDE_WORKSPACE_PATH": "/tmp/run-dir"}
|
|
184
|
+
|
|
185
|
+
# Expand concrete host paths for session log searches
|
|
186
|
+
paths = resolve_session_search_paths(gemini)
|
|
187
|
+
# (Path.home() / ".gemini" / "tmp",)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Use these helpers when you want the contract facts turned into concrete
|
|
191
|
+
filesystem/env values without rebuilding the same glue logic in each consumer.
|
|
192
|
+
|
|
151
193
|
### Look up provider contract metadata
|
|
152
194
|
|
|
153
195
|
```python
|
|
@@ -175,11 +217,75 @@ payload = render_prompt(contract.headless.prompt, "Fix the bug")
|
|
|
175
217
|
```
|
|
176
218
|
|
|
177
219
|
`ProviderContract` is structured as nested sub-contracts
|
|
178
|
-
(`AuthContract`, `PathContract`, `HeadlessContract`) so consumers
|
|
220
|
+
(`AuthContract`, `PathContract`, `HeadlessContract`, `OutputContract`,
|
|
221
|
+
`IoContract`, `SessionDiscoveryContract`, `DiagnosticsContract`) so consumers
|
|
179
222
|
can drill into whichever aspect they need. This is reference metadata,
|
|
180
|
-
not a command-construction control plane —
|
|
223
|
+
not a command-construction control plane — callers keep their own
|
|
181
224
|
command assembly and adopt contract fields selectively.
|
|
182
225
|
|
|
226
|
+
### Query provider I/O conventions
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from coding_cli_runtime import get_provider_contract
|
|
230
|
+
|
|
231
|
+
gemini = get_provider_contract("gemini")
|
|
232
|
+
|
|
233
|
+
# Workspace env vars with value semantics
|
|
234
|
+
for wev in gemini.io.workspace_env_vars:
|
|
235
|
+
print(f"{wev.name} = {wev.value_source}")
|
|
236
|
+
# GEMINI_CLI_IDE_WORKSPACE_PATH = execution_dir
|
|
237
|
+
|
|
238
|
+
# Session discovery (where session logs live)
|
|
239
|
+
sd = gemini.session_discovery
|
|
240
|
+
print(sd.session_roots) # ("tmp",)
|
|
241
|
+
print(sd.session_glob) # "*/chats/session-*.json"
|
|
242
|
+
|
|
243
|
+
# Output format support
|
|
244
|
+
codex = get_provider_contract("codex")
|
|
245
|
+
print(codex.output.output_path_flag) # "-o"
|
|
246
|
+
print(codex.output.schema_path_flag) # "--output-schema"
|
|
247
|
+
|
|
248
|
+
# Diagnostics (Copilot only)
|
|
249
|
+
copilot = get_provider_contract("copilot")
|
|
250
|
+
if copilot.diagnostics:
|
|
251
|
+
print(copilot.diagnostics.log_glob) # "logs/process-*.log"
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
`WorkspaceEnvVar.value_source` uses a closed vocabulary:
|
|
255
|
+
`"execution_dir"` or `"workspace_root"`.
|
|
256
|
+
|
|
257
|
+
### Build headless launch commands
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
|
|
261
|
+
|
|
262
|
+
# Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
|
|
263
|
+
cmd = build_claude_headless_core("claude-sonnet-4-6")
|
|
264
|
+
cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
|
|
265
|
+
|
|
266
|
+
# Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
|
|
267
|
+
cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
|
|
268
|
+
cmd.extend(["-C", str(workdir)])
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Headless core helpers emit the standard flags for non-interactive runs.
|
|
272
|
+
Consumers append app-specific tails (tool restrictions, output paths, etc.).
|
|
273
|
+
|
|
274
|
+
### Find session logs after a run
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
import time
|
|
278
|
+
from coding_cli_runtime import find_codex_session, find_claude_session
|
|
279
|
+
|
|
280
|
+
# Find the most recent Codex session log for a given working directory
|
|
281
|
+
session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
|
|
282
|
+
if session:
|
|
283
|
+
print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
Works for Codex and Claude. Scans provider config directories for session
|
|
287
|
+
files matching the working directory and time window.
|
|
288
|
+
|
|
183
289
|
## Key types
|
|
184
290
|
|
|
185
291
|
| Type | Purpose |
|
|
@@ -188,14 +294,55 @@ command assembly and adopt contract fields selectively.
|
|
|
188
294
|
| `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
|
|
189
295
|
| `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
|
|
190
296
|
| `ProviderSpec` | Provider catalog entry with models, controls, defaults |
|
|
191
|
-
| `ProviderContract` | Structured provider CLI metadata (auth, paths, headless) |
|
|
297
|
+
| `ProviderContract` | Structured provider CLI metadata (auth, paths, headless, I/O, sessions) |
|
|
298
|
+
| `WorkspaceEnvVar` | Env var with value-source semantics (`execution_dir`, `workspace_root`) |
|
|
192
299
|
| `FailureClassification` | Classified error with retryable flag and category |
|
|
193
300
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
301
|
+
### Run long-lived CLI sessions
|
|
302
|
+
|
|
303
|
+
For CLI runs that take minutes (e.g., full app generation), use
|
|
304
|
+
`run_interactive_session()` instead of `run_cli_command()`. It adds:
|
|
305
|
+
|
|
306
|
+
- Process-group cleanup (kills orphaned child processes on timeout)
|
|
307
|
+
- Transcript mirroring (streams CLI output to a file while the process runs)
|
|
308
|
+
- Automatic retries on transient failures
|
|
309
|
+
|
|
310
|
+
```python
|
|
311
|
+
from coding_cli_runtime import run_interactive_session
|
|
312
|
+
|
|
313
|
+
result = await run_interactive_session(
|
|
314
|
+
cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
|
|
315
|
+
cwd=workdir,
|
|
316
|
+
stdin_text=prompt,
|
|
317
|
+
logger=logger,
|
|
318
|
+
timeout_seconds=600,
|
|
319
|
+
)
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
|
|
323
|
+
Other parameters have sensible defaults.
|
|
324
|
+
|
|
325
|
+
## API summary
|
|
326
|
+
|
|
327
|
+
The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
|
|
328
|
+
Key function groups:
|
|
329
|
+
|
|
330
|
+
| Group | Functions |
|
|
331
|
+
|-------|-----------|
|
|
332
|
+
| Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
|
|
333
|
+
| Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
|
|
334
|
+
| Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth`, `resolve_workspace_env`, `resolve_session_search_paths` |
|
|
335
|
+
| Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
|
|
336
|
+
| Codex batch | `build_codex_exec_spec` |
|
|
337
|
+
| Failure handling | `classify_provider_failure` |
|
|
338
|
+
| Installation check | `is_provider_installed` |
|
|
339
|
+
| Session logs | `find_codex_session`, `find_claude_session` |
|
|
340
|
+
| Schema | `load_schema`, `validate_payload` |
|
|
341
|
+
| Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
|
|
342
|
+
|
|
343
|
+
## Contributing
|
|
344
|
+
|
|
345
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
|
|
199
346
|
|
|
200
347
|
## Prerequisites
|
|
201
348
|
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# coding-cli-runtime
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/coding-cli-runtime/)
|
|
4
|
+
[Supported Python versions](https://pypi.org/project/coding-cli-runtime/)
|
|
5
|
+
[CI status](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
|
|
9
|
+
|
|
10
|
+
These CLIs each have different invocation patterns, output formats, error
|
|
11
|
+
shapes, and timeout behaviors. This library normalizes all of that behind
|
|
12
|
+
a common `CliRunRequest` → `CliRunResult` contract, so your automation
|
|
13
|
+
code doesn't need provider-specific subprocess handling.
|
|
14
|
+
|
|
15
|
+
**What it does (and why not just `subprocess.run`):**
|
|
16
|
+
|
|
17
|
+
- Run any provider CLI with unified request/result types and timeout enforcement
|
|
18
|
+
- Query the model catalog (with user-override and live-cache fallback)
|
|
19
|
+
- Classify failures as retryable vs fatal per provider
|
|
20
|
+
- Look up provider auth, config dirs, and headless launch flags
|
|
21
|
+
- Build non-interactive launch commands without hardcoding provider flags
|
|
22
|
+
- Find session logs after a run (Codex, Claude)
|
|
23
|
+
- Run long-lived sessions with process-group cleanup and transcript mirroring
|
|
24
|
+
- No Python package dependencies — only requires the provider CLIs themselves
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install coding-cli-runtime
|
|
30
|
+
# or
|
|
31
|
+
uv add coding-cli-runtime
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Requires Python 3.10+.
|
|
35
|
+
|
|
36
|
+
## Examples
|
|
37
|
+
|
|
38
|
+
### Execute a provider CLI
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import asyncio
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
from coding_cli_runtime import CliRunRequest, run_cli_command
|
|
44
|
+
|
|
45
|
+
request = CliRunRequest(
|
|
46
|
+
cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
|
|
47
|
+
cwd=Path("/tmp/my-project"),
|
|
48
|
+
timeout_seconds=120,
|
|
49
|
+
)
|
|
50
|
+
result = asyncio.run(run_cli_command(request))
|
|
51
|
+
|
|
52
|
+
print(result.returncode) # 0
|
|
53
|
+
print(result.error_code) # "none"
|
|
54
|
+
print(result.duration_seconds) # 14.2
|
|
55
|
+
print(result.stdout_text[:200])
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
|
|
59
|
+
shape stays the same. A synchronous variant `run_cli_command_sync` is also
|
|
60
|
+
available.
|
|
61
|
+
|
|
62
|
+
### Pick a model from the provider catalog
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from coding_cli_runtime import get_provider_spec
|
|
66
|
+
|
|
67
|
+
codex = get_provider_spec("codex")
|
|
68
|
+
print(codex.default_model) # "gpt-5.3-codex"
|
|
69
|
+
print(codex.model_source) # "codex_cli_cache", "override", or "code"
|
|
70
|
+
|
|
71
|
+
for model in codex.models:
|
|
72
|
+
print(f" {model.name}: {model.description}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The catalog covers all four providers — each with model names, reasoning
|
|
76
|
+
levels, default settings, and visibility flags.
|
|
77
|
+
|
|
78
|
+
Model lists are resolved with a three-tier fallback:
|
|
79
|
+
|
|
80
|
+
1. **User override** — drop a JSON file at
|
|
81
|
+
`~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
|
|
82
|
+
model list immediately, without waiting for a package update.
|
|
83
|
+
2. **Live CLI cache** — for Codex, the library reads
|
|
84
|
+
`~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
|
|
85
|
+
present. Other providers fall through because their CLIs don't expose a
|
|
86
|
+
machine-readable model list.
|
|
87
|
+
3. **Hardcoded fallback** — the model list shipped with the package.
|
|
88
|
+
|
|
89
|
+
Override file format:
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"default_model": "claude-sonnet-4-7",
|
|
94
|
+
"models": [
|
|
95
|
+
"claude-sonnet-4-7",
|
|
96
|
+
{
|
|
97
|
+
"name": "claude-opus-5",
|
|
98
|
+
"description": "Latest opus model",
|
|
99
|
+
"controls": [
|
|
100
|
+
{ "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
|
|
101
|
+
]
|
|
102
|
+
}
|
|
103
|
+
]
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
|
|
108
|
+
(default: `~/.config/coding-cli-runtime`).
|
|
109
|
+
|
|
110
|
+
### Decide whether to retry a failed run
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from coding_cli_runtime import classify_provider_failure
|
|
114
|
+
|
|
115
|
+
classification = classify_provider_failure(
|
|
116
|
+
provider="gemini",
|
|
117
|
+
stderr_text="429 Resource exhausted: rate limit exceeded",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
if classification.retryable:
|
|
121
|
+
print(f"Retryable ({classification.category}) — will retry")
|
|
122
|
+
else:
|
|
123
|
+
print(f"Fatal ({classification.category}) — giving up")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Works for all four providers. Recognizes auth failures, rate limits,
|
|
127
|
+
network transients, and other provider-specific error patterns.
|
|
128
|
+
|
|
129
|
+
### Common integration tasks
|
|
130
|
+
|
|
131
|
+
#### Check whether a provider CLI is installed
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from coding_cli_runtime import is_provider_installed
|
|
135
|
+
|
|
136
|
+
if not is_provider_installed("claude"):
|
|
137
|
+
raise RuntimeError("Claude Code is not available on PATH")
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
This is intentionally minimal: it checks whether the provider binary exists on
|
|
141
|
+
PATH. Deeper CLI drift validation belongs in maintainer tooling, not the
|
|
142
|
+
runtime API.
|
|
143
|
+
|
|
144
|
+
#### Resolve workspace env vars and session search paths
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from coding_cli_runtime import (
|
|
148
|
+
get_provider_contract,
|
|
149
|
+
resolve_session_search_paths,
|
|
150
|
+
resolve_workspace_env,
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
gemini = get_provider_contract("gemini")
|
|
154
|
+
|
|
155
|
+
# Derive provider-specific workspace env vars from contract metadata
|
|
156
|
+
env = resolve_workspace_env(gemini, "/tmp/run-dir")
|
|
157
|
+
# {"GEMINI_CLI_IDE_WORKSPACE_PATH": "/tmp/run-dir"}
|
|
158
|
+
|
|
159
|
+
# Expand concrete host paths for session log searches
|
|
160
|
+
paths = resolve_session_search_paths(gemini)
|
|
161
|
+
# (Path.home() / ".gemini" / "tmp",)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Use these helpers when you want the contract facts turned into concrete
|
|
165
|
+
filesystem/env values without rebuilding the same glue logic in each consumer.
|
|
166
|
+
|
|
167
|
+
### Look up provider contract metadata
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from coding_cli_runtime import get_provider_contract, build_env_overlay, resolve_config_paths, render_prompt
|
|
171
|
+
|
|
172
|
+
# Get structured metadata for any supported provider
|
|
173
|
+
contract = get_provider_contract("claude")
|
|
174
|
+
print(contract.binary) # "claude"
|
|
175
|
+
print(contract.auth.api_key_env_var) # "CLAUDE_API_KEY"
|
|
176
|
+
print(contract.paths.config_dir) # "~/.claude"
|
|
177
|
+
print(contract.headless.approval.flag) # "--dangerously-skip-permissions"
|
|
178
|
+
|
|
179
|
+
# Build env var overlay for subprocess
|
|
180
|
+
env = build_env_overlay(contract, api_key="sk-...", base_url="https://custom.example.com")
|
|
181
|
+
# {"CLAUDE_API_KEY": "sk-...", "ANTHROPIC_BASE_URL": "https://custom.example.com"}
|
|
182
|
+
|
|
183
|
+
# Resolve config paths for container mounts
|
|
184
|
+
host_dir, container_dir = resolve_config_paths(contract, containerized=True)
|
|
185
|
+
# ("/home/user/.claude", "/root/.claude")
|
|
186
|
+
|
|
187
|
+
# Resolve prompt delivery (stdin vs flag vs activation)
|
|
188
|
+
payload = render_prompt(contract.headless.prompt, "Fix the bug")
|
|
189
|
+
# payload.args = () (stdin delivery for Claude)
|
|
190
|
+
# payload.stdin_text = "Fix the bug"
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
`ProviderContract` is structured as nested sub-contracts
|
|
194
|
+
(`AuthContract`, `PathContract`, `HeadlessContract`, `OutputContract`,
|
|
195
|
+
`IoContract`, `SessionDiscoveryContract`, `DiagnosticsContract`) so consumers
|
|
196
|
+
can drill into whichever aspect they need. This is reference metadata,
|
|
197
|
+
not a command-construction control plane — callers keep their own
|
|
198
|
+
command assembly and adopt contract fields selectively.
|
|
199
|
+
|
|
200
|
+
### Query provider I/O conventions
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
from coding_cli_runtime import get_provider_contract
|
|
204
|
+
|
|
205
|
+
gemini = get_provider_contract("gemini")
|
|
206
|
+
|
|
207
|
+
# Workspace env vars with value semantics
|
|
208
|
+
for wev in gemini.io.workspace_env_vars:
|
|
209
|
+
print(f"{wev.name} = {wev.value_source}")
|
|
210
|
+
# GEMINI_CLI_IDE_WORKSPACE_PATH = execution_dir
|
|
211
|
+
|
|
212
|
+
# Session discovery (where session logs live)
|
|
213
|
+
sd = gemini.session_discovery
|
|
214
|
+
print(sd.session_roots) # ("tmp",)
|
|
215
|
+
print(sd.session_glob) # "*/chats/session-*.json"
|
|
216
|
+
|
|
217
|
+
# Output format support
|
|
218
|
+
codex = get_provider_contract("codex")
|
|
219
|
+
print(codex.output.output_path_flag) # "-o"
|
|
220
|
+
print(codex.output.schema_path_flag) # "--output-schema"
|
|
221
|
+
|
|
222
|
+
# Diagnostics (Copilot only)
|
|
223
|
+
copilot = get_provider_contract("copilot")
|
|
224
|
+
if copilot.diagnostics:
|
|
225
|
+
print(copilot.diagnostics.log_glob) # "logs/process-*.log"
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
`WorkspaceEnvVar.value_source` uses a closed vocabulary:
|
|
229
|
+
`"execution_dir"` or `"workspace_root"`.
|
|
230
|
+
|
|
231
|
+
### Build headless launch commands
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
|
|
235
|
+
|
|
236
|
+
# Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
|
|
237
|
+
cmd = build_claude_headless_core("claude-sonnet-4-6")
|
|
238
|
+
cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
|
|
239
|
+
|
|
240
|
+
# Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
|
|
241
|
+
cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
|
|
242
|
+
cmd.extend(["-C", str(workdir)])
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Headless core helpers emit the standard flags for non-interactive runs.
|
|
246
|
+
Consumers append app-specific tails (tool restrictions, output paths, etc.).
|
|
247
|
+
|
|
248
|
+
### Find session logs after a run
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
import time
|
|
252
|
+
from coding_cli_runtime import find_codex_session, find_claude_session
|
|
253
|
+
|
|
254
|
+
# Find the most recent Codex session log for a given working directory
|
|
255
|
+
session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
|
|
256
|
+
if session:
|
|
257
|
+
print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
These helpers work for Codex and Claude. They scan provider config directories for session
|
|
261
|
+
files matching the working directory and time window.
|
|
262
|
+
|
|
263
|
+
## Key types
|
|
264
|
+
|
|
265
|
+
| Type | Purpose |
|
|
266
|
+
|------|---------|
|
|
267
|
+
| `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
|
|
268
|
+
| `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
|
|
269
|
+
| `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
|
|
270
|
+
| `ProviderSpec` | Provider catalog entry with models, controls, defaults |
|
|
271
|
+
| `ProviderContract` | Structured provider CLI metadata (auth, paths, headless, I/O, sessions) |
|
|
272
|
+
| `WorkspaceEnvVar` | Env var with value-source semantics (`execution_dir`, `workspace_root`) |
|
|
273
|
+
| `FailureClassification` | Classified error with retryable flag and category |
|
|
274
|
+
|
|
275
|
+
### Run long-lived CLI sessions
|
|
276
|
+
|
|
277
|
+
For CLI runs that take minutes (e.g., full app generation), use
|
|
278
|
+
`run_interactive_session()` instead of `run_cli_command()`. It adds:
|
|
279
|
+
|
|
280
|
+
- Process-group cleanup (kills orphaned child processes on timeout)
|
|
281
|
+
- Transcript mirroring (streams CLI output to a file while the process runs)
|
|
282
|
+
- Automatic retries on transient failures
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
from coding_cli_runtime import run_interactive_session
|
|
286
|
+
|
|
287
|
+
result = await run_interactive_session(
|
|
288
|
+
cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
|
|
289
|
+
cwd=workdir,
|
|
290
|
+
stdin_text=prompt,
|
|
291
|
+
logger=logger,
|
|
292
|
+
timeout_seconds=600,
|
|
293
|
+
)
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
|
|
297
|
+
Other parameters have sensible defaults.
|
|
298
|
+
|
|
299
|
+
## API summary
|
|
300
|
+
|
|
301
|
+
The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
|
|
302
|
+
Key function groups:
|
|
303
|
+
|
|
304
|
+
| Group | Functions |
|
|
305
|
+
|-------|-----------|
|
|
306
|
+
| Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
|
|
307
|
+
| Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
|
|
308
|
+
| Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth`, `resolve_workspace_env`, `resolve_session_search_paths` |
|
|
309
|
+
| Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
|
|
310
|
+
| Codex batch | `build_codex_exec_spec` |
|
|
311
|
+
| Failure handling | `classify_provider_failure` |
|
|
312
|
+
| Installation check | `is_provider_installed` |
|
|
313
|
+
| Session logs | `find_codex_session`, `find_claude_session` |
|
|
314
|
+
| Schema | `load_schema`, `validate_payload` |
|
|
315
|
+
| Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
|
|
316
|
+
|
|
317
|
+
## Contributing
|
|
318
|
+
|
|
319
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
|
|
320
|
+
|
|
321
|
+
## Prerequisites
|
|
322
|
+
|
|
323
|
+
This package does **not** bundle any CLI binaries or credentials. You must
|
|
324
|
+
install and authenticate the relevant provider CLI yourself before using the
|
|
325
|
+
execution helpers.
|
|
326
|
+
|
|
327
|
+
## Status
|
|
328
|
+
|
|
329
|
+
Pre-1.0. The API may change between minor versions.
|
|
330
|
+
|
|
331
|
+
## License
|
|
332
|
+
|
|
333
|
+
MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "coding-cli-runtime"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "Reusable CLI runtime primitives for provider-backed automation workflows"
|
|
9
9
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
10
10
|
license = "MIT"
|
|
@@ -94,7 +94,7 @@ disallow_untyped_defs = false
|
|
|
94
94
|
warn_return_any = false
|
|
95
95
|
|
|
96
96
|
[tool.bumpversion]
|
|
97
|
-
current_version = "0.2.0"
|
|
97
|
+
current_version = "0.4.0"
|
|
98
98
|
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
|
99
99
|
serialize = ["{major}.{minor}.{patch}"]
|
|
100
100
|
commit = true
|