coding-cli-runtime 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coding_cli_runtime-0.3.0/CHANGELOG.md +101 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/PKG-INFO +121 -13
- coding_cli_runtime-0.3.0/README.md +261 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/pyproject.toml +2 -2
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/__init__.py +38 -2
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/auth.py +25 -9
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/codex_cli.py +10 -8
- coding_cli_runtime-0.3.0/src/coding_cli_runtime/headless.py +124 -0
- coding_cli_runtime-0.3.0/src/coding_cli_runtime/provider_contracts.py +482 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/session_logs.py +56 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/PKG-INFO +121 -13
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/SOURCES.txt +6 -1
- coding_cli_runtime-0.3.0/tests/test_coverage_gaps.py +913 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_packaging.py +10 -3
- coding_cli_runtime-0.3.0/tests/test_provider_contracts.py +322 -0
- coding_cli_runtime-0.3.0/tests/test_stage2_tier1.py +241 -0
- coding_cli_runtime-0.1.0/CHANGELOG.md +0 -57
- coding_cli_runtime-0.1.0/README.md +0 -153
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/CONTRIBUTING.md +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/LICENSE +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/MANIFEST.in +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/setup.cfg +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/contracts.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/copilot_reasoning_baseline.json +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/copilot_reasoning_logs.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/failure_classification.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/json_io.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_controls.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_specs.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/py.typed +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/reasoning.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/redaction.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schema_validation.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/session_execution.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/subprocess_runner.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/dependency_links.txt +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/top_level.txt +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_copilot_reasoning_logs.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_package_resources.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_playground_probe_smoke.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_provider_catalog_resolution.py +0 -0
- {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_runtime_parity.py +0 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [0.3.0] - 2026-04-09
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **Headless launch core helpers** — per-provider arg renderers derived from
|
|
13
|
+
`ProviderContract.headless`: `build_claude_headless_core()`,
|
|
14
|
+
`build_codex_headless_core()`, `build_copilot_headless_core()`,
|
|
15
|
+
`build_gemini_headless_core()`. All consumers (app-generation, feather,
|
|
16
|
+
codex_cli, provider_contracts builder) now delegate to these.
|
|
17
|
+
- `scan_session_dir()` — generic directory-scanning primitive for session log
|
|
18
|
+
discovery with `extract_fn` callback (internal, not in public `__all__`).
|
|
19
|
+
- Session log discovery section in README.
|
|
20
|
+
- API summary table in README.
|
|
21
|
+
- 27 new Stage 2 tests for headless cores, builder delegation, and
|
|
22
|
+
`scan_session_dir`.
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
- `build_codex_exec_spec()` now delegates to `build_codex_headless_core()`.
|
|
26
|
+
`full_auto` and `skip_git_repo_check` params preserved and passed through.
|
|
27
|
+
- `_build_non_interactive_run()` now delegates to per-provider headless core
|
|
28
|
+
helpers instead of assembling flags inline.
|
|
29
|
+
- Feather `report_data.py` and `report_sections.py` use headless core helpers
|
|
30
|
+
with fallback for environments without `coding_cli_runtime`.
|
|
31
|
+
- Feather `generate_report.py` Codex session discovery replaced with
|
|
32
|
+
`find_codex_session()` from `coding_cli_runtime`.
|
|
33
|
+
- App-generation `claude_impl.py`, `copilot_impl.py`, `gemini_impl.py`
|
|
34
|
+
`build_command()` functions delegate to headless core helpers.
|
|
35
|
+
- Dead headless opt-out flags removed from Copilot (`--allow-all`, `--ask-user`,
|
|
36
|
+
`--use-custom-instructions`) and Gemini (`--auto-approve`) CLI specs —
|
|
37
|
+
these were never used in batch runs and are now handled by the headless core.
|
|
38
|
+
- README rewritten: user-action feature list, `run_interactive_session` example,
|
|
39
|
+
`uv add` install, API summary, Contributing link, session log discovery.
|
|
40
|
+
|
|
41
|
+
## [0.2.0] - 2026-04-08
|
|
42
|
+
|
|
43
|
+
### Added
|
|
44
|
+
- **ProviderContract API** — structured, nested metadata for all four provider CLIs
|
|
45
|
+
(Claude, Codex, Gemini, Copilot). Composed of `AuthContract`, `PathContract`,
|
|
46
|
+
`HeadlessContract`, `PromptTransport`, `ApprovalContract`, `SandboxContract`.
|
|
47
|
+
- `get_provider_contract(provider_id)` — returns structured contract for a provider.
|
|
48
|
+
- `build_env_overlay(contract, api_key, base_url)` — builds provider-specific env
|
|
49
|
+
var overlay from contract metadata.
|
|
50
|
+
- `resolve_config_paths(contract, containerized)` — resolves host and container
|
|
51
|
+
config directory paths.
|
|
52
|
+
- `render_prompt(transport, prompt)` — resolves prompt delivery into argv args +
|
|
53
|
+
stdin text based on provider transport mode.
|
|
54
|
+
- `PromptPayload` dataclass for resolved prompt delivery.
|
|
55
|
+
- `__version__` attribute in `coding_cli_runtime`.
|
|
56
|
+
- `CONTRIBUTING.md`, `MANIFEST.in`, `.pre-commit-config.yaml`.
|
|
57
|
+
- PyPI / Python / Build / License badges in `README.md`.
|
|
58
|
+
- `bump-my-version` configuration syncing `pyproject.toml` and `__init__.py`.
|
|
59
|
+
- `ruff`, `mypy` (strict), and `pytest-cov` added to dev dependencies.
|
|
60
|
+
- CI quality gates: ruff check, ruff format, mypy, pytest-cov.
|
|
61
|
+
- README section documenting the new ProviderContract API with examples.
|
|
62
|
+
- 75 new tests for provider contracts, helpers, internal builder, failure
|
|
63
|
+
classification, codex_cli, schema validation (including nested), reasoning,
|
|
64
|
+
redaction, json_io, provider_controls, and auth. Package coverage 47% → 62%.
|
|
65
|
+
|
|
66
|
+
### Changed
|
|
67
|
+
- Consolidated `shared_cli_runtime` into `coding_cli_runtime`. The package now
|
|
68
|
+
ships a single top-level package; the `shared_cli_runtime` directory is removed.
|
|
69
|
+
- `MANIFEST.in` and docs updated to reference `coding_cli_runtime` paths.
|
|
70
|
+
- `run_interactive_session()` observability kwargs (`provider_label`, `job_name`,
|
|
71
|
+
`phase_tag`, `process_label`, `timeout_seconds`) now have sensible defaults so
|
|
72
|
+
external callers don't need to supply internal batch-system labels.
|
|
73
|
+
- Provider model catalogs are now resolved with a three-tier fallback:
|
|
74
|
+
user override file > live CLI discovery > hardcoded fallback.
|
|
75
|
+
- `auth.py`: `_PROVIDER_ENV_HINTS` now derived from `provider_contracts.py`
|
|
76
|
+
(single source of truth for auth env var names).
|
|
77
|
+
- `CliRunResult.command` type widened from `tuple[str, ...]` to `Sequence[str]`.
|
|
78
|
+
- Publish workflow path corrected (`shared-cli-runtime` → `coding-cli-runtime`).
|
|
79
|
+
|
|
80
|
+
### Fixed
|
|
81
|
+
- mypy strict compliance: return-type annotations, per-module overrides.
|
|
82
|
+
- ruff lint and format compliance across all source and test files.
|
|
83
|
+
- Copilot BYOK (`COPILOT_PROVIDER_API_KEY`) now discoverable via contract
|
|
84
|
+
but not reported as "required" in `resolve_auth()` — BYOK is opt-in.
|
|
85
|
+
|
|
86
|
+
## [0.1.0] - 2026-04-07
|
|
87
|
+
|
|
88
|
+
### Added
|
|
89
|
+
- Initial extraction from `llm-eval` monorepo.
|
|
90
|
+
- Provider metadata and controls for Claude, Codex, Copilot, and Gemini CLIs.
|
|
91
|
+
- Shared request/result contracts (`CliRunRequest`, `CliRunResult`, `CliLaunchSpec`).
|
|
92
|
+
- Schema loading and payload validation (`load_schema`, `validate_payload`).
|
|
93
|
+
- Synchronous and asynchronous subprocess execution helpers.
|
|
94
|
+
- Interactive session execution with transcript mirroring.
|
|
95
|
+
- Session log discovery and parsing utilities.
|
|
96
|
+
- Claude reasoning policy resolution.
|
|
97
|
+
- Log redaction helpers.
|
|
98
|
+
- Copilot reasoning log parsing and classification.
|
|
99
|
+
- PEP 561 `py.typed` markers for both `coding_cli_runtime` and `shared_cli_runtime`.
|
|
100
|
+
- Packaged JSON schemas and Copilot reasoning baseline data.
|
|
101
|
+
- Playground knowledge base with probing guides and experiment templates.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coding-cli-runtime
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Reusable CLI runtime primitives for provider-backed automation workflows
|
|
5
5
|
Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,17 +40,21 @@ code doesn't need provider-specific subprocess handling.
|
|
|
40
40
|
|
|
41
41
|
**What it does (and why not just `subprocess.run`):**
|
|
42
42
|
|
|
43
|
-
-
|
|
44
|
-
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
43
|
+
- Run any provider CLI with unified request/result types and timeout enforcement
|
|
44
|
+
- Query the model catalog (with user-override and live-cache fallback)
|
|
45
|
+
- Classify failures as retryable vs fatal per provider
|
|
46
|
+
- Look up provider auth, config dirs, and headless launch flags
|
|
47
|
+
- Build non-interactive launch commands without hardcoding provider flags
|
|
48
|
+
- Find session logs after a run (Codex, Claude)
|
|
49
|
+
- Run long-lived sessions with process-group cleanup and transcript mirroring
|
|
50
|
+
- No Python package dependencies — only requires the provider CLIs themselves
|
|
49
51
|
|
|
50
52
|
## Installation
|
|
51
53
|
|
|
52
54
|
```bash
|
|
53
55
|
pip install coding-cli-runtime
|
|
56
|
+
# or
|
|
57
|
+
uv add coding-cli-runtime
|
|
54
58
|
```
|
|
55
59
|
|
|
56
60
|
Requires Python 3.10+.
|
|
@@ -65,7 +69,7 @@ from pathlib import Path
|
|
|
65
69
|
from coding_cli_runtime import CliRunRequest, run_cli_command
|
|
66
70
|
|
|
67
71
|
request = CliRunRequest(
|
|
68
|
-
cmd_parts=("codex", "--model", "
|
|
72
|
+
cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
|
|
69
73
|
cwd=Path("/tmp/my-project"),
|
|
70
74
|
timeout_seconds=120,
|
|
71
75
|
)
|
|
@@ -148,6 +152,70 @@ else:
|
|
|
148
152
|
Works for all four providers. Recognizes auth failures, rate limits,
|
|
149
153
|
network transients, and other provider-specific error patterns.
|
|
150
154
|
|
|
155
|
+
### Look up provider contract metadata
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from coding_cli_runtime import get_provider_contract, build_env_overlay, resolve_config_paths, render_prompt
|
|
159
|
+
|
|
160
|
+
# Get structured metadata for any supported provider
|
|
161
|
+
contract = get_provider_contract("claude")
|
|
162
|
+
print(contract.binary) # "claude"
|
|
163
|
+
print(contract.auth.api_key_env_var) # "CLAUDE_API_KEY"
|
|
164
|
+
print(contract.paths.config_dir) # "~/.claude"
|
|
165
|
+
print(contract.headless.approval.flag) # "--dangerously-skip-permissions"
|
|
166
|
+
|
|
167
|
+
# Build env var overlay for subprocess
|
|
168
|
+
env = build_env_overlay(contract, api_key="sk-...", base_url="https://custom.example.com")
|
|
169
|
+
# {"CLAUDE_API_KEY": "sk-...", "ANTHROPIC_BASE_URL": "https://custom.example.com"}
|
|
170
|
+
|
|
171
|
+
# Resolve config paths for container mounts
|
|
172
|
+
host_dir, container_dir = resolve_config_paths(contract, containerized=True)
|
|
173
|
+
# ("/home/user/.claude", "/root/.claude")
|
|
174
|
+
|
|
175
|
+
# Resolve prompt delivery (stdin vs flag vs activation)
|
|
176
|
+
payload = render_prompt(contract.headless.prompt, "Fix the bug")
|
|
177
|
+
# payload.args = () (stdin delivery for Claude)
|
|
178
|
+
# payload.stdin_text = "Fix the bug"
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
`ProviderContract` is structured as nested sub-contracts
|
|
182
|
+
(`AuthContract`, `PathContract`, `HeadlessContract`) so consumers
|
|
183
|
+
can drill into whichever aspect they need. This is reference metadata,
|
|
184
|
+
not a command-construction control plane — consumers keep their own
|
|
185
|
+
command assembly and adopt contract fields selectively.
|
|
186
|
+
|
|
187
|
+
### Build headless launch commands
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
|
|
191
|
+
|
|
192
|
+
# Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
|
|
193
|
+
cmd = build_claude_headless_core("claude-sonnet-4-6")
|
|
194
|
+
cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
|
|
195
|
+
|
|
196
|
+
# Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
|
|
197
|
+
cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
|
|
198
|
+
cmd.extend(["-C", str(workdir)])
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Headless core helpers emit the standard flags for non-interactive runs.
|
|
202
|
+
Consumers append app-specific tails (tool restrictions, output paths, etc.).
|
|
203
|
+
|
|
204
|
+
### Find session logs after a run
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
import time
|
|
208
|
+
from coding_cli_runtime import find_codex_session, find_claude_session
|
|
209
|
+
|
|
210
|
+
# Find the most recent Codex session log for a given working directory
|
|
211
|
+
session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
|
|
212
|
+
if session:
|
|
213
|
+
print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Works for Codex and Claude. Scans provider config directories for session
|
|
217
|
+
files matching the working directory and time window.
|
|
218
|
+
|
|
151
219
|
## Key types
|
|
152
220
|
|
|
153
221
|
| Type | Purpose |
|
|
@@ -156,13 +224,53 @@ network transients, and other provider-specific error patterns.
|
|
|
156
224
|
| `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
|
|
157
225
|
| `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
|
|
158
226
|
| `ProviderSpec` | Provider catalog entry with models, controls, defaults |
|
|
227
|
+
| `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
|
|
159
228
|
| `FailureClassification` | Classified error with retryable flag and category |
|
|
160
229
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
230
|
+
### Run long-lived CLI sessions
|
|
231
|
+
|
|
232
|
+
For CLI runs that take minutes (e.g., full app generation), use
|
|
233
|
+
`run_interactive_session()` instead of `run_cli_command()`. It adds:
|
|
234
|
+
|
|
235
|
+
- Process-group cleanup (kills orphaned child processes on timeout)
|
|
236
|
+
- Transcript mirroring (streams CLI output to a file while the process runs)
|
|
237
|
+
- Automatic retries on transient failures
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from coding_cli_runtime import run_interactive_session
|
|
241
|
+
|
|
242
|
+
result = await run_interactive_session(
|
|
243
|
+
cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
|
|
244
|
+
cwd=workdir,
|
|
245
|
+
stdin_text=prompt,
|
|
246
|
+
logger=logger,
|
|
247
|
+
timeout_seconds=600,
|
|
248
|
+
)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
|
|
252
|
+
Observability labels (`job_name`, `phase_tag`) default to sensible values.
|
|
253
|
+
|
|
254
|
+
## API summary
|
|
255
|
+
|
|
256
|
+
The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
|
|
257
|
+
Key function groups:
|
|
258
|
+
|
|
259
|
+
| Group | Functions |
|
|
260
|
+
|-------|-----------|
|
|
261
|
+
| Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
|
|
262
|
+
| Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
|
|
263
|
+
| Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
|
|
264
|
+
| Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
|
|
265
|
+
| Codex batch | `build_codex_exec_spec` |
|
|
266
|
+
| Failure handling | `classify_provider_failure` |
|
|
267
|
+
| Session logs | `find_codex_session`, `find_claude_session` |
|
|
268
|
+
| Schema | `load_schema`, `validate_payload` |
|
|
269
|
+
| Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
|
|
270
|
+
|
|
271
|
+
## Contributing
|
|
272
|
+
|
|
273
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
|
|
166
274
|
|
|
167
275
|
## Prerequisites
|
|
168
276
|
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# coding-cli-runtime
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/coding-cli-runtime/)
|
|
4
|
+
[Python versions](https://pypi.org/project/coding-cli-runtime/)
|
|
5
|
+
[Build status](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
|
|
9
|
+
|
|
10
|
+
These CLIs each have different invocation patterns, output formats, error
|
|
11
|
+
shapes, and timeout behaviors. This library normalizes all of that behind
|
|
12
|
+
a common `CliRunRequest` → `CliRunResult` contract, so your automation
|
|
13
|
+
code doesn't need provider-specific subprocess handling.
|
|
14
|
+
|
|
15
|
+
**What it does (and why not just `subprocess.run`):**
|
|
16
|
+
|
|
17
|
+
- Run any provider CLI with unified request/result types and timeout enforcement
|
|
18
|
+
- Query the model catalog (with user-override and live-cache fallback)
|
|
19
|
+
- Classify failures as retryable vs fatal per provider
|
|
20
|
+
- Look up provider auth, config dirs, and headless launch flags
|
|
21
|
+
- Build non-interactive launch commands without hardcoding provider flags
|
|
22
|
+
- Find session logs after a run (Codex, Claude)
|
|
23
|
+
- Run long-lived sessions with process-group cleanup and transcript mirroring
|
|
24
|
+
- No Python package dependencies — only requires the provider CLIs themselves
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install coding-cli-runtime
|
|
30
|
+
# or
|
|
31
|
+
uv add coding-cli-runtime
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Requires Python 3.10+.
|
|
35
|
+
|
|
36
|
+
## Examples
|
|
37
|
+
|
|
38
|
+
### Execute a provider CLI
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import asyncio
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
from coding_cli_runtime import CliRunRequest, run_cli_command
|
|
44
|
+
|
|
45
|
+
request = CliRunRequest(
|
|
46
|
+
cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
|
|
47
|
+
cwd=Path("/tmp/my-project"),
|
|
48
|
+
timeout_seconds=120,
|
|
49
|
+
)
|
|
50
|
+
result = asyncio.run(run_cli_command(request))
|
|
51
|
+
|
|
52
|
+
print(result.returncode) # 0
|
|
53
|
+
print(result.error_code) # "none"
|
|
54
|
+
print(result.duration_seconds) # 14.2
|
|
55
|
+
print(result.stdout_text[:200])
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
|
|
59
|
+
shape stays the same. A synchronous variant `run_cli_command_sync` is also
|
|
60
|
+
available.
|
|
61
|
+
|
|
62
|
+
### Pick a model from the provider catalog
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from coding_cli_runtime import get_provider_spec
|
|
66
|
+
|
|
67
|
+
codex = get_provider_spec("codex")
|
|
68
|
+
print(codex.default_model) # "gpt-5.3-codex"
|
|
69
|
+
print(codex.model_source) # "codex_cli_cache", "override", or "code"
|
|
70
|
+
|
|
71
|
+
for model in codex.models:
|
|
72
|
+
print(f" {model.name}: {model.description}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The catalog covers all four providers — each with model names, reasoning
|
|
76
|
+
levels, default settings, and visibility flags.
|
|
77
|
+
|
|
78
|
+
Model lists are resolved with a three-tier fallback:
|
|
79
|
+
|
|
80
|
+
1. **User override** — drop a JSON file at
|
|
81
|
+
`~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
|
|
82
|
+
model list immediately, without waiting for a package update.
|
|
83
|
+
2. **Live CLI cache** — for Codex, the library reads
|
|
84
|
+
`~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
|
|
85
|
+
present. Other providers fall through because their CLIs don't expose a
|
|
86
|
+
machine-readable model list.
|
|
87
|
+
3. **Hardcoded fallback** — the model list shipped with the package.
|
|
88
|
+
|
|
89
|
+
Override file format:
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"default_model": "claude-sonnet-4-7",
|
|
94
|
+
"models": [
|
|
95
|
+
"claude-sonnet-4-7",
|
|
96
|
+
{
|
|
97
|
+
"name": "claude-opus-5",
|
|
98
|
+
"description": "Latest opus model",
|
|
99
|
+
"controls": [
|
|
100
|
+
{ "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
|
|
101
|
+
]
|
|
102
|
+
}
|
|
103
|
+
]
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
|
|
108
|
+
(default: `~/.config/coding-cli-runtime`).
|
|
109
|
+
|
|
110
|
+
### Decide whether to retry a failed run
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from coding_cli_runtime import classify_provider_failure
|
|
114
|
+
|
|
115
|
+
classification = classify_provider_failure(
|
|
116
|
+
provider="gemini",
|
|
117
|
+
stderr_text="429 Resource exhausted: rate limit exceeded",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
if classification.retryable:
|
|
121
|
+
print(f"Retryable ({classification.category}) — will retry")
|
|
122
|
+
else:
|
|
123
|
+
print(f"Fatal ({classification.category}) — giving up")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Works for all four providers. Recognizes auth failures, rate limits,
|
|
127
|
+
network transients, and other provider-specific error patterns.
|
|
128
|
+
|
|
129
|
+
### Look up provider contract metadata
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from coding_cli_runtime import get_provider_contract, build_env_overlay, resolve_config_paths, render_prompt
|
|
133
|
+
|
|
134
|
+
# Get structured metadata for any supported provider
|
|
135
|
+
contract = get_provider_contract("claude")
|
|
136
|
+
print(contract.binary) # "claude"
|
|
137
|
+
print(contract.auth.api_key_env_var) # "CLAUDE_API_KEY"
|
|
138
|
+
print(contract.paths.config_dir) # "~/.claude"
|
|
139
|
+
print(contract.headless.approval.flag) # "--dangerously-skip-permissions"
|
|
140
|
+
|
|
141
|
+
# Build env var overlay for subprocess
|
|
142
|
+
env = build_env_overlay(contract, api_key="sk-...", base_url="https://custom.example.com")
|
|
143
|
+
# {"CLAUDE_API_KEY": "sk-...", "ANTHROPIC_BASE_URL": "https://custom.example.com"}
|
|
144
|
+
|
|
145
|
+
# Resolve config paths for container mounts
|
|
146
|
+
host_dir, container_dir = resolve_config_paths(contract, containerized=True)
|
|
147
|
+
# ("/home/user/.claude", "/root/.claude")
|
|
148
|
+
|
|
149
|
+
# Resolve prompt delivery (stdin vs flag vs activation)
|
|
150
|
+
payload = render_prompt(contract.headless.prompt, "Fix the bug")
|
|
151
|
+
# payload.args = () (stdin delivery for Claude)
|
|
152
|
+
# payload.stdin_text = "Fix the bug"
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
`ProviderContract` is structured as nested sub-contracts
|
|
156
|
+
(`AuthContract`, `PathContract`, `HeadlessContract`) so consumers
|
|
157
|
+
can drill into whichever aspect they need. This is reference metadata,
|
|
158
|
+
not a command-construction control plane — consumers keep their own
|
|
159
|
+
command assembly and adopt contract fields selectively.
|
|
160
|
+
|
|
161
|
+
### Build headless launch commands
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
|
|
165
|
+
|
|
166
|
+
# Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
|
|
167
|
+
cmd = build_claude_headless_core("claude-sonnet-4-6")
|
|
168
|
+
cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
|
|
169
|
+
|
|
170
|
+
# Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
|
|
171
|
+
cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
|
|
172
|
+
cmd.extend(["-C", str(workdir)])
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Headless core helpers emit the standard flags for non-interactive runs.
|
|
176
|
+
Consumers append app-specific tails (tool restrictions, output paths, etc.).
|
|
177
|
+
|
|
178
|
+
### Find session logs after a run
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
import time
|
|
182
|
+
from coding_cli_runtime import find_codex_session, find_claude_session
|
|
183
|
+
|
|
184
|
+
# Find the most recent Codex session log for a given working directory
|
|
185
|
+
session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
|
|
186
|
+
if session:
|
|
187
|
+
print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Works for Codex and Claude. Scans provider config directories for session
|
|
191
|
+
files matching the working directory and time window.
|
|
192
|
+
|
|
193
|
+
## Key types
|
|
194
|
+
|
|
195
|
+
| Type | Purpose |
|
|
196
|
+
|------|---------|
|
|
197
|
+
| `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
|
|
198
|
+
| `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
|
|
199
|
+
| `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
|
|
200
|
+
| `ProviderSpec` | Provider catalog entry with models, controls, defaults |
|
|
201
|
+
| `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
|
|
202
|
+
| `FailureClassification` | Classified error with retryable flag and category |
|
|
203
|
+
|
|
204
|
+
### Run long-lived CLI sessions
|
|
205
|
+
|
|
206
|
+
For CLI runs that take minutes (e.g., full app generation), use
|
|
207
|
+
`run_interactive_session()` instead of `run_cli_command()`. It adds:
|
|
208
|
+
|
|
209
|
+
- Process-group cleanup (kills orphaned child processes on timeout)
|
|
210
|
+
- Transcript mirroring (streams CLI output to a file while the process runs)
|
|
211
|
+
- Automatic retries on transient failures
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from coding_cli_runtime import run_interactive_session
|
|
215
|
+
|
|
216
|
+
result = await run_interactive_session(
|
|
217
|
+
cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
|
|
218
|
+
cwd=workdir,
|
|
219
|
+
stdin_text=prompt,
|
|
220
|
+
logger=logger,
|
|
221
|
+
timeout_seconds=600,
|
|
222
|
+
)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
|
|
226
|
+
Observability labels (`job_name`, `phase_tag`) default to sensible values.
|
|
227
|
+
|
|
228
|
+
## API summary
|
|
229
|
+
|
|
230
|
+
The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
|
|
231
|
+
Key function groups:
|
|
232
|
+
|
|
233
|
+
| Group | Functions |
|
|
234
|
+
|-------|-----------|
|
|
235
|
+
| Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
|
|
236
|
+
| Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
|
|
237
|
+
| Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
|
|
238
|
+
| Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
|
|
239
|
+
| Codex batch | `build_codex_exec_spec` |
|
|
240
|
+
| Failure handling | `classify_provider_failure` |
|
|
241
|
+
| Session logs | `find_codex_session`, `find_claude_session` |
|
|
242
|
+
| Schema | `load_schema`, `validate_payload` |
|
|
243
|
+
| Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
|
|
244
|
+
|
|
245
|
+
## Contributing
|
|
246
|
+
|
|
247
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
|
|
248
|
+
|
|
249
|
+
## Prerequisites
|
|
250
|
+
|
|
251
|
+
This package does **not** bundle any CLI binaries or credentials. You must
|
|
252
|
+
install and authenticate the relevant provider CLI yourself before using the
|
|
253
|
+
execution helpers.
|
|
254
|
+
|
|
255
|
+
## Status
|
|
256
|
+
|
|
257
|
+
Pre-1.0. API may change between minor versions.
|
|
258
|
+
|
|
259
|
+
## License
|
|
260
|
+
|
|
261
|
+
MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "coding-cli-runtime"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Reusable CLI runtime primitives for provider-backed automation workflows"
|
|
9
9
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
10
10
|
license = "MIT"
|
|
@@ -94,7 +94,7 @@ disallow_untyped_defs = false
|
|
|
94
94
|
warn_return_any = false
|
|
95
95
|
|
|
96
96
|
[tool.bumpversion]
|
|
97
|
-
current_version = "0.1.0"
|
|
97
|
+
current_version = "0.3.0"
|
|
98
98
|
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
|
99
99
|
serialize = ["{major}.{minor}.{patch}"]
|
|
100
100
|
commit = true
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
__version__ = "0.1.0"
|
|
5
|
+
__version__ = "0.3.0"
|
|
6
6
|
|
|
7
7
|
from .auth import AuthResolution, resolve_auth
|
|
8
8
|
from .codex_cli import CodexExecSpec, build_codex_exec_spec
|
|
@@ -15,6 +15,26 @@ from .contracts import (
|
|
|
15
15
|
ErrorCode,
|
|
16
16
|
)
|
|
17
17
|
from .failure_classification import FailureClassification, classify_provider_failure
|
|
18
|
+
from .headless import (
|
|
19
|
+
build_claude_headless_core,
|
|
20
|
+
build_codex_headless_core,
|
|
21
|
+
build_copilot_headless_core,
|
|
22
|
+
build_gemini_headless_core,
|
|
23
|
+
)
|
|
24
|
+
from .provider_contracts import (
|
|
25
|
+
ApprovalContract,
|
|
26
|
+
AuthContract,
|
|
27
|
+
HeadlessContract,
|
|
28
|
+
PathContract,
|
|
29
|
+
PromptPayload,
|
|
30
|
+
PromptTransport,
|
|
31
|
+
ProviderContract,
|
|
32
|
+
SandboxContract,
|
|
33
|
+
build_env_overlay,
|
|
34
|
+
get_provider_contract,
|
|
35
|
+
render_prompt,
|
|
36
|
+
resolve_config_paths,
|
|
37
|
+
)
|
|
18
38
|
from .provider_controls import build_model_id, resolve_provider_model_controls
|
|
19
39
|
from .provider_specs import (
|
|
20
40
|
ChoiceSpec,
|
|
@@ -56,6 +76,8 @@ from .session_logs import (
|
|
|
56
76
|
from .subprocess_runner import run_cli_command, run_cli_command_sync
|
|
57
77
|
|
|
58
78
|
__all__ = [
|
|
79
|
+
"ApprovalContract",
|
|
80
|
+
"AuthContract",
|
|
59
81
|
"AuthMode",
|
|
60
82
|
"AuthResolution",
|
|
61
83
|
"CliRunRequest",
|
|
@@ -67,14 +89,26 @@ __all__ = [
|
|
|
67
89
|
"ControlSpec",
|
|
68
90
|
"ErrorCode",
|
|
69
91
|
"FailureClassification",
|
|
92
|
+
"HeadlessContract",
|
|
70
93
|
"ModelSpec",
|
|
94
|
+
"PathContract",
|
|
95
|
+
"PromptPayload",
|
|
96
|
+
"PromptTransport",
|
|
97
|
+
"ProviderContract",
|
|
71
98
|
"ProviderSpec",
|
|
99
|
+
"SandboxContract",
|
|
72
100
|
"SchemaValidationError",
|
|
73
101
|
"InteractiveCliRunResult",
|
|
74
102
|
"SessionProgressEvent",
|
|
75
103
|
"SessionRetryDecision",
|
|
76
104
|
"SessionExecutionTimeoutError",
|
|
77
105
|
"TranscriptMirrorStrategy",
|
|
106
|
+
"build_claude_headless_core",
|
|
107
|
+
"build_codex_exec_spec",
|
|
108
|
+
"build_codex_headless_core",
|
|
109
|
+
"build_copilot_headless_core",
|
|
110
|
+
"build_env_overlay",
|
|
111
|
+
"build_gemini_headless_core",
|
|
78
112
|
"get_claude_default_model",
|
|
79
113
|
"get_claude_effort_levels",
|
|
80
114
|
"get_claude_model_candidates",
|
|
@@ -85,14 +119,16 @@ __all__ = [
|
|
|
85
119
|
"get_copilot_model_catalog",
|
|
86
120
|
"get_gemini_default_model",
|
|
87
121
|
"get_gemini_model_options",
|
|
122
|
+
"get_provider_contract",
|
|
88
123
|
"get_provider_spec",
|
|
89
124
|
"list_provider_specs",
|
|
90
125
|
"build_model_id",
|
|
91
|
-
"build_codex_exec_spec",
|
|
92
126
|
"classify_provider_failure",
|
|
93
127
|
"load_schema",
|
|
128
|
+
"render_prompt",
|
|
94
129
|
"resolve_auth",
|
|
95
130
|
"resolve_claude_reasoning_policy",
|
|
131
|
+
"resolve_config_paths",
|
|
96
132
|
"resolve_provider_model_controls",
|
|
97
133
|
"redact_text",
|
|
98
134
|
"claude_project_key",
|