coding-cli-runtime 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. coding_cli_runtime-0.3.0/CHANGELOG.md +101 -0
  2. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/PKG-INFO +121 -13
  3. coding_cli_runtime-0.3.0/README.md +261 -0
  4. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/pyproject.toml +2 -2
  5. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/__init__.py +38 -2
  6. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/auth.py +25 -9
  7. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/codex_cli.py +10 -8
  8. coding_cli_runtime-0.3.0/src/coding_cli_runtime/headless.py +124 -0
  9. coding_cli_runtime-0.3.0/src/coding_cli_runtime/provider_contracts.py +482 -0
  10. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/session_logs.py +56 -0
  11. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/PKG-INFO +121 -13
  12. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/SOURCES.txt +6 -1
  13. coding_cli_runtime-0.3.0/tests/test_coverage_gaps.py +913 -0
  14. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_packaging.py +10 -3
  15. coding_cli_runtime-0.3.0/tests/test_provider_contracts.py +322 -0
  16. coding_cli_runtime-0.3.0/tests/test_stage2_tier1.py +241 -0
  17. coding_cli_runtime-0.1.0/CHANGELOG.md +0 -57
  18. coding_cli_runtime-0.1.0/README.md +0 -153
  19. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/CONTRIBUTING.md +0 -0
  20. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/LICENSE +0 -0
  21. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/MANIFEST.in +0 -0
  22. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/setup.cfg +0 -0
  23. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/contracts.py +0 -0
  24. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/copilot_reasoning_baseline.json +0 -0
  25. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/copilot_reasoning_logs.py +0 -0
  26. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/failure_classification.py +0 -0
  27. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/json_io.py +0 -0
  28. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_controls.py +0 -0
  29. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_specs.py +0 -0
  30. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/py.typed +0 -0
  31. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/reasoning.py +0 -0
  32. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/redaction.py +0 -0
  33. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schema_validation.py +0 -0
  34. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +0 -0
  35. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +0 -0
  36. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/session_execution.py +0 -0
  37. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/subprocess_runner.py +0 -0
  38. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/dependency_links.txt +0 -0
  39. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/top_level.txt +0 -0
  40. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_copilot_reasoning_logs.py +0 -0
  41. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_package_resources.py +0 -0
  42. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_playground_probe_smoke.py +0 -0
  43. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_provider_catalog_resolution.py +0 -0
  44. {coding_cli_runtime-0.1.0 → coding_cli_runtime-0.3.0}/tests/test_runtime_parity.py +0 -0
@@ -0,0 +1,101 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.3.0] - 2026-04-09
10
+
11
+ ### Added
12
+ - **Headless launch core helpers** — per-provider arg renderers derived from
13
+ `ProviderContract.headless`: `build_claude_headless_core()`,
14
+ `build_codex_headless_core()`, `build_copilot_headless_core()`,
15
+ `build_gemini_headless_core()`. All consumers (app-generation, feather,
16
+ codex_cli, provider_contracts builder) now delegate to these.
17
+ - `scan_session_dir()` — generic directory-scanning primitive for session log
18
+ discovery with `extract_fn` callback (internal, not in public `__all__`).
19
+ - Session log discovery section in README.
20
+ - API summary table in README.
21
+ - 27 new Stage 2 tests for headless cores, builder delegation, and
22
+ `scan_session_dir`.
23
+
24
+ ### Changed
25
+ - `build_codex_exec_spec()` now delegates to `build_codex_headless_core()`.
26
+ `full_auto` and `skip_git_repo_check` params preserved and passed through.
27
+ - `_build_non_interactive_run()` now delegates to per-provider headless core
28
+ helpers instead of assembling flags inline.
29
+ - Feather `report_data.py` and `report_sections.py` use headless core helpers
30
+ with fallback for environments without `coding_cli_runtime`.
31
+ - Feather `generate_report.py` Codex session discovery replaced with
32
+ `find_codex_session()` from `coding_cli_runtime`.
33
+ - App-generation `claude_impl.py`, `copilot_impl.py`, `gemini_impl.py`
34
+ `build_command()` functions delegate to headless core helpers.
35
+ - Dead headless opt-out flags removed from Copilot (`--allow-all`, `--ask-user`,
36
+ `--use-custom-instructions`) and Gemini (`--auto-approve`) CLI specs —
37
+ these were never used in batch runs and are now handled by the headless core.
38
+ - README rewritten: user-action feature list, `run_interactive_session` example,
39
+ `uv add` install, API summary, Contributing link, session log discovery.
40
+
41
+ ## [0.2.0] - 2026-04-08
42
+
43
+ ### Added
44
+ - **ProviderContract API** — structured, nested metadata for all four provider CLIs
45
+ (Claude, Codex, Gemini, Copilot). Composed of `AuthContract`, `PathContract`,
46
+ `HeadlessContract`, `PromptTransport`, `ApprovalContract`, `SandboxContract`.
47
+ - `get_provider_contract(provider_id)` — returns structured contract for a provider.
48
+ - `build_env_overlay(contract, api_key, base_url)` — builds provider-specific env
49
+ var overlay from contract metadata.
50
+ - `resolve_config_paths(contract, containerized)` — resolves host and container
51
+ config directory paths.
52
+ - `render_prompt(transport, prompt)` — resolves prompt delivery into argv args +
53
+ stdin text based on provider transport mode.
54
+ - `PromptPayload` dataclass for resolved prompt delivery.
55
+ - `__version__` attribute in `coding_cli_runtime`.
56
+ - `CONTRIBUTING.md`, `MANIFEST.in`, `.pre-commit-config.yaml`.
57
+ - PyPI / Python / Build / License badges in `README.md`.
58
+ - `bump-my-version` configuration syncing `pyproject.toml` and `__init__.py`.
59
+ - `ruff`, `mypy` (strict), and `pytest-cov` added to dev dependencies.
60
+ - CI quality gates: ruff check, ruff format, mypy, pytest-cov.
61
+ - README section documenting the new ProviderContract API with examples.
62
+ - 75 new tests for provider contracts, helpers, internal builder, failure
63
+ classification, codex_cli, schema validation (including nested), reasoning,
64
+ redaction, json_io, provider_controls, and auth. Package coverage 47% → 62%.
65
+
66
+ ### Changed
67
+ - Consolidated `shared_cli_runtime` into `coding_cli_runtime`. The package now
68
+ ships a single top-level package; the `shared_cli_runtime` directory is removed.
69
+ - `MANIFEST.in` and docs updated to reference `coding_cli_runtime` paths.
70
+ - `run_interactive_session()` observability kwargs (`provider_label`, `job_name`,
71
+ `phase_tag`, `process_label`, `timeout_seconds`) now have sensible defaults so
72
+ external callers don't need to supply internal batch-system labels.
73
+ - Provider model catalogs are now resolved with a three-tier fallback:
74
+ user override file > live CLI discovery > hardcoded fallback.
75
+ - `auth.py`: `_PROVIDER_ENV_HINTS` now derived from `provider_contracts.py`
76
+ (single source of truth for auth env var names).
77
+ - `CliRunResult.command` type widened from `tuple[str, ...]` to `Sequence[str]`.
78
+ - Publish workflow path corrected (`shared-cli-runtime` → `coding-cli-runtime`).
79
+
80
+ ### Fixed
81
+ - mypy strict compliance: return-type annotations, per-module overrides.
82
+ - ruff lint and format compliance across all source and test files.
83
+ - Copilot BYOK (`COPILOT_PROVIDER_API_KEY`) now discoverable via contract
84
+ but not reported as "required" in `resolve_auth()` — BYOK is opt-in.
85
+
86
+ ## [0.1.0] - 2026-04-07
87
+
88
+ ### Added
89
+ - Initial extraction from `llm-eval` monorepo.
90
+ - Provider metadata and controls for Claude, Codex, Copilot, and Gemini CLIs.
91
+ - Shared request/result contracts (`CliRunRequest`, `CliRunResult`, `CliLaunchSpec`).
92
+ - Schema loading and payload validation (`load_schema`, `validate_payload`).
93
+ - Synchronous and asynchronous subprocess execution helpers.
94
+ - Interactive session execution with transcript mirroring.
95
+ - Session log discovery and parsing utilities.
96
+ - Claude reasoning policy resolution.
97
+ - Log redaction helpers.
98
+ - Copilot reasoning log parsing and classification.
99
+ - PEP 561 `py.typed` markers for both `coding_cli_runtime` and `shared_cli_runtime`.
100
+ - Packaged JSON schemas and Copilot reasoning baseline data.
101
+ - Playground knowledge base with probing guides and experiment templates.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-cli-runtime
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Reusable CLI runtime primitives for provider-backed automation workflows
5
5
  Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -40,17 +40,21 @@ code doesn't need provider-specific subprocess handling.
40
40
 
41
41
  **What it does (and why not just `subprocess.run`):**
42
42
 
43
- - Unified request/result types across all four CLIs
44
- - Timeout enforcement with graceful process termination
45
- - Provider-aware failure classification (retryable vs fatal)
46
- - Built-in model catalog with defaults, reasoning levels, and capabilities
47
- - Interactive session management for long-running generation tasks
48
- - Zero runtime dependencies
43
+ - Run any provider CLI with unified request/result types and timeout enforcement
44
+ - Query the model catalog (with user-override and live-cache fallback)
45
+ - Classify failures as retryable vs fatal per provider
46
+ - Look up provider auth, config dirs, and headless launch flags
47
+ - Build non-interactive launch commands without hardcoding provider flags
48
+ - Find session logs after a run (Codex, Claude)
49
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
50
+ - No Python package dependencies — only requires the provider CLIs themselves
49
51
 
50
52
  ## Installation
51
53
 
52
54
  ```bash
53
55
  pip install coding-cli-runtime
56
+ # or
57
+ uv add coding-cli-runtime
54
58
  ```
55
59
 
56
60
  Requires Python 3.10+.
@@ -65,7 +69,7 @@ from pathlib import Path
65
69
  from coding_cli_runtime import CliRunRequest, run_cli_command
66
70
 
67
71
  request = CliRunRequest(
68
- cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
72
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
69
73
  cwd=Path("/tmp/my-project"),
70
74
  timeout_seconds=120,
71
75
  )
@@ -148,6 +152,70 @@ else:
148
152
  Works for all four providers. Recognizes auth failures, rate limits,
149
153
  network transients, and other provider-specific error patterns.
150
154
 
155
+ ### Look up provider contract metadata
156
+
157
+ ```python
158
+ from coding_cli_runtime import get_provider_contract, build_env_overlay, resolve_config_paths, render_prompt
159
+
160
+ # Get structured metadata for any supported provider
161
+ contract = get_provider_contract("claude")
162
+ print(contract.binary) # "claude"
163
+ print(contract.auth.api_key_env_var) # "CLAUDE_API_KEY"
164
+ print(contract.paths.config_dir) # "~/.claude"
165
+ print(contract.headless.approval.flag) # "--dangerously-skip-permissions"
166
+
167
+ # Build env var overlay for subprocess
168
+ env = build_env_overlay(contract, api_key="sk-...", base_url="https://custom.example.com")
169
+ # {"CLAUDE_API_KEY": "sk-...", "ANTHROPIC_BASE_URL": "https://custom.example.com"}
170
+
171
+ # Resolve config paths for container mounts
172
+ host_dir, container_dir = resolve_config_paths(contract, containerized=True)
173
+ # ("/home/user/.claude", "/root/.claude")
174
+
175
+ # Resolve prompt delivery (stdin vs flag vs activation)
176
+ payload = render_prompt(contract.headless.prompt, "Fix the bug")
177
+ # payload.args = () (stdin delivery for Claude)
178
+ # payload.stdin_text = "Fix the bug"
179
+ ```
180
+
181
+ `ProviderContract` is structured as nested sub-contracts
182
+ (`AuthContract`, `PathContract`, `HeadlessContract`) so consumers
183
+ can drill into whichever aspect they need. This is reference metadata,
184
+ not a command-construction control plane — consumers keep their own
185
+ command assembly and adopt contract fields selectively.
186
+
187
+ ### Build headless launch commands
188
+
189
+ ```python
190
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
191
+
192
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
193
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
194
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
195
+
196
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
197
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
198
+ cmd.extend(["-C", str(workdir)])
199
+ ```
200
+
201
+ Headless core helpers emit the standard flags for non-interactive runs.
202
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
203
+
204
+ ### Find session logs after a run
205
+
206
+ ```python
207
+ import time
208
+ from coding_cli_runtime import find_codex_session, find_claude_session
209
+
210
+ # Find the most recent Codex session log for a given working directory
211
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
212
+ if session:
213
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
214
+ ```
215
+
216
+ Works for Codex and Claude. Scans provider config directories for session
217
+ files matching the working directory and time window.
218
+
151
219
  ## Key types
152
220
 
153
221
  | Type | Purpose |
@@ -156,13 +224,53 @@ network transients, and other provider-specific error patterns.
156
224
  | `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
157
225
  | `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
158
226
  | `ProviderSpec` | Provider catalog entry with models, controls, defaults |
227
+ | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
159
228
  | `FailureClassification` | Classified error with retryable flag and category |
160
229
 
161
- `run_interactive_session()` manages long-running CLI processes with
162
- timeout enforcement, process-group cleanup, transcript mirroring, and
163
- automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
164
- required — observability labels like `job_name` and `phase_tag` default to
165
- sensible values so external callers don't need to invent them.
230
+ ### Run long-lived CLI sessions
231
+
232
+ For CLI runs that take minutes (e.g., full app generation), use
233
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
234
+
235
+ - Process-group cleanup (kills orphaned child processes on timeout)
236
+ - Transcript mirroring (streams CLI output to a file while the process runs)
237
+ - Automatic retries on transient failures
238
+
239
+ ```python
240
+ from coding_cli_runtime import run_interactive_session
241
+
242
+ result = await run_interactive_session(
243
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
244
+ cwd=workdir,
245
+ stdin_text=prompt,
246
+ logger=logger,
247
+ timeout_seconds=600,
248
+ )
249
+ ```
250
+
251
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
252
+ Observability labels (`job_name`, `phase_tag`) default to sensible values.
253
+
254
+ ## API summary
255
+
256
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
257
+ Key function groups:
258
+
259
+ | Group | Functions |
260
+ |-------|-----------|
261
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
262
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
263
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
264
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
265
+ | Codex batch | `build_codex_exec_spec` |
266
+ | Failure handling | `classify_provider_failure` |
267
+ | Session logs | `find_codex_session`, `find_claude_session` |
268
+ | Schema | `load_schema`, `validate_payload` |
269
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
270
+
271
+ ## Contributing
272
+
273
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
166
274
 
167
275
  ## Prerequisites
168
276
 
@@ -0,0 +1,261 @@
1
+ # coding-cli-runtime
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
5
+ [![Build](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml/badge.svg)](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
6
+ [![License](https://img.shields.io/pypi/l/coding-cli-runtime)](LICENSE)
7
+
8
+ A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
9
+
10
+ These CLIs each have different invocation patterns, output formats, error
11
+ shapes, and timeout behaviors. This library normalizes all of that behind
12
+ a common `CliRunRequest` → `CliRunResult` contract, so your automation
13
+ code doesn't need provider-specific subprocess handling.
14
+
15
+ **What it does (and why not just `subprocess.run`):**
16
+
17
+ - Run any provider CLI with unified request/result types and timeout enforcement
18
+ - Query the model catalog (with user-override and live-cache fallback)
19
+ - Classify failures as retryable vs fatal per provider
20
+ - Look up provider auth, config dirs, and headless launch flags
21
+ - Build non-interactive launch commands without hardcoding provider flags
22
+ - Find session logs after a run (Codex, Claude)
23
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
24
+ - No Python package dependencies — only requires the provider CLIs themselves
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install coding-cli-runtime
30
+ # or
31
+ uv add coding-cli-runtime
32
+ ```
33
+
34
+ Requires Python 3.10+.
35
+
36
+ ## Examples
37
+
38
+ ### Execute a provider CLI
39
+
40
+ ```python
41
+ import asyncio
42
+ from pathlib import Path
43
+ from coding_cli_runtime import CliRunRequest, run_cli_command
44
+
45
+ request = CliRunRequest(
46
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
47
+ cwd=Path("/tmp/my-project"),
48
+ timeout_seconds=120,
49
+ )
50
+ result = asyncio.run(run_cli_command(request))
51
+
52
+ print(result.returncode) # 0
53
+ print(result.error_code) # "none"
54
+ print(result.duration_seconds) # 14.2
55
+ print(result.stdout_text[:200])
56
+ ```
57
+
58
+ Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
59
+ shape stays the same. A synchronous variant `run_cli_command_sync` is also
60
+ available.
61
+
62
+ ### Pick a model from the provider catalog
63
+
64
+ ```python
65
+ from coding_cli_runtime import get_provider_spec
66
+
67
+ codex = get_provider_spec("codex")
68
+ print(codex.default_model) # "gpt-5.3-codex"
69
+ print(codex.model_source) # "codex_cli_cache", "override", or "code"
70
+
71
+ for model in codex.models:
72
+ print(f" {model.name}: {model.description}")
73
+ ```
74
+
75
+ The catalog covers all four providers — each with model names, reasoning
76
+ levels, default settings, and visibility flags.
77
+
78
+ Model lists are resolved with a three-tier fallback:
79
+
80
+ 1. **User override** — drop a JSON file at
81
+ `~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
82
+ model list immediately, without waiting for a package update.
83
+ 2. **Live CLI cache** — for Codex, the library reads
84
+ `~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
85
+ present. Other providers fall through because their CLIs don't expose a
86
+ machine-readable model list.
87
+ 3. **Hardcoded fallback** — the model list shipped with the package.
88
+
89
+ Override file format:
90
+
91
+ ```json
92
+ {
93
+ "default_model": "claude-sonnet-4-7",
94
+ "models": [
95
+ "claude-sonnet-4-7",
96
+ {
97
+ "name": "claude-opus-5",
98
+ "description": "Latest opus model",
99
+ "controls": [
100
+ { "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
101
+ ]
102
+ }
103
+ ]
104
+ }
105
+ ```
106
+
107
+ Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
108
+ (default: `~/.config/coding-cli-runtime`).
109
+
110
+ ### Decide whether to retry a failed run
111
+
112
+ ```python
113
+ from coding_cli_runtime import classify_provider_failure
114
+
115
+ classification = classify_provider_failure(
116
+ provider="gemini",
117
+ stderr_text="429 Resource exhausted: rate limit exceeded",
118
+ )
119
+
120
+ if classification.retryable:
121
+ print(f"Retryable ({classification.category}) — will retry")
122
+ else:
123
+ print(f"Fatal ({classification.category}) — giving up")
124
+ ```
125
+
126
+ Works for all four providers. Recognizes auth failures, rate limits,
127
+ network transients, and other provider-specific error patterns.
128
+
129
+ ### Look up provider contract metadata
130
+
131
+ ```python
132
+ from coding_cli_runtime import get_provider_contract, build_env_overlay, resolve_config_paths, render_prompt
133
+
134
+ # Get structured metadata for any supported provider
135
+ contract = get_provider_contract("claude")
136
+ print(contract.binary) # "claude"
137
+ print(contract.auth.api_key_env_var) # "CLAUDE_API_KEY"
138
+ print(contract.paths.config_dir) # "~/.claude"
139
+ print(contract.headless.approval.flag) # "--dangerously-skip-permissions"
140
+
141
+ # Build env var overlay for subprocess
142
+ env = build_env_overlay(contract, api_key="sk-...", base_url="https://custom.example.com")
143
+ # {"CLAUDE_API_KEY": "sk-...", "ANTHROPIC_BASE_URL": "https://custom.example.com"}
144
+
145
+ # Resolve config paths for container mounts
146
+ host_dir, container_dir = resolve_config_paths(contract, containerized=True)
147
+ # ("/home/user/.claude", "/root/.claude")
148
+
149
+ # Resolve prompt delivery (stdin vs flag vs activation)
150
+ payload = render_prompt(contract.headless.prompt, "Fix the bug")
151
+ # payload.args = () (stdin delivery for Claude)
152
+ # payload.stdin_text = "Fix the bug"
153
+ ```
154
+
155
+ `ProviderContract` is structured as nested sub-contracts
156
+ (`AuthContract`, `PathContract`, `HeadlessContract`) so consumers
157
+ can drill into whichever aspect they need. This is reference metadata,
158
+ not a command-construction control plane — consumers keep their own
159
+ command assembly and adopt contract fields selectively.
160
+
161
+ ### Build headless launch commands
162
+
163
+ ```python
164
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
165
+
166
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
167
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
168
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
169
+
170
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
171
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
172
+ cmd.extend(["-C", str(workdir)])
173
+ ```
174
+
175
+ Headless core helpers emit the standard flags for non-interactive runs.
176
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
177
+
178
+ ### Find session logs after a run
179
+
180
+ ```python
181
+ import time
182
+ from coding_cli_runtime import find_codex_session, find_claude_session
183
+
184
+ # Find the most recent Codex session log for a given working directory
185
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
186
+ if session:
187
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
188
+ ```
189
+
190
+ Works for Codex and Claude. Scans provider config directories for session
191
+ files matching the working directory and time window.
192
+
193
+ ## Key types
194
+
195
+ | Type | Purpose |
196
+ |------|---------|
197
+ | `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
198
+ | `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
199
+ | `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
200
+ | `ProviderSpec` | Provider catalog entry with models, controls, defaults |
201
+ | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
202
+ | `FailureClassification` | Classified error with retryable flag and category |
203
+
204
+ ### Run long-lived CLI sessions
205
+
206
+ For CLI runs that take minutes (e.g., full app generation), use
207
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
208
+
209
+ - Process-group cleanup (kills orphaned child processes on timeout)
210
+ - Transcript mirroring (streams CLI output to a file while the process runs)
211
+ - Automatic retries on transient failures
212
+
213
+ ```python
214
+ from coding_cli_runtime import run_interactive_session
215
+
216
+ result = await run_interactive_session(
217
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
218
+ cwd=workdir,
219
+ stdin_text=prompt,
220
+ logger=logger,
221
+ timeout_seconds=600,
222
+ )
223
+ ```
224
+
225
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
226
+ Observability labels (`job_name`, `phase_tag`) default to sensible values.
227
+
228
+ ## API summary
229
+
230
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
231
+ Key function groups:
232
+
233
+ | Group | Functions |
234
+ |-------|-----------|
235
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
236
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
237
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
238
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
239
+ | Codex batch | `build_codex_exec_spec` |
240
+ | Failure handling | `classify_provider_failure` |
241
+ | Session logs | `find_codex_session`, `find_claude_session` |
242
+ | Schema | `load_schema`, `validate_payload` |
243
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
244
+
245
+ ## Contributing
246
+
247
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
248
+
249
+ ## Prerequisites
250
+
251
+ This package does **not** bundle any CLI binaries or credentials. You must
252
+ install and authenticate the relevant provider CLI yourself before using the
253
+ execution helpers.
254
+
255
+ ## Status
256
+
257
+ Pre-1.0. API may change between minor versions.
258
+
259
+ ## License
260
+
261
+ MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coding-cli-runtime"
7
- version = "0.1.0"
7
+ version = "0.3.0"
8
8
  description = "Reusable CLI runtime primitives for provider-backed automation workflows"
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  license = "MIT"
@@ -94,7 +94,7 @@ disallow_untyped_defs = false
94
94
  warn_return_any = false
95
95
 
96
96
  [tool.bumpversion]
97
- current_version = "0.1.0"
97
+ current_version = "0.3.0"
98
98
  parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
99
99
  serialize = ["{major}.{minor}.{patch}"]
100
100
  commit = true
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "0.1.0"
5
+ __version__ = "0.3.0"
6
6
 
7
7
  from .auth import AuthResolution, resolve_auth
8
8
  from .codex_cli import CodexExecSpec, build_codex_exec_spec
@@ -15,6 +15,26 @@ from .contracts import (
15
15
  ErrorCode,
16
16
  )
17
17
  from .failure_classification import FailureClassification, classify_provider_failure
18
+ from .headless import (
19
+ build_claude_headless_core,
20
+ build_codex_headless_core,
21
+ build_copilot_headless_core,
22
+ build_gemini_headless_core,
23
+ )
24
+ from .provider_contracts import (
25
+ ApprovalContract,
26
+ AuthContract,
27
+ HeadlessContract,
28
+ PathContract,
29
+ PromptPayload,
30
+ PromptTransport,
31
+ ProviderContract,
32
+ SandboxContract,
33
+ build_env_overlay,
34
+ get_provider_contract,
35
+ render_prompt,
36
+ resolve_config_paths,
37
+ )
18
38
  from .provider_controls import build_model_id, resolve_provider_model_controls
19
39
  from .provider_specs import (
20
40
  ChoiceSpec,
@@ -56,6 +76,8 @@ from .session_logs import (
56
76
  from .subprocess_runner import run_cli_command, run_cli_command_sync
57
77
 
58
78
  __all__ = [
79
+ "ApprovalContract",
80
+ "AuthContract",
59
81
  "AuthMode",
60
82
  "AuthResolution",
61
83
  "CliRunRequest",
@@ -67,14 +89,26 @@ __all__ = [
67
89
  "ControlSpec",
68
90
  "ErrorCode",
69
91
  "FailureClassification",
92
+ "HeadlessContract",
70
93
  "ModelSpec",
94
+ "PathContract",
95
+ "PromptPayload",
96
+ "PromptTransport",
97
+ "ProviderContract",
71
98
  "ProviderSpec",
99
+ "SandboxContract",
72
100
  "SchemaValidationError",
73
101
  "InteractiveCliRunResult",
74
102
  "SessionProgressEvent",
75
103
  "SessionRetryDecision",
76
104
  "SessionExecutionTimeoutError",
77
105
  "TranscriptMirrorStrategy",
106
+ "build_claude_headless_core",
107
+ "build_codex_exec_spec",
108
+ "build_codex_headless_core",
109
+ "build_copilot_headless_core",
110
+ "build_env_overlay",
111
+ "build_gemini_headless_core",
78
112
  "get_claude_default_model",
79
113
  "get_claude_effort_levels",
80
114
  "get_claude_model_candidates",
@@ -85,14 +119,16 @@ __all__ = [
85
119
  "get_copilot_model_catalog",
86
120
  "get_gemini_default_model",
87
121
  "get_gemini_model_options",
122
+ "get_provider_contract",
88
123
  "get_provider_spec",
89
124
  "list_provider_specs",
90
125
  "build_model_id",
91
- "build_codex_exec_spec",
92
126
  "classify_provider_failure",
93
127
  "load_schema",
128
+ "render_prompt",
94
129
  "resolve_auth",
95
130
  "resolve_claude_reasoning_policy",
131
+ "resolve_config_paths",
96
132
  "resolve_provider_model_controls",
97
133
  "redact_text",
98
134
  "claude_project_key",