coding-cli-runtime 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. coding_cli_runtime-0.4.0/CHANGELOG.md +96 -0
  2. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/PKG-INFO +163 -16
  3. coding_cli_runtime-0.4.0/README.md +333 -0
  4. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/pyproject.toml +2 -2
  5. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/__init__.py +31 -3
  6. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/codex_cli.py +10 -8
  7. coding_cli_runtime-0.4.0/src/coding_cli_runtime/headless.py +124 -0
  8. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/provider_contracts.py +220 -39
  9. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/session_logs.py +56 -0
  10. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/PKG-INFO +163 -16
  11. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/SOURCES.txt +5 -1
  12. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_coverage_gaps.py +15 -7
  13. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_packaging.py +4 -3
  14. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_provider_contracts.py +3 -3
  15. coding_cli_runtime-0.4.0/tests/test_stage2_tier1.py +241 -0
  16. coding_cli_runtime-0.4.0/tests/test_stage3_io_contracts.py +119 -0
  17. coding_cli_runtime-0.4.0/tests/test_stage4_helpers.py +105 -0
  18. coding_cli_runtime-0.2.0/CHANGELOG.md +0 -69
  19. coding_cli_runtime-0.2.0/README.md +0 -186
  20. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/CONTRIBUTING.md +0 -0
  21. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/LICENSE +0 -0
  22. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/MANIFEST.in +0 -0
  23. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/setup.cfg +0 -0
  24. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/auth.py +0 -0
  25. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/contracts.py +0 -0
  26. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/copilot_reasoning_baseline.json +0 -0
  27. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/copilot_reasoning_logs.py +0 -0
  28. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/failure_classification.py +0 -0
  29. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/json_io.py +0 -0
  30. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/provider_controls.py +0 -0
  31. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/provider_specs.py +0 -0
  32. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/py.typed +0 -0
  33. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/reasoning.py +0 -0
  34. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/redaction.py +0 -0
  35. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/schema_validation.py +0 -0
  36. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +0 -0
  37. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +0 -0
  38. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/session_execution.py +0 -0
  39. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime/subprocess_runner.py +0 -0
  40. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/dependency_links.txt +0 -0
  41. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/src/coding_cli_runtime.egg-info/top_level.txt +0 -0
  42. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_copilot_reasoning_logs.py +0 -0
  43. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_package_resources.py +0 -0
  44. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_playground_probe_smoke.py +0 -0
  45. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_provider_catalog_resolution.py +0 -0
  46. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.4.0}/tests/test_runtime_parity.py +0 -0
@@ -0,0 +1,96 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.4.0] - 2026-04-09
10
+
11
+ ### Added
12
+ - `OutputContract`, `IoContract`, `SessionDiscoveryContract`,
13
+ `DiagnosticsContract` sub-contracts on `ProviderContract`, with data
14
+ populated for all four providers.
15
+ - `WorkspaceEnvVar` structured type with `name` + `value_source` semantics
16
+ (replaces bare env-var name strings in `IoContract.workspace_env_vars`).
17
+ - `WorkspaceEnvValueSource` — closed vocabulary (`"execution_dir"` /
18
+ `"workspace_root"`) for `WorkspaceEnvVar.value_source`.
19
+ - `resolve_workspace_env()` — turns `IoContract.workspace_env_vars` into a
20
+ concrete env overlay from an execution directory.
21
+ - `resolve_session_search_paths()` — expands `SessionDiscoveryContract`
22
+ roots into concrete host paths.
23
+ - `is_provider_installed()` — checks whether a provider CLI binary is on
24
+ PATH.
25
+ - README sections: "Query provider I/O conventions" and "Common integration
26
+ tasks" with copy-pasteable examples.
27
+ - `WorkspaceEnvVar` added to key-types table in README.
28
+
29
+ ### Changed
30
+ - Gemini `session_glob` tightened from `"*.json"` to `"*/chats/session-*.json"`
31
+ to match the real `tmp/{hash}/chats/session-*.json` layout.
32
+ - Claude `session_glob` tightened from `"*.jsonl"` to
33
+ `"*/conversation.jsonl"` to match per-project subdirectory structure.
34
+
35
+ ## [0.3.0] - 2026-04-09
36
+
37
+ ### Added
38
+ - Per-provider headless launch helpers: `build_claude_headless_core()`,
39
+ `build_codex_headless_core()`, `build_copilot_headless_core()`,
40
+ `build_gemini_headless_core()`. These emit the standard non-interactive
41
+ flags for each provider; callers append app-specific tails.
42
+ - Session log discovery section in README.
43
+ - API summary table in README.
44
+
45
+ ### Changed
46
+ - `build_codex_exec_spec()` now delegates to `build_codex_headless_core()`.
47
+ `full_auto` and `skip_git_repo_check` params preserved.
48
+ - README rewritten with task-oriented examples, `run_interactive_session`
49
+ usage, `uv add` install, and API summary.
50
+
51
+ ## [0.2.0] - 2026-04-08
52
+
53
+ ### Added
54
+ - `ProviderContract` API — structured, nested metadata for all four provider
55
+ CLIs (Claude, Codex, Gemini, Copilot). Composed of `AuthContract`,
56
+ `PathContract`, `HeadlessContract`, `PromptTransport`, `ApprovalContract`,
57
+ `SandboxContract`.
58
+ - `get_provider_contract(provider_id)` — returns structured contract for a
59
+ provider.
60
+ - `build_env_overlay(contract, api_key, base_url)` — builds provider-specific
61
+ env var overlay from contract metadata.
62
+ - `resolve_config_paths(contract, containerized)` — resolves host and container
63
+ config directory paths.
64
+ - `render_prompt(transport, prompt)` — resolves prompt delivery into argv args +
65
+ stdin text based on provider transport mode.
66
+ - `PromptPayload` dataclass for resolved prompt delivery.
67
+ - `resolve_auth()` — resolves provider auth status from environment.
68
+ - `__version__` attribute.
69
+ - `CONTRIBUTING.md` with development setup and quality checks.
70
+
71
+ ### Changed
72
+ - `run_interactive_session()` observability kwargs (`job_name`, `phase_tag`)
73
+ now have sensible defaults so callers don't need to supply them.
74
+ - `CliRunResult.command` type widened from `tuple[str, ...]` to `Sequence[str]`.
75
+ - Provider model catalogs are now resolved with a three-tier fallback: user override
76
+ file > live CLI discovery > hardcoded fallback.
77
+
78
+ ### Fixed
79
+ - Copilot BYOK (`COPILOT_PROVIDER_API_KEY`) is now discoverable via the contract
80
+ but not reported as "required" in `resolve_auth()` — BYOK is opt-in.
81
+
82
+ ## [0.1.0] - 2026-04-07
83
+
84
+ ### Added
85
+ - Provider metadata and controls for Claude, Codex, Copilot, and Gemini CLIs.
86
+ - Shared request/result contracts (`CliRunRequest`, `CliRunResult`, `CliLaunchSpec`).
87
+ - Schema loading and payload validation (`load_schema`, `validate_payload`).
88
+ - Synchronous and asynchronous subprocess execution helpers.
89
+ - Interactive session execution with transcript mirroring.
90
+ - Session log discovery and parsing utilities.
91
+ - Claude reasoning policy resolution.
92
+ - Log redaction helpers.
93
+ - Copilot reasoning log parsing and classification.
94
+ - PEP 561 `py.typed` markers for both `coding_cli_runtime` and `shared_cli_runtime`.
95
+ - Packaged JSON schemas and Copilot reasoning baseline data.
96
+ - Playground knowledge base with probing guides and experiment templates.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-cli-runtime
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Reusable CLI runtime primitives for provider-backed automation workflows
5
5
  Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -40,17 +40,21 @@ code doesn't need provider-specific subprocess handling.
40
40
 
41
41
  **What it does (and why not just `subprocess.run`):**
42
42
 
43
- - Unified request/result types across all four CLIs
44
- - Timeout enforcement with graceful process termination
45
- - Provider-aware failure classification (retryable vs fatal)
46
- - Built-in model catalog with defaults, reasoning levels, and capabilities
47
- - Interactive session management for long-running generation tasks
48
- - Zero runtime dependencies
43
+ - Run any provider CLI with unified request/result types and timeout enforcement
44
+ - Query the model catalog (with user-override and live-cache fallback)
45
+ - Classify failures as retryable vs fatal per provider
46
+ - Look up provider auth, config dirs, and headless launch flags
47
+ - Build non-interactive launch commands without hardcoding provider flags
48
+ - Find session logs after a run (Codex, Claude)
49
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
50
+ - No Python package dependencies — only requires the provider CLIs themselves
49
51
 
50
52
  ## Installation
51
53
 
52
54
  ```bash
53
55
  pip install coding-cli-runtime
56
+ # or
57
+ uv add coding-cli-runtime
54
58
  ```
55
59
 
56
60
  Requires Python 3.10+.
@@ -65,7 +69,7 @@ from pathlib import Path
65
69
  from coding_cli_runtime import CliRunRequest, run_cli_command
66
70
 
67
71
  request = CliRunRequest(
68
- cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
72
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
69
73
  cwd=Path("/tmp/my-project"),
70
74
  timeout_seconds=120,
71
75
  )
@@ -148,6 +152,44 @@ else:
148
152
  Works for all four providers. Recognizes auth failures, rate limits,
149
153
  network transients, and other provider-specific error patterns.
150
154
 
155
+ ### Common integration tasks
156
+
157
+ #### Check whether a provider CLI is installed
158
+
159
+ ```python
160
+ from coding_cli_runtime import is_provider_installed
161
+
162
+ if not is_provider_installed("claude"):
163
+ raise RuntimeError("Claude Code is not available on PATH")
164
+ ```
165
+
166
+ This is intentionally minimal: it checks whether the provider binary exists on
167
+ PATH. Deeper CLI drift validation belongs in maintainer tooling, not the
168
+ runtime API.
169
+
170
+ #### Resolve workspace env vars and session search paths
171
+
172
+ ```python
173
+ from coding_cli_runtime import (
174
+ get_provider_contract,
175
+ resolve_session_search_paths,
176
+ resolve_workspace_env,
177
+ )
178
+
179
+ gemini = get_provider_contract("gemini")
180
+
181
+ # Derive provider-specific workspace env vars from contract metadata
182
+ env = resolve_workspace_env(gemini, "/tmp/run-dir")
183
+ # {"GEMINI_CLI_IDE_WORKSPACE_PATH": "/tmp/run-dir"}
184
+
185
+ # Expand concrete host paths for session log searches
186
+ paths = resolve_session_search_paths(gemini)
187
+ # (Path.home() / ".gemini" / "tmp",)
188
+ ```
189
+
190
+ Use these helpers when you want the contract facts turned into concrete
191
+ filesystem/env values without rebuilding the same glue logic in each consumer.
192
+
151
193
  ### Look up provider contract metadata
152
194
 
153
195
  ```python
@@ -175,11 +217,75 @@ payload = render_prompt(contract.headless.prompt, "Fix the bug")
175
217
  ```
176
218
 
177
219
  `ProviderContract` is structured as nested sub-contracts
178
- (`AuthContract`, `PathContract`, `HeadlessContract`) so consumers
220
+ (`AuthContract`, `PathContract`, `HeadlessContract`, `OutputContract`,
221
+ `IoContract`, `SessionDiscoveryContract`, `DiagnosticsContract`) so consumers
179
222
  can drill into whichever aspect they need. This is reference metadata,
180
- not a command-construction control plane — consumers keep their own
223
+ not a command-construction control plane — callers keep their own
181
224
  command assembly and adopt contract fields selectively.
182
225
 
226
+ ### Query provider I/O conventions
227
+
228
+ ```python
229
+ from coding_cli_runtime import get_provider_contract
230
+
231
+ gemini = get_provider_contract("gemini")
232
+
233
+ # Workspace env vars with value semantics
234
+ for wev in gemini.io.workspace_env_vars:
235
+ print(f"{wev.name} = {wev.value_source}")
236
+ # GEMINI_CLI_IDE_WORKSPACE_PATH = execution_dir
237
+
238
+ # Session discovery (where session logs live)
239
+ sd = gemini.session_discovery
240
+ print(sd.session_roots) # ("tmp",)
241
+ print(sd.session_glob) # "*/chats/session-*.json"
242
+
243
+ # Output format support
244
+ codex = get_provider_contract("codex")
245
+ print(codex.output.output_path_flag) # "-o"
246
+ print(codex.output.schema_path_flag) # "--output-schema"
247
+
248
+ # Diagnostics (Copilot only)
249
+ copilot = get_provider_contract("copilot")
250
+ if copilot.diagnostics:
251
+ print(copilot.diagnostics.log_glob) # "logs/process-*.log"
252
+ ```
253
+
254
+ `WorkspaceEnvVar.value_source` uses a closed vocabulary:
255
+ `"execution_dir"` or `"workspace_root"`.
256
+
257
+ ### Build headless launch commands
258
+
259
+ ```python
260
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
261
+
262
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
263
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
264
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
265
+
266
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
267
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
268
+ cmd.extend(["-C", str(workdir)])
269
+ ```
270
+
271
+ Headless core helpers emit the standard flags for non-interactive runs.
272
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
273
+
274
+ ### Find session logs after a run
275
+
276
+ ```python
277
+ import time
278
+ from coding_cli_runtime import find_codex_session, find_claude_session
279
+
280
+ # Find the most recent Codex session log for a given working directory
281
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
282
+ if session:
283
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
284
+ ```
285
+
286
+ Works for Codex and Claude. Scans provider config directories for session
287
+ files matching the working directory and time window.
288
+
183
289
  ## Key types
184
290
 
185
291
  | Type | Purpose |
@@ -188,14 +294,55 @@ command assembly and adopt contract fields selectively.
188
294
  | `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
189
295
  | `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
190
296
  | `ProviderSpec` | Provider catalog entry with models, controls, defaults |
191
- | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
297
+ | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless, I/O, sessions) |
298
+ | `WorkspaceEnvVar` | Env var with value-source semantics (`execution_dir`, `workspace_root`) |
192
299
  | `FailureClassification` | Classified error with retryable flag and category |
193
300
 
194
- `run_interactive_session()` manages long-running CLI processes with
195
- timeout enforcement, process-group cleanup, transcript mirroring, and
196
- automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
197
- required — observability labels like `job_name` and `phase_tag` default to
198
- sensible values so external callers don't need to invent them.
301
+ ### Run long-lived CLI sessions
302
+
303
+ For CLI runs that take minutes (e.g., full app generation), use
304
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
305
+
306
+ - Process-group cleanup (kills orphaned child processes on timeout)
307
+ - Transcript mirroring (streams CLI output to a file while the process runs)
308
+ - Automatic retries on transient failures
309
+
310
+ ```python
311
+ from coding_cli_runtime import run_interactive_session
312
+
313
+ result = await run_interactive_session(
314
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
315
+ cwd=workdir,
316
+ stdin_text=prompt,
317
+ logger=logger,
318
+ timeout_seconds=600,
319
+ )
320
+ ```
321
+
322
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
323
+ Other parameters have sensible defaults.
324
+
325
+ ## API summary
326
+
327
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
328
+ Key function groups:
329
+
330
+ | Group | Functions |
331
+ |-------|-----------|
332
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
333
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
334
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth`, `resolve_workspace_env`, `resolve_session_search_paths` |
335
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
336
+ | Codex batch | `build_codex_exec_spec` |
337
+ | Failure handling | `classify_provider_failure` |
338
+ | Installation check | `is_provider_installed` |
339
+ | Session logs | `find_codex_session`, `find_claude_session` |
340
+ | Schema | `load_schema`, `validate_payload` |
341
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
342
+
343
+ ## Contributing
344
+
345
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
199
346
 
200
347
  ## Prerequisites
201
348
 
@@ -0,0 +1,333 @@
1
+ # coding-cli-runtime
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
5
+ [![Build](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml/badge.svg)](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
6
+ [![License](https://img.shields.io/pypi/l/coding-cli-runtime)](LICENSE)
7
+
8
+ A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
9
+
10
+ These CLIs each have different invocation patterns, output formats, error
11
+ shapes, and timeout behaviors. This library normalizes all of that behind
12
+ a common `CliRunRequest` → `CliRunResult` contract, so your automation
13
+ code doesn't need provider-specific subprocess handling.
14
+
15
+ **What it does (and why not just `subprocess.run`):**
16
+
17
+ - Run any provider CLI with unified request/result types and timeout enforcement
18
+ - Query the model catalog (with user-override and live-cache fallback)
19
+ - Classify failures as retryable vs fatal per provider
20
+ - Look up provider auth, config dirs, and headless launch flags
21
+ - Build non-interactive launch commands without hardcoding provider flags
22
+ - Find session logs after a run (Codex, Claude)
23
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
24
+ - No Python package dependencies — only requires the provider CLIs themselves
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install coding-cli-runtime
30
+ # or
31
+ uv add coding-cli-runtime
32
+ ```
33
+
34
+ Requires Python 3.10+.
35
+
36
+ ## Examples
37
+
38
+ ### Execute a provider CLI
39
+
40
+ ```python
41
+ import asyncio
42
+ from pathlib import Path
43
+ from coding_cli_runtime import CliRunRequest, run_cli_command
44
+
45
+ request = CliRunRequest(
46
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
47
+ cwd=Path("/tmp/my-project"),
48
+ timeout_seconds=120,
49
+ )
50
+ result = asyncio.run(run_cli_command(request))
51
+
52
+ print(result.returncode) # 0
53
+ print(result.error_code) # "none"
54
+ print(result.duration_seconds) # 14.2
55
+ print(result.stdout_text[:200])
56
+ ```
57
+
58
+ Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
59
+ shape stays the same. A synchronous variant `run_cli_command_sync` is also
60
+ available.
61
+
62
+ ### Pick a model from the provider catalog
63
+
64
+ ```python
65
+ from coding_cli_runtime import get_provider_spec
66
+
67
+ codex = get_provider_spec("codex")
68
+ print(codex.default_model) # "gpt-5.3-codex"
69
+ print(codex.model_source) # "codex_cli_cache", "override", or "code"
70
+
71
+ for model in codex.models:
72
+ print(f" {model.name}: {model.description}")
73
+ ```
74
+
75
+ The catalog covers all four providers — each with model names, reasoning
76
+ levels, default settings, and visibility flags.
77
+
78
+ Model lists are resolved with a three-tier fallback:
79
+
80
+ 1. **User override** — drop a JSON file at
81
+ `~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
82
+ model list immediately, without waiting for a package update.
83
+ 2. **Live CLI cache** — for Codex, the library reads
84
+ `~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
85
+ present. Other providers fall through because their CLIs don't expose a
86
+ machine-readable model list.
87
+ 3. **Hardcoded fallback** — the model list shipped with the package.
88
+
89
+ Override file format:
90
+
91
+ ```json
92
+ {
93
+ "default_model": "claude-sonnet-4-7",
94
+ "models": [
95
+ "claude-sonnet-4-7",
96
+ {
97
+ "name": "claude-opus-5",
98
+ "description": "Latest opus model",
99
+ "controls": [
100
+ { "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
101
+ ]
102
+ }
103
+ ]
104
+ }
105
+ ```
106
+
107
+ Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
108
+ (default: `~/.config/coding-cli-runtime`).
109
+
110
+ ### Decide whether to retry a failed run
111
+
112
+ ```python
113
+ from coding_cli_runtime import classify_provider_failure
114
+
115
+ classification = classify_provider_failure(
116
+ provider="gemini",
117
+ stderr_text="429 Resource exhausted: rate limit exceeded",
118
+ )
119
+
120
+ if classification.retryable:
121
+ print(f"Retryable ({classification.category}) — will retry")
122
+ else:
123
+ print(f"Fatal ({classification.category}) — giving up")
124
+ ```
125
+
126
+ Works for all four providers. Recognizes auth failures, rate limits,
127
+ network transients, and other provider-specific error patterns.
128
+
129
+ ### Common integration tasks
130
+
131
+ #### Check whether a provider CLI is installed
132
+
133
+ ```python
134
+ from coding_cli_runtime import is_provider_installed
135
+
136
+ if not is_provider_installed("claude"):
137
+ raise RuntimeError("Claude Code is not available on PATH")
138
+ ```
139
+
140
+ This is intentionally minimal: it checks whether the provider binary exists on
141
+ PATH. Deeper CLI drift validation belongs in maintainer tooling, not the
142
+ runtime API.
143
+
144
+ #### Resolve workspace env vars and session search paths
145
+
146
+ ```python
147
+ from coding_cli_runtime import (
148
+ get_provider_contract,
149
+ resolve_session_search_paths,
150
+ resolve_workspace_env,
151
+ )
152
+
153
+ gemini = get_provider_contract("gemini")
154
+
155
+ # Derive provider-specific workspace env vars from contract metadata
156
+ env = resolve_workspace_env(gemini, "/tmp/run-dir")
157
+ # {"GEMINI_CLI_IDE_WORKSPACE_PATH": "/tmp/run-dir"}
158
+
159
+ # Expand concrete host paths for session log searches
160
+ paths = resolve_session_search_paths(gemini)
161
+ # (Path.home() / ".gemini" / "tmp",)
162
+ ```
163
+
164
+ Use these helpers when you want the contract facts turned into concrete
165
+ filesystem/env values without rebuilding the same glue logic in each consumer.
166
+
167
+ ### Look up provider contract metadata
168
+
169
+ ```python
170
+ from coding_cli_runtime import get_provider_contract, build_env_overlay, resolve_config_paths, render_prompt
171
+
172
+ # Get structured metadata for any supported provider
173
+ contract = get_provider_contract("claude")
174
+ print(contract.binary) # "claude"
175
+ print(contract.auth.api_key_env_var) # "CLAUDE_API_KEY"
176
+ print(contract.paths.config_dir) # "~/.claude"
177
+ print(contract.headless.approval.flag) # "--dangerously-skip-permissions"
178
+
179
+ # Build env var overlay for subprocess
180
+ env = build_env_overlay(contract, api_key="sk-...", base_url="https://custom.example.com")
181
+ # {"CLAUDE_API_KEY": "sk-...", "ANTHROPIC_BASE_URL": "https://custom.example.com"}
182
+
183
+ # Resolve config paths for container mounts
184
+ host_dir, container_dir = resolve_config_paths(contract, containerized=True)
185
+ # ("/home/user/.claude", "/root/.claude")
186
+
187
+ # Resolve prompt delivery (stdin vs flag vs activation)
188
+ payload = render_prompt(contract.headless.prompt, "Fix the bug")
189
+ # payload.args = () (stdin delivery for Claude)
190
+ # payload.stdin_text = "Fix the bug"
191
+ ```
192
+
193
+ `ProviderContract` is structured as nested sub-contracts
194
+ (`AuthContract`, `PathContract`, `HeadlessContract`, `OutputContract`,
195
+ `IoContract`, `SessionDiscoveryContract`, `DiagnosticsContract`) so consumers
196
+ can drill into whichever aspect they need. This is reference metadata,
197
+ not a command-construction control plane — callers keep their own
198
+ command assembly and adopt contract fields selectively.
199
+
200
+ ### Query provider I/O conventions
201
+
202
+ ```python
203
+ from coding_cli_runtime import get_provider_contract
204
+
205
+ gemini = get_provider_contract("gemini")
206
+
207
+ # Workspace env vars with value semantics
208
+ for wev in gemini.io.workspace_env_vars:
209
+ print(f"{wev.name} = {wev.value_source}")
210
+ # GEMINI_CLI_IDE_WORKSPACE_PATH = execution_dir
211
+
212
+ # Session discovery (where session logs live)
213
+ sd = gemini.session_discovery
214
+ print(sd.session_roots) # ("tmp",)
215
+ print(sd.session_glob) # "*/chats/session-*.json"
216
+
217
+ # Output format support
218
+ codex = get_provider_contract("codex")
219
+ print(codex.output.output_path_flag) # "-o"
220
+ print(codex.output.schema_path_flag) # "--output-schema"
221
+
222
+ # Diagnostics (Copilot only)
223
+ copilot = get_provider_contract("copilot")
224
+ if copilot.diagnostics:
225
+ print(copilot.diagnostics.log_glob) # "logs/process-*.log"
226
+ ```
227
+
228
+ `WorkspaceEnvVar.value_source` uses a closed vocabulary:
229
+ `"execution_dir"` or `"workspace_root"`.
230
+
231
+ ### Build headless launch commands
232
+
233
+ ```python
234
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
235
+
236
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
237
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
238
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
239
+
240
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
241
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
242
+ cmd.extend(["-C", str(workdir)])
243
+ ```
244
+
245
+ Headless core helpers emit the standard flags for non-interactive runs.
246
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
247
+
248
+ ### Find session logs after a run
249
+
250
+ ```python
251
+ import time
252
+ from coding_cli_runtime import find_codex_session, find_claude_session
253
+
254
+ # Find the most recent Codex session log for a given working directory
255
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
256
+ if session:
257
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
258
+ ```
259
+
260
+ Works for Codex and Claude. Scans provider config directories for session
261
+ files matching the working directory and time window.
262
+
263
+ ## Key types
264
+
265
+ | Type | Purpose |
266
+ |------|---------|
267
+ | `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
268
+ | `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
269
+ | `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
270
+ | `ProviderSpec` | Provider catalog entry with models, controls, defaults |
271
+ | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless, I/O, sessions) |
272
+ | `WorkspaceEnvVar` | Env var with value-source semantics (`execution_dir`, `workspace_root`) |
273
+ | `FailureClassification` | Classified error with retryable flag and category |
274
+
275
+ ### Run long-lived CLI sessions
276
+
277
+ For CLI runs that take minutes (e.g., full app generation), use
278
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
279
+
280
+ - Process-group cleanup (kills orphaned child processes on timeout)
281
+ - Transcript mirroring (streams CLI output to a file while the process runs)
282
+ - Automatic retries on transient failures
283
+
284
+ ```python
285
+ from coding_cli_runtime import run_interactive_session
286
+
287
+ result = await run_interactive_session(
288
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
289
+ cwd=workdir,
290
+ stdin_text=prompt,
291
+ logger=logger,
292
+ timeout_seconds=600,
293
+ )
294
+ ```
295
+
296
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
297
+ Other parameters have sensible defaults.
298
+
299
+ ## API summary
300
+
301
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
302
+ Key function groups:
303
+
304
+ | Group | Functions |
305
+ |-------|-----------|
306
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
307
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
308
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth`, `resolve_workspace_env`, `resolve_session_search_paths` |
309
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
310
+ | Codex batch | `build_codex_exec_spec` |
311
+ | Failure handling | `classify_provider_failure` |
312
+ | Installation check | `is_provider_installed` |
313
+ | Session logs | `find_codex_session`, `find_claude_session` |
314
+ | Schema | `load_schema`, `validate_payload` |
315
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
316
+
317
+ ## Contributing
318
+
319
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
320
+
321
+ ## Prerequisites
322
+
323
+ This package does **not** bundle any CLI binaries or credentials. You must
324
+ install and authenticate the relevant provider CLI yourself before using the
325
+ execution helpers.
326
+
327
+ ## Status
328
+
329
+ Pre-1.0. The API may change between minor versions.
330
+
331
+ ## License
332
+
333
+ MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coding-cli-runtime"
7
- version = "0.2.0"
7
+ version = "0.4.0"
8
8
  description = "Reusable CLI runtime primitives for provider-backed automation workflows"
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  license = "MIT"
@@ -94,7 +94,7 @@ disallow_untyped_defs = false
94
94
  warn_return_any = false
95
95
 
96
96
  [tool.bumpversion]
97
- current_version = "0.2.0"
97
+ current_version = "0.4.0"
98
98
  parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
99
99
  serialize = ["{major}.{minor}.{patch}"]
100
100
  commit = true