coding-cli-runtime 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/CHANGELOG.md +32 -0
  2. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/PKG-INFO +88 -13
  3. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/README.md +87 -12
  4. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/pyproject.toml +2 -2
  5. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/__init__.py +12 -2
  6. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/codex_cli.py +10 -8
  7. coding_cli_runtime-0.3.0/src/coding_cli_runtime/headless.py +124 -0
  8. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_contracts.py +44 -38
  9. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/session_logs.py +56 -0
  10. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/PKG-INFO +88 -13
  11. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/SOURCES.txt +3 -1
  12. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_coverage_gaps.py +15 -7
  13. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_packaging.py +4 -3
  14. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_provider_contracts.py +3 -3
  15. coding_cli_runtime-0.3.0/tests/test_stage2_tier1.py +241 -0
  16. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/CONTRIBUTING.md +0 -0
  17. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/LICENSE +0 -0
  18. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/MANIFEST.in +0 -0
  19. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/setup.cfg +0 -0
  20. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/auth.py +0 -0
  21. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/contracts.py +0 -0
  22. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/copilot_reasoning_baseline.json +0 -0
  23. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/copilot_reasoning_logs.py +0 -0
  24. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/failure_classification.py +0 -0
  25. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/json_io.py +0 -0
  26. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_controls.py +0 -0
  27. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/provider_specs.py +0 -0
  28. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/py.typed +0 -0
  29. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/reasoning.py +0 -0
  30. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/redaction.py +0 -0
  31. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schema_validation.py +0 -0
  32. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +0 -0
  33. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +0 -0
  34. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/session_execution.py +0 -0
  35. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime/subprocess_runner.py +0 -0
  36. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/dependency_links.txt +0 -0
  37. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/src/coding_cli_runtime.egg-info/top_level.txt +0 -0
  38. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_copilot_reasoning_logs.py +0 -0
  39. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_package_resources.py +0 -0
  40. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_playground_probe_smoke.py +0 -0
  41. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_provider_catalog_resolution.py +0 -0
  42. {coding_cli_runtime-0.2.0 → coding_cli_runtime-0.3.0}/tests/test_runtime_parity.py +0 -0
@@ -6,6 +6,38 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/).
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.3.0] - 2026-04-09
10
+
11
+ ### Added
12
+ - **Headless launch core helpers** — per-provider arg renderers derived from
13
+ `ProviderContract.headless`: `build_claude_headless_core()`,
14
+ `build_codex_headless_core()`, `build_copilot_headless_core()`,
15
+ `build_gemini_headless_core()`. All consumers (app-generation, feather,
16
+ codex_cli, provider_contracts builder) now delegate to these.
17
+ - `scan_session_dir()` — generic directory-scanning primitive for session log
18
+ discovery with `extract_fn` callback (internal, not in public `__all__`).
19
+ - Session log discovery section in README.
20
+ - API summary table in README.
21
+ - 27 new Stage 2 tests for headless cores, builder delegation, and
22
+ `scan_session_dir`.
23
+
24
+ ### Changed
25
+ - `build_codex_exec_spec()` now delegates to `build_codex_headless_core()`.
26
+ `full_auto` and `skip_git_repo_check` params preserved and passed through.
27
+ - `_build_non_interactive_run()` now delegates to per-provider headless core
28
+ helpers instead of assembling flags inline.
29
+ - Feather `report_data.py` and `report_sections.py` use headless core helpers
30
+ with fallback for environments without `coding_cli_runtime`.
31
+ - Feather `generate_report.py` Codex session discovery replaced with
32
+ `find_codex_session()` from `coding_cli_runtime`.
33
+ - App-generation `claude_impl.py`, `copilot_impl.py`, `gemini_impl.py`
34
+ `build_command()` functions delegate to headless core helpers.
35
+ - Dead headless opt-out flags removed from Copilot (`--allow-all`, `--ask-user`,
36
+ `--use-custom-instructions`) and Gemini (`--auto-approve`) CLI specs —
37
+ these were never used in batch runs and are now handled by the headless core.
38
+ - README rewritten: user-action feature list, `run_interactive_session` example,
39
+ `uv add` install, API summary, Contributing link, session log discovery.
40
+
9
41
  ## [0.2.0] - 2026-04-08
10
42
 
11
43
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-cli-runtime
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Reusable CLI runtime primitives for provider-backed automation workflows
5
5
  Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -40,17 +40,21 @@ code doesn't need provider-specific subprocess handling.
40
40
 
41
41
  **What it does (and why not just `subprocess.run`):**
42
42
 
43
- - Unified request/result types across all four CLIs
44
- - Timeout enforcement with graceful process termination
45
- - Provider-aware failure classification (retryable vs fatal)
46
- - Built-in model catalog with defaults, reasoning levels, and capabilities
47
- - Interactive session management for long-running generation tasks
48
- - Zero runtime dependencies
43
+ - Run any provider CLI with unified request/result types and timeout enforcement
44
+ - Query the model catalog (with user-override and live-cache fallback)
45
+ - Classify failures as retryable vs fatal per provider
46
+ - Look up provider auth, config dirs, and headless launch flags
47
+ - Build non-interactive launch commands without hardcoding provider flags
48
+ - Find session logs after a run (Codex, Claude)
49
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
50
+ - No Python package dependencies — only requires the provider CLIs themselves
49
51
 
50
52
  ## Installation
51
53
 
52
54
  ```bash
53
55
  pip install coding-cli-runtime
56
+ # or
57
+ uv add coding-cli-runtime
54
58
  ```
55
59
 
56
60
  Requires Python 3.10+.
@@ -65,7 +69,7 @@ from pathlib import Path
65
69
  from coding_cli_runtime import CliRunRequest, run_cli_command
66
70
 
67
71
  request = CliRunRequest(
68
- cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
72
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
69
73
  cwd=Path("/tmp/my-project"),
70
74
  timeout_seconds=120,
71
75
  )
@@ -180,6 +184,38 @@ can drill into whichever aspect they need. This is reference metadata,
180
184
  not a command-construction control plane — consumers keep their own
181
185
  command assembly and adopt contract fields selectively.
182
186
 
187
+ ### Build headless launch commands
188
+
189
+ ```python
190
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
191
+
192
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
193
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
194
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
195
+
196
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
197
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
198
+ cmd.extend(["-C", str(workdir)])
199
+ ```
200
+
201
+ Headless core helpers emit the standard flags for non-interactive runs.
202
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
203
+
204
+ ### Find session logs after a run
205
+
206
+ ```python
207
+ import time
208
+ from coding_cli_runtime import find_codex_session, find_claude_session
209
+
210
+ # Find the most recent Codex session log for a given working directory
211
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
212
+ if session:
213
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
214
+ ```
215
+
216
+ Works for Codex and Claude. Scans provider config directories for session
217
+ files matching the working directory and time window.
218
+
183
219
  ## Key types
184
220
 
185
221
  | Type | Purpose |
@@ -191,11 +227,50 @@ command assembly and adopt contract fields selectively.
191
227
  | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
192
228
  | `FailureClassification` | Classified error with retryable flag and category |
193
229
 
194
- `run_interactive_session()` manages long-running CLI processes with
195
- timeout enforcement, process-group cleanup, transcript mirroring, and
196
- automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
197
- required — observability labels like `job_name` and `phase_tag` default to
198
- sensible values so external callers don't need to invent them.
230
+ ### Run long-lived CLI sessions
231
+
232
+ For CLI runs that take minutes (e.g., full app generation), use
233
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
234
+
235
+ - Process-group cleanup (kills orphaned child processes on timeout)
236
+ - Transcript mirroring (streams CLI output to a file while the process runs)
237
+ - Automatic retries on transient failures
238
+
239
+ ```python
240
+ from coding_cli_runtime import run_interactive_session
241
+
242
+ result = await run_interactive_session(
243
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
244
+ cwd=workdir,
245
+ stdin_text=prompt,
246
+ logger=logger,
247
+ timeout_seconds=600,
248
+ )
249
+ ```
250
+
251
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
252
+ Observability labels (`job_name`, `phase_tag`) default to sensible values.
253
+
254
+ ## API summary
255
+
256
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
257
+ Key function groups:
258
+
259
+ | Group | Functions |
260
+ |-------|-----------|
261
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
262
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
263
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
264
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
265
+ | Codex batch | `build_codex_exec_spec` |
266
+ | Failure handling | `classify_provider_failure` |
267
+ | Session logs | `find_codex_session`, `find_claude_session` |
268
+ | Schema | `load_schema`, `validate_payload` |
269
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
270
+
271
+ ## Contributing
272
+
273
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
199
274
 
200
275
  ## Prerequisites
201
276
 
@@ -14,17 +14,21 @@ code doesn't need provider-specific subprocess handling.
14
14
 
15
15
  **What it does (and why not just `subprocess.run`):**
16
16
 
17
- - Unified request/result types across all four CLIs
18
- - Timeout enforcement with graceful process termination
19
- - Provider-aware failure classification (retryable vs fatal)
20
- - Built-in model catalog with defaults, reasoning levels, and capabilities
21
- - Interactive session management for long-running generation tasks
22
- - Zero runtime dependencies
17
+ - Run any provider CLI with unified request/result types and timeout enforcement
18
+ - Query the model catalog (with user-override and live-cache fallback)
19
+ - Classify failures as retryable vs fatal per provider
20
+ - Look up provider auth, config dirs, and headless launch flags
21
+ - Build non-interactive launch commands without hardcoding provider flags
22
+ - Find session logs after a run (Codex, Claude)
23
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
24
+ - No Python package dependencies — only requires the provider CLIs themselves
23
25
 
24
26
  ## Installation
25
27
 
26
28
  ```bash
27
29
  pip install coding-cli-runtime
30
+ # or
31
+ uv add coding-cli-runtime
28
32
  ```
29
33
 
30
34
  Requires Python 3.10+.
@@ -39,7 +43,7 @@ from pathlib import Path
39
43
  from coding_cli_runtime import CliRunRequest, run_cli_command
40
44
 
41
45
  request = CliRunRequest(
42
- cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
46
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
43
47
  cwd=Path("/tmp/my-project"),
44
48
  timeout_seconds=120,
45
49
  )
@@ -154,6 +158,38 @@ can drill into whichever aspect they need. This is reference metadata,
154
158
  not a command-construction control plane — consumers keep their own
155
159
  command assembly and adopt contract fields selectively.
156
160
 
161
+ ### Build headless launch commands
162
+
163
+ ```python
164
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
165
+
166
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
167
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
168
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
169
+
170
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
171
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
172
+ cmd.extend(["-C", str(workdir)])
173
+ ```
174
+
175
+ Headless core helpers emit the standard flags for non-interactive runs.
176
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
177
+
178
+ ### Find session logs after a run
179
+
180
+ ```python
181
+ import time
182
+ from coding_cli_runtime import find_codex_session, find_claude_session
183
+
184
+ # Find the most recent Codex session log for a given working directory
185
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
186
+ if session:
187
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
188
+ ```
189
+
190
+ Works for Codex and Claude. Scans provider config directories for session
191
+ files matching the working directory and time window.
192
+
157
193
  ## Key types
158
194
 
159
195
  | Type | Purpose |
@@ -165,11 +201,50 @@ command assembly and adopt contract fields selectively.
165
201
  | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
166
202
  | `FailureClassification` | Classified error with retryable flag and category |
167
203
 
168
- `run_interactive_session()` manages long-running CLI processes with
169
- timeout enforcement, process-group cleanup, transcript mirroring, and
170
- automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
171
- required — observability labels like `job_name` and `phase_tag` default to
172
- sensible values so external callers don't need to invent them.
204
+ ### Run long-lived CLI sessions
205
+
206
+ For CLI runs that take minutes (e.g., full app generation), use
207
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
208
+
209
+ - Process-group cleanup (kills orphaned child processes on timeout)
210
+ - Transcript mirroring (streams CLI output to a file while the process runs)
211
+ - Automatic retries on transient failures
212
+
213
+ ```python
214
+ from coding_cli_runtime import run_interactive_session
215
+
216
+ result = await run_interactive_session(
217
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
218
+ cwd=workdir,
219
+ stdin_text=prompt,
220
+ logger=logger,
221
+ timeout_seconds=600,
222
+ )
223
+ ```
224
+
225
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
226
+ Observability labels (`job_name`, `phase_tag`) default to sensible values.
227
+
228
+ ## API summary
229
+
230
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
231
+ Key function groups:
232
+
233
+ | Group | Functions |
234
+ |-------|-----------|
235
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
236
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
237
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
238
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
239
+ | Codex batch | `build_codex_exec_spec` |
240
+ | Failure handling | `classify_provider_failure` |
241
+ | Session logs | `find_codex_session`, `find_claude_session` |
242
+ | Schema | `load_schema`, `validate_payload` |
243
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
244
+
245
+ ## Contributing
246
+
247
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
173
248
 
174
249
  ## Prerequisites
175
250
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "coding-cli-runtime"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "Reusable CLI runtime primitives for provider-backed automation workflows"
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  license = "MIT"
@@ -94,7 +94,7 @@ disallow_untyped_defs = false
94
94
  warn_return_any = false
95
95
 
96
96
  [tool.bumpversion]
97
- current_version = "0.2.0"
97
+ current_version = "0.3.0"
98
98
  parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
99
99
  serialize = ["{major}.{minor}.{patch}"]
100
100
  commit = true
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "0.2.0"
5
+ __version__ = "0.3.0"
6
6
 
7
7
  from .auth import AuthResolution, resolve_auth
8
8
  from .codex_cli import CodexExecSpec, build_codex_exec_spec
@@ -15,6 +15,12 @@ from .contracts import (
15
15
  ErrorCode,
16
16
  )
17
17
  from .failure_classification import FailureClassification, classify_provider_failure
18
+ from .headless import (
19
+ build_claude_headless_core,
20
+ build_codex_headless_core,
21
+ build_copilot_headless_core,
22
+ build_gemini_headless_core,
23
+ )
18
24
  from .provider_contracts import (
19
25
  ApprovalContract,
20
26
  AuthContract,
@@ -97,7 +103,12 @@ __all__ = [
97
103
  "SessionRetryDecision",
98
104
  "SessionExecutionTimeoutError",
99
105
  "TranscriptMirrorStrategy",
106
+ "build_claude_headless_core",
107
+ "build_codex_exec_spec",
108
+ "build_codex_headless_core",
109
+ "build_copilot_headless_core",
100
110
  "build_env_overlay",
111
+ "build_gemini_headless_core",
101
112
  "get_claude_default_model",
102
113
  "get_claude_effort_levels",
103
114
  "get_claude_model_candidates",
@@ -112,7 +123,6 @@ __all__ = [
112
123
  "get_provider_spec",
113
124
  "list_provider_specs",
114
125
  "build_model_id",
115
- "build_codex_exec_spec",
116
126
  "classify_provider_failure",
117
127
  "load_schema",
118
128
  "render_prompt",
@@ -60,18 +60,20 @@ def build_codex_exec_spec(
60
60
  model_controls=model_controls,
61
61
  )
62
62
  reasoning_config_value = json.dumps(effective_reasoning)
63
- cmd_parts: list[str] = [str(codex_bin), "exec"]
63
+
64
+ from .headless import build_codex_headless_core
65
+
66
+ cmd_parts: list[str] = build_codex_headless_core(
67
+ model,
68
+ binary=str(codex_bin),
69
+ sandbox_mode=sandbox if sandbox else None,
70
+ full_auto=full_auto,
71
+ skip_git_repo_check=skip_git_repo_check,
72
+ )
64
73
  if json_output:
65
74
  cmd_parts.append("--json")
66
- if full_auto:
67
- cmd_parts.append("--full-auto")
68
- cmd_parts.extend(["--sandbox", sandbox])
69
- if skip_git_repo_check:
70
- cmd_parts.append("--skip-git-repo-check")
71
75
  cmd_parts.extend(
72
76
  [
73
- "--model",
74
- model,
75
77
  "--config",
76
78
  f"model_reasoning_effort={reasoning_config_value}",
77
79
  "-C",
@@ -0,0 +1,124 @@
1
+ """Per-provider headless launch core helpers.
2
+
3
+ Each helper emits the standard headless launch args for its provider,
4
+ derived from ``ProviderContract.headless``. Consumers append their own
5
+ app-specific tails (tool restrictions, output paths, prompt, etc.).
6
+
7
+ These helpers are the canonical source for headless launch flag assembly
8
+ within ``coding_cli_runtime``. In-repo consumers (feather, codex_cli,
9
+ provider_contracts builder) delegate to them. App-generation provider
10
+ wrappers may still assemble flags directly when their command construction
11
+ is interleaved with consumer-specific logic (reasoning config, output
12
+ format, artifact paths).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from .provider_contracts import get_provider_contract
18
+
19
+
20
def build_claude_headless_core(
    model: str,
    *,
    binary: str | None = None,
    permission_mode: str | None = None,
    skip_permissions: bool = True,
) -> list[str]:
    """Assemble the leading args of a headless Claude invocation.

    The list starts with the binary, followed by the contract's headless
    activation args, the permission-mode pair (when the contract defines
    a permission-mode flag and a mode is available), the approval flag
    (when requested and defined), and finally ``--model <model>``.
    Prompt, output format, tool restrictions and any other app-specific
    flags are left to the caller.

    Args:
        model: Value emitted after ``--model``.
        binary: Executable to use instead of the contract's binary.
        permission_mode: Overrides the contract's default permission mode.
        skip_permissions: When True, append the contract's approval flag
            if one is defined.

    Returns:
        A fresh list of command-line args ending with ``--model <model>``.
    """
    contract = get_provider_contract("claude")
    headless = contract.headless
    approval = headless.approval

    args: list[str] = [binary or contract.binary, *headless.activation_args]

    mode_flag = approval.permission_mode_flag
    if mode_flag:
        # An explicit mode wins; otherwise fall back to the contract default.
        chosen_mode = permission_mode or approval.default_permission_mode
        if chosen_mode:
            args += [mode_flag, chosen_mode]

    if skip_permissions and approval.flag:
        args.append(approval.flag)

    args += ["--model", model]
    return args
44
+
45
+
46
def build_codex_headless_core(
    model: str,
    *,
    binary: str | None = None,
    sandbox_mode: str | None = None,
    full_auto: bool = True,
    skip_git_repo_check: bool = True,
) -> list[str]:
    """Assemble the leading args of a headless Codex invocation.

    The list starts with the binary and the contract's headless
    activation args, then optionally the non-interactive mode flag, the
    sandbox flag/mode pair, and the git-repo bypass flag, and always ends
    with ``--model <model>``. It does not add ``-C``, ``-o``,
    ``--output-schema`` or reasoning config; callers append those.

    Args:
        model: Value emitted after ``--model``.
        binary: Executable to use instead of the contract's binary.
        sandbox_mode: Overrides the contract's writable sandbox mode.
        full_auto: When True, include the contract's non-interactive
            mode flag if one is defined.
        skip_git_repo_check: When True, include the contract's git-repo
            bypass flag, provided the contract says a git repo is
            required and defines such a flag.

    Returns:
        A fresh list of command-line args ending with ``--model <model>``.
    """
    contract = get_provider_contract("codex")
    headless = contract.headless

    args: list[str] = [binary or contract.binary, *headless.activation_args]

    if full_auto and headless.noninteractive_mode_flag:
        args.append(headless.noninteractive_mode_flag)

    sandbox = headless.sandbox
    if sandbox is not None:
        # Caller-supplied mode wins over the contract's writable default.
        args += [sandbox.flag, sandbox_mode or sandbox.writable_mode]

    bypass_wanted = skip_git_repo_check and headless.requires_git_repo
    if bypass_wanted and headless.skip_git_repo_flag:
        args.append(headless.skip_git_repo_flag)

    args += ["--model", model]
    return args
77
+
78
+
79
def build_copilot_headless_core(
    model: str,
    *,
    binary: str | None = None,
    stream: str | None = None,
) -> list[str]:
    """Assemble the leading args of a headless Copilot invocation.

    Emitted in order: the binary, the contract's headless activation
    args, the approval flag (if defined), ``--model <model>``, and the
    stream flag/value pair (if the contract defines a stream flag and a
    value is available). ``-p``, ``--share`` and force-implementation
    handling are not included; callers append those.

    Args:
        model: Value emitted after ``--model``.
        binary: Executable to use instead of the contract's binary.
        stream: Overrides the contract's default stream mode.

    Returns:
        A fresh list of command-line args.
    """
    contract = get_provider_contract("copilot")
    headless = contract.headless

    args: list[str] = [binary or contract.binary, *headless.activation_args]

    approval_flag = headless.approval.flag
    if approval_flag:
        args.append(approval_flag)

    args += ["--model", model]

    if headless.stream_flag:
        # Explicit stream value wins; otherwise use the contract default.
        stream_mode = stream or headless.default_stream_mode
        if stream_mode:
            args += [headless.stream_flag, stream_mode]

    return args
104
+
105
+
106
def build_gemini_headless_core(
    model: str,
    *,
    binary: str | None = None,
) -> list[str]:
    """Assemble the leading args of a headless Gemini invocation.

    Emitted in order: the binary, the contract's headless activation
    args, the approval flag (if defined), and ``--model <model>``.
    Prompt transport is not handled here; callers render the prompt
    separately (e.g. via ``render_prompt()``).

    Args:
        model: Value emitted after ``--model``.
        binary: Executable to use instead of the contract's binary.

    Returns:
        A fresh list of command-line args ending with ``--model <model>``.
    """
    contract = get_provider_contract("gemini")
    headless = contract.headless

    args: list[str] = [binary or contract.binary, *headless.activation_args]

    approval_flag = headless.approval.flag
    if approval_flag:
        args.append(approval_flag)

    args += ["--model", model]
    return args
@@ -406,47 +406,53 @@ def _build_non_interactive_run(
406
406
  stream: str | None = None,
407
407
  extra_flags: tuple[str, ...] = (),
408
408
  ) -> NonInteractiveRunSpec:
409
- """Build a non-interactive CLI run spec. Internal convenience."""
410
- contract = get_provider_contract(provider_id)
411
- h = contract.headless
412
-
413
- bin_name = binary or contract.binary
414
- cmd: list[str] = [bin_name]
415
-
416
- # Headless activation (e.g. "--print" for Claude, "exec" for Codex)
417
- cmd.extend(h.activation_args)
418
-
419
- # Non-interactive mode flag (e.g. "--full-auto" for Codex)
420
- if h.noninteractive_mode_flag:
421
- cmd.append(h.noninteractive_mode_flag)
422
-
423
- # Sandbox (Codex)
424
- if h.sandbox is not None:
425
- mode = codex_sandbox_mode or h.sandbox.writable_mode
426
- cmd.extend([h.sandbox.flag, mode])
427
-
428
- # Git repo bypass
429
- if h.requires_git_repo and h.skip_git_repo_flag:
430
- cmd.append(h.skip_git_repo_flag)
409
+ """Build a non-interactive CLI run spec. Internal convenience.
431
410
 
432
- # Approval
433
- if h.approval.flag:
434
- cmd.append(h.approval.flag)
435
-
436
- # Permission mode (Claude)
437
- if h.approval.permission_mode_flag:
438
- mode_value = permission_mode or h.approval.default_permission_mode
439
- if mode_value:
440
- cmd.extend([h.approval.permission_mode_flag, mode_value])
411
+ Delegates headless core arg assembly to ``headless.build_*_headless_core()``
412
+ helpers, which derive flags from ``ProviderContract.headless``.
413
+ """
414
+ from .headless import (
415
+ build_claude_headless_core,
416
+ build_codex_headless_core,
417
+ build_copilot_headless_core,
418
+ build_gemini_headless_core,
419
+ )
441
420
 
442
- # Model
443
- cmd.extend(["--model", model])
421
+ contract = get_provider_contract(provider_id)
422
+ h = contract.headless
423
+ key = provider_id.strip().lower()
444
424
 
445
- # Stream (Copilot)
446
- if h.stream_flag:
447
- stream_value = stream or h.default_stream_mode
448
- if stream_value:
449
- cmd.extend([h.stream_flag, stream_value])
425
+ # Headless core (binary + activation + approval + model + stream)
426
+ if key == "claude":
427
+ cmd = build_claude_headless_core(model, binary=binary, permission_mode=permission_mode)
428
+ elif key == "codex":
429
+ cmd = build_codex_headless_core(model, binary=binary, sandbox_mode=codex_sandbox_mode)
430
+ elif key == "copilot":
431
+ cmd = build_copilot_headless_core(model, binary=binary, stream=stream)
432
+ elif key == "gemini":
433
+ cmd = build_gemini_headless_core(model, binary=binary)
434
+ else:
435
+ # Fallback for unknown providers — generic assembly
436
+ bin_name = binary or contract.binary
437
+ cmd = [bin_name, *h.activation_args]
438
+ if h.noninteractive_mode_flag:
439
+ cmd.append(h.noninteractive_mode_flag)
440
+ if h.sandbox is not None:
441
+ mode = codex_sandbox_mode or h.sandbox.writable_mode
442
+ cmd.extend([h.sandbox.flag, mode])
443
+ if h.requires_git_repo and h.skip_git_repo_flag:
444
+ cmd.append(h.skip_git_repo_flag)
445
+ if h.approval.flag:
446
+ cmd.append(h.approval.flag)
447
+ if h.approval.permission_mode_flag:
448
+ mode_value = permission_mode or h.approval.default_permission_mode
449
+ if mode_value:
450
+ cmd.extend([h.approval.permission_mode_flag, mode_value])
451
+ cmd.extend(["--model", model])
452
+ if h.stream_flag:
453
+ stream_value = stream or h.default_stream_mode
454
+ if stream_value:
455
+ cmd.extend([h.stream_flag, stream_value])
450
456
 
451
457
  # Prompt
452
458
  payload = render_prompt(h.prompt, prompt)
@@ -5,7 +5,11 @@ from __future__ import annotations
5
5
  import json
6
6
  import os
7
7
  import re
8
+ from collections.abc import Callable
8
9
  from pathlib import Path
10
+ from typing import TypeVar
11
+
12
+ _T = TypeVar("_T")
9
13
 
10
14
 
11
15
  def normalize_path_str(path_str: str) -> str:
@@ -15,6 +19,58 @@ def normalize_path_str(path_str: str) -> str:
15
19
  return os.path.normpath(path_str)
16
20
 
17
21
 
22
+ # ---------------------------------------------------------------------------
23
+ # Generic session-directory scanning primitive
24
+ # ---------------------------------------------------------------------------
25
+
26
+
27
def scan_session_dir(
    directory: Path,
    *,
    glob_pattern: str = "*.jsonl",
    since_ts: float,
    mtime_buffer: float = 15.0,
    extract_fn: Callable[[Path], _T | None],
    max_candidates: int = 200,
) -> list[tuple[float, Path, _T]]:
    """Scan a directory for session files, filter by mtime, extract metadata.

    Returns a list of ``(mtime, path, extracted)`` tuples sorted by mtime
    descending. Provider-specific ranking/selection stays with the caller.

    Args:
        directory: Directory to scan (recursively).
        glob_pattern: Glob pattern for session files (default: ``*.jsonl``).
        since_ts: Only include files with mtime >= ``since_ts - mtime_buffer``.
        mtime_buffer: Seconds of slack before ``since_ts`` (default: 15).
        extract_fn: Called on each candidate path. Return ``None`` to skip.
            Exceptions raised by ``extract_fn`` propagate to the caller.
        max_candidates: Max number of candidates (newest first) to process
            after the mtime filter. Zero or a negative value processes none.

    Returns:
        The extracted entries, newest first. Empty when the directory does
        not exist, the scan itself fails, or ``max_candidates`` is not
        positive.
    """
    # A negative cap would otherwise reach the slice below and silently drop
    # only the N oldest files instead of limiting the scan.
    if max_candidates <= 0:
        return []
    if not directory.exists():
        return []

    cutoff = since_ts - mtime_buffer
    candidates: list[tuple[float, Path]] = []
    try:
        for path in directory.rglob(glob_pattern):
            try:
                mtime = path.stat().st_mtime
            except OSError:
                # File vanished or is unreadable; ignore it and keep scanning.
                continue
            if mtime >= cutoff:
                candidates.append((mtime, path))
    except (OSError, RuntimeError):
        # Best-effort discovery: a failed directory walk yields no results.
        return []

    candidates.sort(key=lambda item: item[0], reverse=True)

    results: list[tuple[float, Path, _T]] = []
    for mtime, path in candidates[:max_candidates]:
        extracted = extract_fn(path)
        if extracted is not None:
            results.append((mtime, path, extracted))
    return results
72
+
73
+
18
74
  def codex_session_roots() -> list[Path]:
19
75
  base = Path.home() / ".codex"
20
76
  return [base / "sessions", base / "archived_sessions"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-cli-runtime
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Reusable CLI runtime primitives for provider-backed automation workflows
5
5
  Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
6
6
  License-Expression: MIT
@@ -40,17 +40,21 @@ code doesn't need provider-specific subprocess handling.
40
40
 
41
41
  **What it does (and why not just `subprocess.run`):**
42
42
 
43
- - Unified request/result types across all four CLIs
44
- - Timeout enforcement with graceful process termination
45
- - Provider-aware failure classification (retryable vs fatal)
46
- - Built-in model catalog with defaults, reasoning levels, and capabilities
47
- - Interactive session management for long-running generation tasks
48
- - Zero runtime dependencies
43
+ - Run any provider CLI with unified request/result types and timeout enforcement
44
+ - Query the model catalog (with user-override and live-cache fallback)
45
+ - Classify failures as retryable vs fatal per provider
46
+ - Look up provider auth, config dirs, and headless launch flags
47
+ - Build non-interactive launch commands without hardcoding provider flags
48
+ - Find session logs after a run (Codex, Claude)
49
+ - Run long-lived sessions with process-group cleanup and transcript mirroring
50
+ - No Python package dependencies — only requires the provider CLIs themselves
49
51
 
50
52
  ## Installation
51
53
 
52
54
  ```bash
53
55
  pip install coding-cli-runtime
56
+ # or
57
+ uv add coding-cli-runtime
54
58
  ```
55
59
 
56
60
  Requires Python 3.10+.
@@ -65,7 +69,7 @@ from pathlib import Path
65
69
  from coding_cli_runtime import CliRunRequest, run_cli_command
66
70
 
67
71
  request = CliRunRequest(
68
- cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
72
+ cmd_parts=("codex", "--model", "gpt-5.4", "--quiet", "exec", "fix the tests"),
69
73
  cwd=Path("/tmp/my-project"),
70
74
  timeout_seconds=120,
71
75
  )
@@ -180,6 +184,38 @@ can drill into whichever aspect they need. This is reference metadata,
180
184
  not a command-construction control plane — consumers keep their own
181
185
  command assembly and adopt contract fields selectively.
182
186
 
187
+ ### Build headless launch commands
188
+
189
+ ```python
190
+ from coding_cli_runtime import build_claude_headless_core, build_codex_headless_core
191
+
192
+ # Claude: binary + --print + --permission-mode + --dangerously-skip-permissions + --model
193
+ cmd = build_claude_headless_core("claude-sonnet-4-6")
194
+ cmd.extend(["--output-format", "text", "--disallowedTools", "Bash,Task"])
195
+
196
+ # Codex: binary + exec + --full-auto + --sandbox + --skip-git-repo-check + --model
197
+ cmd = build_codex_headless_core("gpt-5.4", sandbox_mode="read-only")
198
+ cmd.extend(["-C", str(workdir)])
199
+ ```
200
+
201
+ Headless core helpers emit the standard flags for non-interactive runs.
202
+ Consumers append app-specific tails (tool restrictions, output paths, etc.).
203
+
204
+ ### Find session logs after a run
205
+
206
+ ```python
207
+ import time
208
+ from coding_cli_runtime import find_codex_session, find_claude_session
209
+
210
+ # Find the most recent Codex session log for a given working directory
211
+ session = find_codex_session("/path/to/project", since_ts=time.time() - 300)
212
+ if session:
213
+ print(f"Session log: {session}") # ~/.codex/sessions/.../conversation.jsonl
214
+ ```
215
+
216
+ Works for Codex and Claude. Scans provider config directories for session
217
+ files matching the working directory and time window.
218
+
183
219
  ## Key types
184
220
 
185
221
  | Type | Purpose |
@@ -191,11 +227,50 @@ command assembly and adopt contract fields selectively.
191
227
  | `ProviderContract` | Structured provider CLI metadata (auth, paths, headless launch) |
192
228
  | `FailureClassification` | Classified error with retryable flag and category |
193
229
 
194
- `run_interactive_session()` manages long-running CLI processes with
195
- timeout enforcement, process-group cleanup, transcript mirroring, and
196
- automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
197
- required — observability labels like `job_name` and `phase_tag` default to
198
- sensible values so external callers don't need to invent them.
230
+ ### Run long-lived CLI sessions
231
+
232
+ For CLI runs that take minutes (e.g., full app generation), use
233
+ `run_interactive_session()` instead of `run_cli_command()`. It adds:
234
+
235
+ - Process-group cleanup (kills orphaned child processes on timeout)
236
+ - Transcript mirroring (streams CLI output to a file while the process runs)
237
+ - Automatic retries on transient failures
238
+
239
+ ```python
240
+ from coding_cli_runtime import run_interactive_session
241
+
242
+ result = await run_interactive_session(
243
+ cmd_parts=("claude", "--print", "--model", "claude-sonnet-4-6"),
244
+ cwd=workdir,
245
+ stdin_text=prompt,
246
+ logger=logger,
247
+ timeout_seconds=600,
248
+ )
249
+ ```
250
+
251
+ Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are required.
252
+ Observability labels (`job_name`, `phase_tag`) default to sensible values.
253
+
254
+ ## API summary
255
+
256
+ The full public API is listed in [`__init__.py`](src/coding_cli_runtime/__init__.py).
257
+ Key function groups:
258
+
259
+ | Group | Functions |
260
+ |-------|-----------|
261
+ | Execution | `run_cli_command`, `run_cli_command_sync`, `run_interactive_session` |
262
+ | Provider metadata | `get_provider_contract`, `get_provider_spec`, `list_provider_specs` |
263
+ | Contract helpers | `build_env_overlay`, `resolve_config_paths`, `render_prompt`, `resolve_auth` |
264
+ | Headless launch | `build_claude_headless_core`, `build_codex_headless_core`, `build_copilot_headless_core`, `build_gemini_headless_core` |
265
+ | Codex batch | `build_codex_exec_spec` |
266
+ | Failure handling | `classify_provider_failure` |
267
+ | Session logs | `find_codex_session`, `find_claude_session` |
268
+ | Schema | `load_schema`, `validate_payload` |
269
+ | Utilities | `redact_text`, `build_model_id`, `normalize_path_str` |
270
+
271
+ ## Contributing
272
+
273
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and quality checks.
199
274
 
200
275
  ## Prerequisites
201
276
 
@@ -11,6 +11,7 @@ src/coding_cli_runtime/contracts.py
11
11
  src/coding_cli_runtime/copilot_reasoning_baseline.json
12
12
  src/coding_cli_runtime/copilot_reasoning_logs.py
13
13
  src/coding_cli_runtime/failure_classification.py
14
+ src/coding_cli_runtime/headless.py
14
15
  src/coding_cli_runtime/json_io.py
15
16
  src/coding_cli_runtime/provider_contracts.py
16
17
  src/coding_cli_runtime/provider_controls.py
@@ -35,4 +36,5 @@ tests/test_packaging.py
35
36
  tests/test_playground_probe_smoke.py
36
37
  tests/test_provider_catalog_resolution.py
37
38
  tests/test_provider_contracts.py
38
- tests/test_runtime_parity.py
39
+ tests/test_runtime_parity.py
40
+ tests/test_stage2_tier1.py
@@ -163,11 +163,19 @@ class TestCodexCli:
163
163
  spec = self._build(json_output=False)
164
164
  assert "--json" not in spec.cmd_parts
165
165
 
166
- def test_no_full_auto(self) -> None:
166
+ def test_full_auto_always_included(self) -> None:
167
+ spec = self._build()
168
+ assert "--full-auto" in spec.cmd_parts
169
+
170
+ def test_skip_git_repo_check_always_included(self) -> None:
171
+ spec = self._build()
172
+ assert "--skip-git-repo-check" in spec.cmd_parts
173
+
174
+ def test_full_auto_false_omits_flag(self) -> None:
167
175
  spec = self._build(full_auto=False)
168
176
  assert "--full-auto" not in spec.cmd_parts
169
177
 
170
- def test_no_skip_git_repo_check(self) -> None:
178
+ def test_skip_git_repo_check_false_omits_flag(self) -> None:
171
179
  spec = self._build(skip_git_repo_check=False)
172
180
  assert "--skip-git-repo-check" not in spec.cmd_parts
173
181
 
@@ -475,7 +483,7 @@ class TestProviderControls:
475
483
  assert "gpt-5.4" in result
476
484
 
477
485
  def test_build_model_id_empty_controls(self) -> None:
478
- assert build_model_id("claude-4-6", applied_controls={}) == "claude-4-6"
486
+ assert build_model_id("claude-sonnet-4-6", applied_controls={}) == "claude-sonnet-4-6"
479
487
 
480
488
 
481
489
  # ── auth ──────────────────────────────────────────────────────────────
@@ -589,10 +597,10 @@ class TestProviderControlsDeep:
589
597
 
590
598
  def test_build_model_id_multiple_controls(self) -> None:
591
599
  result = build_model_id(
592
- "claude-4-6",
600
+ "claude-sonnet-4-6",
593
601
  applied_controls={"effort": "medium", "thinking_tokens": 8192},
594
602
  )
595
- assert "claude-4-6:" in result
603
+ assert "claude-sonnet-4-6:" in result
596
604
  assert "effort=medium" in result
597
605
  assert "thinking_tokens=8192" in result
598
606
 
@@ -630,7 +638,7 @@ class TestSchemaValidationDeep:
630
638
  def test_type_check_boolean_not_integer(self) -> None:
631
639
  payload = {
632
640
  "provider": "claude",
633
- "model": "claude-4-6",
641
+ "model": "claude-sonnet-4-6",
634
642
  "run_id": "test",
635
643
  "status": "completed",
636
644
  "error_code": "none",
@@ -642,7 +650,7 @@ class TestSchemaValidationDeep:
642
650
  def test_null_type_accepted(self) -> None:
643
651
  payload = {
644
652
  "provider": "claude",
645
- "model": "claude-4-6",
653
+ "model": "claude-sonnet-4-6",
646
654
  "run_id": "test",
647
655
  "status": "completed",
648
656
  "error_code": "none",
@@ -34,11 +34,12 @@ def test_builds_wheel_and_sdist(tmp_path) -> None:
34
34
  wheel_path = wheel_paths[0]
35
35
  sdist_path = sdist_paths[0]
36
36
  # Read version from pyproject.toml so this test doesn't break on bumps
37
- import tomllib
37
+ import re
38
38
 
39
39
  pyproject = package_root / "pyproject.toml"
40
- with open(pyproject, "rb") as f:
41
- version = tomllib.load(f)["project"]["version"]
40
+ match = re.search(r'^version\s*=\s*"([^"]+)"', pyproject.read_text(), re.MULTILINE)
41
+ assert match, "Could not find version in pyproject.toml"
42
+ version = match.group(1)
42
43
  assert wheel_path.name.startswith(f"coding_cli_runtime-{version}-")
43
44
  assert sdist_path.name == f"coding_cli_runtime-{version}.tar.gz"
44
45
 
@@ -229,11 +229,11 @@ def test_render_prompt_flag_delivery_missing_flag_raises() -> None:
229
229
 
230
230
 
231
231
  def test_builder_claude_command_shape() -> None:
232
- spec = _build_non_interactive_run("claude", model="claude-4-6", prompt="do stuff")
232
+ spec = _build_non_interactive_run("claude", model="claude-sonnet-4-6", prompt="do stuff")
233
233
  assert spec.cmd_parts[0] == "claude"
234
234
  assert "--print" in spec.cmd_parts
235
235
  assert "--model" in spec.cmd_parts
236
- assert "claude-4-6" in spec.cmd_parts
236
+ assert "claude-sonnet-4-6" in spec.cmd_parts
237
237
  assert "--dangerously-skip-permissions" in spec.cmd_parts
238
238
  assert "--permission-mode" in spec.cmd_parts
239
239
  assert "bypassPermissions" in spec.cmd_parts
@@ -293,7 +293,7 @@ def test_builder_copilot_stream_off() -> None:
293
293
 
294
294
  def test_builder_claude_permission_mode_override() -> None:
295
295
  spec = _build_non_interactive_run(
296
- "claude", model="claude-4-6", prompt="x", permission_mode="acceptEdits"
296
+ "claude", model="claude-sonnet-4-6", prompt="x", permission_mode="acceptEdits"
297
297
  )
298
298
  idx = list(spec.cmd_parts).index("--permission-mode")
299
299
  assert spec.cmd_parts[idx + 1] == "acceptEdits"
@@ -0,0 +1,241 @@
1
+ """Tests for Stage 2 Tier 1 extractions: headless cores, scan_session_dir."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ from coding_cli_runtime.headless import (
9
+ build_claude_headless_core,
10
+ build_codex_headless_core,
11
+ build_copilot_headless_core,
12
+ build_gemini_headless_core,
13
+ )
14
+ from coding_cli_runtime.session_logs import scan_session_dir
15
+
16
+ # ── headless launch cores ─────────────────────────────────────────────
17
+
18
+
19
class TestClaudeHeadlessCore:
    """Flag-level checks for ``build_claude_headless_core``."""

    def test_default(self) -> None:
        """Default call starts with ``claude`` and carries every core token."""
        argv = build_claude_headless_core("claude-sonnet-4-6")
        assert argv[0] == "claude"
        expected_tokens = (
            "--print",
            "--permission-mode",
            "bypassPermissions",
            "--dangerously-skip-permissions",
            "--model",
            "claude-sonnet-4-6",
        )
        for token in expected_tokens:
            assert token in argv

    def test_custom_binary(self) -> None:
        """``binary`` replaces the first element of the command."""
        argv = build_claude_headless_core("m", binary="/custom/claude")
        assert argv[0] == "/custom/claude"

    def test_permission_mode_override(self) -> None:
        """The value following ``--permission-mode`` reflects the override."""
        argv = build_claude_headless_core("m", permission_mode="acceptEdits")
        assert argv[argv.index("--permission-mode") + 1] == "acceptEdits"

    def test_skip_permissions_false(self) -> None:
        """Disabling skip-permissions drops only that flag."""
        argv = build_claude_headless_core("m", skip_permissions=False)
        assert "--dangerously-skip-permissions" not in argv
        assert "--permission-mode" in argv

    def test_no_prompt_in_output(self) -> None:
        """The core command never contains the ``-p`` prompt flag."""
        argv = build_claude_headless_core("m")
        assert "-p" not in argv
47
+
48
+
49
class TestCodexHeadlessCore:
    """Flag-level checks for ``build_codex_headless_core``."""

    def test_default(self) -> None:
        """Default call starts with ``codex`` and carries every core token."""
        argv = build_codex_headless_core("gpt-5.4")
        assert argv[0] == "codex"
        expected_tokens = (
            "exec",
            "--full-auto",
            "--sandbox",
            "danger-full-access",
            "--skip-git-repo-check",
            "--model",
            "gpt-5.4",
        )
        for token in expected_tokens:
            assert token in argv

    def test_read_only_sandbox(self) -> None:
        """``sandbox_mode`` sets the ``--sandbox`` value; ``--full-auto`` stays."""
        argv = build_codex_headless_core("m", sandbox_mode="read-only")
        assert argv[argv.index("--sandbox") + 1] == "read-only"
        assert "--full-auto" in argv

    def test_custom_binary(self) -> None:
        """``binary`` replaces the first element of the command."""
        argv = build_codex_headless_core("m", binary="/custom/codex")
        assert argv[0] == "/custom/codex"

    def test_no_output_path_flags(self) -> None:
        """Working-dir and output flags are absent from the core command."""
        argv = build_codex_headless_core("m")
        for absent in ("-C", "-o", "--output-schema"):
            assert absent not in argv
76
+
77
+
78
class TestCopilotHeadlessCore:
    """Flag-level checks for ``build_copilot_headless_core``."""

    def test_default(self) -> None:
        """Default call starts with ``copilot`` and carries every core token."""
        argv = build_copilot_headless_core("gpt-5.4")
        assert argv[0] == "copilot"
        expected_tokens = (
            "--no-ask-user",
            "--no-custom-instructions",
            "--allow-all",
            "--stream",
            "on",
            "--model",
        )
        for token in expected_tokens:
            assert token in argv

    def test_stream_off(self) -> None:
        """``stream="off"`` is emitted as the value after ``--stream``."""
        argv = build_copilot_headless_core("m", stream="off")
        assert argv[argv.index("--stream") + 1] == "off"

    def test_no_prompt_flag(self) -> None:
        """The core command never contains the ``-p`` prompt flag."""
        argv = build_copilot_headless_core("m")
        assert "-p" not in argv

    def test_custom_binary(self) -> None:
        """``binary`` replaces the first element of the command."""
        argv = build_copilot_headless_core("m", binary="/custom/copilot")
        assert argv[0] == "/custom/copilot"
101
+
102
+
103
class TestGeminiHeadlessCore:
    """Flag-level checks for ``build_gemini_headless_core``."""

    def test_default(self) -> None:
        """Default call starts with ``gemini`` and carries every core token."""
        argv = build_gemini_headless_core("gemini-3-pro-preview")
        assert argv[0] == "gemini"
        for token in ("--yolo", "--model", "gemini-3-pro-preview"):
            assert token in argv

    def test_no_prompt_activation(self) -> None:
        """``--prompt`` is prompt transport, so the core must not emit it."""
        argv = build_gemini_headless_core("m")
        assert "--prompt" not in argv

    def test_custom_binary(self) -> None:
        """``binary`` replaces the first element of the command."""
        argv = build_gemini_headless_core("m", binary="/custom/gemini")
        assert argv[0] == "/custom/gemini"
119
+
120
+
121
class TestBuilderDelegation:
    """Check that ``_build_non_interactive_run`` still emits the core flags per provider."""

    def test_claude_via_builder(self) -> None:
        """Claude spec keeps its core flags and sends the prompt on stdin."""
        from coding_cli_runtime.provider_contracts import _build_non_interactive_run

        run_spec = _build_non_interactive_run("claude", model="claude-sonnet-4-6", prompt="test")
        for flag in ("--print", "--dangerously-skip-permissions"):
            assert flag in run_spec.cmd_parts
        assert run_spec.stdin_text == "test"

    def test_codex_via_builder(self) -> None:
        """Codex spec contains ``exec`` and ``--full-auto``."""
        from coding_cli_runtime.provider_contracts import _build_non_interactive_run

        run_spec = _build_non_interactive_run("codex", model="gpt-5.4", prompt="fix")
        for flag in ("exec", "--full-auto"):
            assert flag in run_spec.cmd_parts

    def test_copilot_via_builder(self) -> None:
        """Copilot spec contains ``--no-ask-user`` and ``--allow-all``."""
        from coding_cli_runtime.provider_contracts import _build_non_interactive_run

        run_spec = _build_non_interactive_run("copilot", model="m", prompt="task")
        for flag in ("--no-ask-user", "--allow-all"):
            assert flag in run_spec.cmd_parts

    def test_gemini_via_builder(self) -> None:
        """Gemini spec contains ``--yolo``."""
        from coding_cli_runtime.provider_contracts import _build_non_interactive_run

        run_spec = _build_non_interactive_run("gemini", model="m", prompt="build")
        assert "--yolo" in run_spec.cmd_parts
151
+
152
+
153
+ # ── scan_session_dir ──────────────────────────────────────────────────
154
+
155
+
156
class TestScanSessionDir:
    """Behavioural tests for ``scan_session_dir``."""

    def _write_jsonl(self, path: Path, records: list[dict]) -> None:
        """Write ``records`` to ``path`` as JSON Lines, creating parent dirs."""
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text("\n".join(json.dumps(r) for r in records) + "\n", encoding="utf-8")

    def test_empty_dir(self, tmp_path: Path) -> None:
        """An existing directory with no files yields no results."""
        results = scan_session_dir(
            tmp_path,
            since_ts=0.0,
            extract_fn=lambda p: p.name,
        )
        assert results == []

    def test_nonexistent_dir(self, tmp_path: Path) -> None:
        """A missing directory yields no results instead of raising."""
        results = scan_session_dir(
            tmp_path / "nonexistent",
            since_ts=0.0,
            extract_fn=lambda p: p.name,
        )
        assert results == []

    def test_finds_matching_files(self, tmp_path: Path) -> None:
        """Every file matching the default pattern is returned."""
        self._write_jsonl(tmp_path / "session1.jsonl", [{"type": "start"}])
        self._write_jsonl(tmp_path / "session2.jsonl", [{"type": "start"}])

        results = scan_session_dir(
            tmp_path,
            since_ts=0.0,
            extract_fn=lambda p: p.name,
        )
        assert len(results) == 2
        names = {r[2] for r in results}
        assert names == {"session1.jsonl", "session2.jsonl"}

    def test_extract_fn_filters(self, tmp_path: Path) -> None:
        """Paths for which ``extract_fn`` returns ``None`` are dropped."""
        self._write_jsonl(tmp_path / "good.jsonl", [{"type": "start"}])
        self._write_jsonl(tmp_path / "bad.jsonl", [{"type": "start"}])

        results = scan_session_dir(
            tmp_path,
            since_ts=0.0,
            extract_fn=lambda p: p.name if "good" in p.name else None,
        )
        assert len(results) == 1
        assert results[0][2] == "good.jsonl"

    def test_sorted_by_mtime_descending(self, tmp_path: Path) -> None:
        """Results come back newest first."""
        import os

        older = tmp_path / "old.jsonl"
        newer = tmp_path / "new.jsonl"
        self._write_jsonl(older, [{"x": 1}])
        self._write_jsonl(newer, [{"x": 2}])
        # Pin the mtimes explicitly rather than sleeping between writes, so the
        # ordering does not depend on the filesystem's timestamp resolution.
        os.utime(older, (1_000.0, 1_000.0))
        os.utime(newer, (2_000.0, 2_000.0))

        results = scan_session_dir(
            tmp_path,
            since_ts=0.0,
            extract_fn=lambda p: p.name,
        )
        assert [r[2] for r in results] == ["new.jsonl", "old.jsonl"]

    def test_custom_glob_pattern(self, tmp_path: Path) -> None:
        """Only files matching ``glob_pattern`` are considered."""
        (tmp_path / "session.jsonl").write_text("{}\n")
        (tmp_path / "session.json").write_text("{}\n")

        results = scan_session_dir(
            tmp_path,
            glob_pattern="*.json",
            since_ts=0.0,
            extract_fn=lambda p: p.name,
        )
        assert len(results) == 1
        assert results[0][2] == "session.json"

    def test_max_candidates_limits(self, tmp_path: Path) -> None:
        """No more than ``max_candidates`` files are processed."""
        for i in range(5):
            self._write_jsonl(tmp_path / f"s{i}.jsonl", [{"i": i}])

        results = scan_session_dir(
            tmp_path,
            since_ts=0.0,
            extract_fn=lambda p: p.name,
            max_candidates=2,
        )
        assert len(results) == 2