coding-cli-runtime 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. coding_cli_runtime-0.1.0/CHANGELOG.md +57 -0
  2. coding_cli_runtime-0.1.0/CONTRIBUTING.md +38 -0
  3. coding_cli_runtime-0.1.0/LICENSE +21 -0
  4. coding_cli_runtime-0.1.0/MANIFEST.in +9 -0
  5. coding_cli_runtime-0.1.0/PKG-INFO +179 -0
  6. coding_cli_runtime-0.1.0/README.md +153 -0
  7. coding_cli_runtime-0.1.0/pyproject.toml +112 -0
  8. coding_cli_runtime-0.1.0/setup.cfg +4 -0
  9. coding_cli_runtime-0.1.0/src/coding_cli_runtime/__init__.py +108 -0
  10. coding_cli_runtime-0.1.0/src/coding_cli_runtime/auth.py +55 -0
  11. coding_cli_runtime-0.1.0/src/coding_cli_runtime/codex_cli.py +95 -0
  12. coding_cli_runtime-0.1.0/src/coding_cli_runtime/contracts.py +72 -0
  13. coding_cli_runtime-0.1.0/src/coding_cli_runtime/copilot_reasoning_baseline.json +66 -0
  14. coding_cli_runtime-0.1.0/src/coding_cli_runtime/copilot_reasoning_logs.py +81 -0
  15. coding_cli_runtime-0.1.0/src/coding_cli_runtime/failure_classification.py +183 -0
  16. coding_cli_runtime-0.1.0/src/coding_cli_runtime/json_io.py +81 -0
  17. coding_cli_runtime-0.1.0/src/coding_cli_runtime/provider_controls.py +101 -0
  18. coding_cli_runtime-0.1.0/src/coding_cli_runtime/provider_specs.py +749 -0
  19. coding_cli_runtime-0.1.0/src/coding_cli_runtime/py.typed +1 -0
  20. coding_cli_runtime-0.1.0/src/coding_cli_runtime/reasoning.py +95 -0
  21. coding_cli_runtime-0.1.0/src/coding_cli_runtime/redaction.py +20 -0
  22. coding_cli_runtime-0.1.0/src/coding_cli_runtime/schema_validation.py +101 -0
  23. coding_cli_runtime-0.1.0/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +37 -0
  24. coding_cli_runtime-0.1.0/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +14 -0
  25. coding_cli_runtime-0.1.0/src/coding_cli_runtime/session_execution.py +604 -0
  26. coding_cli_runtime-0.1.0/src/coding_cli_runtime/session_logs.py +129 -0
  27. coding_cli_runtime-0.1.0/src/coding_cli_runtime/subprocess_runner.py +346 -0
  28. coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/PKG-INFO +179 -0
  29. coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/SOURCES.txt +35 -0
  30. coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/dependency_links.txt +1 -0
  31. coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/top_level.txt +1 -0
  32. coding_cli_runtime-0.1.0/tests/test_copilot_reasoning_logs.py +87 -0
  33. coding_cli_runtime-0.1.0/tests/test_package_resources.py +60 -0
  34. coding_cli_runtime-0.1.0/tests/test_packaging.py +89 -0
  35. coding_cli_runtime-0.1.0/tests/test_playground_probe_smoke.py +244 -0
  36. coding_cli_runtime-0.1.0/tests/test_provider_catalog_resolution.py +304 -0
  37. coding_cli_runtime-0.1.0/tests/test_runtime_parity.py +22 -0
@@ -0,0 +1,57 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/).
6
+
7
+ ## [Unreleased]
8
+
9
+ ### Changed
10
+ - Consolidated `shared_cli_runtime` into `coding_cli_runtime`. The package now
11
+ ships a single top-level package; the `shared_cli_runtime` directory is removed.
12
+ - `MANIFEST.in` and docs updated to reference `coding_cli_runtime` paths.
13
+ - Minimum Python version remains `>=3.10`.
14
+ - `run_interactive_session()` observability kwargs (`provider_label`, `job_name`,
15
+ `phase_tag`, `process_label`, `timeout_seconds`) now have sensible defaults so
16
+ external callers don't need to supply internal batch-system labels.
17
+ - Provider model catalogs are now resolved with a three-tier fallback:
18
+ user override file > live CLI discovery > hardcoded fallback.
19
+ - **Codex**: reads `~/.codex/models_cache.json` (auto-refreshed by the CLI)
20
+ when present; falls back to the hardcoded catalog on any parse failure.
21
+ - **All providers**: place a JSON file at
22
+ `~/.config/coding-cli-runtime/providers/<provider>.json` (set
23
+ `CODING_CLI_RUNTIME_CONFIG_DIR` to relocate the config directory) to override
24
+ the model list and default model without waiting for a package update.
25
+
26
+ ### Added
27
+ - `__version__` attribute in `coding_cli_runtime`.
28
+ - `CONTRIBUTING.md`, `MANIFEST.in`, `.pre-commit-config.yaml`.
29
+ - PyPI / Python / Build / License badges in `README.md`.
30
+ - `bump-my-version` configuration syncing `pyproject.toml` and `src/coding_cli_runtime/__init__.py`.
31
+ - `ruff`, `mypy` (strict), and `pytest-cov` added to dev dependencies.
32
+ - CI quality gates: ruff check, ruff format, mypy, pytest-cov.
33
+
34
+ ### Changed
35
+ - `CliRunResult.command` type widened from `tuple[str, ...]` to `Sequence[str]`.
36
+ - Publish workflow path corrected (`shared-cli-runtime` → `coding-cli-runtime`).
37
+
38
+ ### Fixed
39
+ - mypy strict compliance: return-type annotations, per-module overrides, targeted type-ignore comments.
40
+ - ruff lint and format compliance across all source and test files.
41
+
42
+ ## [0.1.0] - 2026-04-07
43
+
44
+ ### Added
45
+ - Initial extraction from `llm-eval` monorepo.
46
+ - Provider metadata and controls for Claude, Codex, Copilot, and Gemini CLIs.
47
+ - Shared request/result contracts (`CliRunRequest`, `CliRunResult`, `CliLaunchSpec`).
48
+ - Schema loading and payload validation (`load_schema`, `validate_payload`).
49
+ - Synchronous and asynchronous subprocess execution helpers.
50
+ - Interactive session execution with transcript mirroring.
51
+ - Session log discovery and parsing utilities.
52
+ - Claude reasoning policy resolution.
53
+ - Log redaction helpers.
54
+ - Copilot reasoning log parsing and classification.
55
+ - PEP 561 `py.typed` marker for `coding_cli_runtime`.
56
+ - Packaged JSON schemas and Copilot reasoning baseline data.
57
+ - Playground knowledge base with probing guides and experiment templates.
@@ -0,0 +1,38 @@
1
+ # Contributing to coding-cli-runtime
2
+
3
+ ## Development setup
4
+
5
+ ```bash
6
+ cd packages/coding-cli-runtime
7
+
8
+ # Install dependencies with uv (recommended)
9
+ uv sync --group dev
10
+
11
+ # Or use pip
12
+ python -m venv .venv
13
+ source .venv/bin/activate
14
+ pip install -e . --group dev  # requires pip >= 25.1 (PEP 735 dependency groups)
15
+ ```
16
+
17
+ ## Quality checks
18
+
19
+ ```bash
20
+ # Run all checks (same as CI)
21
+ uv run --project . --group dev python -m ruff check src tests
22
+ uv run --project . --group dev python -m ruff format --check src tests
23
+ uv run --project . --group dev python -m mypy src
24
+ uv run --project . --group dev python -m pytest -v
25
+
26
+ # Pre-commit (if installed)
27
+ uv run --project . --group dev pre-commit run --all-files
28
+
29
+ # Build and validate
30
+ uv run --project . --group dev python -m build --no-isolation --sdist --wheel
31
+ uv run --project . --group dev python -m twine check dist/*
32
+ ```
33
+
34
+ ## Pull request process
35
+
36
+ 1. Run all quality checks locally before pushing.
37
+ 2. Update `CHANGELOG.md` under `[Unreleased]` for any user-visible changes.
38
+ 3. CI enforces the same checks — PRs must pass before merge.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LLM Eval contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include CHANGELOG.md
2
+ include LICENSE
3
+ include README.md
4
+ include CONTRIBUTING.md
5
+
6
+ recursive-include src/coding_cli_runtime/schemas *.json
7
+ include src/coding_cli_runtime/copilot_reasoning_baseline.json
8
+
9
+ global-exclude __pycache__ *.py[cod]
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: coding-cli-runtime
3
+ Version: 0.1.0
4
+ Summary: Reusable CLI runtime primitives for provider-backed automation workflows
5
+ Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/pj-ms/llm-eval/tree/main/packages/coding-cli-runtime
8
+ Project-URL: Repository, https://github.com/pj-ms/llm-eval
9
+ Project-URL: Issues, https://github.com/pj-ms/llm-eval/issues
10
+ Project-URL: Changelog, https://github.com/pj-ms/llm-eval/blob/main/packages/coding-cli-runtime/CHANGELOG.md
11
+ Keywords: cli,runtime,llm,automation,schema-validation
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Dynamic: license-file
26
+
27
+ # coding-cli-runtime
28
+
29
+ [![PyPI](https://img.shields.io/pypi/v/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
30
+ [![Python](https://img.shields.io/pypi/pyversions/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
31
+ [![Build](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml/badge.svg)](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
32
+ [![License](https://img.shields.io/pypi/l/coding-cli-runtime)](https://github.com/pj-ms/llm-eval/blob/main/packages/coding-cli-runtime/LICENSE)
33
+
34
+ A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
35
+
36
+ These CLIs each have different invocation patterns, output formats, error
37
+ shapes, and timeout behaviors. This library normalizes all of that behind
38
+ a common `CliRunRequest` → `CliRunResult` contract, so your automation
39
+ code doesn't need provider-specific subprocess handling.
40
+
41
+ **What it does (and why not just `subprocess.run`):**
42
+
43
+ - Unified request/result types across all four CLIs
44
+ - Timeout enforcement with graceful process termination
45
+ - Provider-aware failure classification (retryable vs fatal)
46
+ - Built-in model catalog with defaults, reasoning levels, and capabilities
47
+ - Interactive session management for long-running generation tasks
48
+ - Zero runtime dependencies
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ pip install coding-cli-runtime
54
+ ```
55
+
56
+ Requires Python 3.10+.
57
+
58
+ ## Examples
59
+
60
+ ### Execute a provider CLI
61
+
62
+ ```python
63
+ import asyncio
64
+ from pathlib import Path
65
+ from coding_cli_runtime import CliRunRequest, run_cli_command
66
+
67
+ request = CliRunRequest(
68
+ cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
69
+ cwd=Path("/tmp/my-project"),
70
+ timeout_seconds=120,
71
+ )
72
+ result = asyncio.run(run_cli_command(request))
73
+
74
+ print(result.returncode) # 0
75
+ print(result.error_code) # "none"
76
+ print(result.duration_seconds) # 14.2
77
+ print(result.stdout_text[:200])
78
+ ```
79
+
80
+ Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
81
+ shape stays the same. A synchronous variant `run_cli_command_sync` is also
82
+ available.
83
+
84
+ ### Pick a model from the provider catalog
85
+
86
+ ```python
87
+ from coding_cli_runtime import get_provider_spec
88
+
89
+ codex = get_provider_spec("codex")
90
+ print(codex.default_model) # "gpt-5.3-codex"
91
+ print(codex.model_source) # "codex_cli_cache", "override", or "code"
92
+
93
+ for model in codex.models:
94
+ print(f" {model.name}: {model.description}")
95
+ ```
96
+
97
+ The catalog covers all four providers — each with model names, reasoning
98
+ levels, default settings, and visibility flags.
99
+
100
+ Model lists are resolved with a three-tier fallback:
101
+
102
+ 1. **User override** — drop a JSON file at
103
+ `~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
104
+ model list immediately, without waiting for a package update.
105
+ 2. **Live CLI cache** — for Codex, the library reads
106
+ `~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
107
+ present. Other providers fall through because their CLIs don't expose a
108
+ machine-readable model list.
109
+ 3. **Hardcoded fallback** — the model list shipped with the package.
110
+
111
+ Override file format:
112
+
113
+ ```json
114
+ {
115
+ "default_model": "claude-sonnet-4-7",
116
+ "models": [
117
+ "claude-sonnet-4-7",
118
+ {
119
+ "name": "claude-opus-5",
120
+ "description": "Latest opus model",
121
+ "controls": [
122
+ { "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
123
+ ]
124
+ }
125
+ ]
126
+ }
127
+ ```
128
+
129
+ Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
130
+ (default: `~/.config/coding-cli-runtime`).
131
+
132
+ ### Decide whether to retry a failed run
133
+
134
+ ```python
135
+ from coding_cli_runtime import classify_provider_failure
136
+
137
+ classification = classify_provider_failure(
138
+ provider="gemini",
139
+ stderr_text="429 Resource exhausted: rate limit exceeded",
140
+ )
141
+
142
+ if classification.retryable:
143
+ print(f"Retryable ({classification.category}) — will retry")
144
+ else:
145
+ print(f"Fatal ({classification.category}) — giving up")
146
+ ```
147
+
148
+ Works for all four providers. Recognizes auth failures, rate limits,
149
+ network transients, and other provider-specific error patterns.
150
+
151
+ ## Key types
152
+
153
+ | Type | Purpose |
154
+ |------|---------|
155
+ | `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
156
+ | `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
157
+ | `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
158
+ | `ProviderSpec` | Provider catalog entry with models, controls, defaults |
159
+ | `FailureClassification` | Classified error with retryable flag and category |
160
+
161
+ `run_interactive_session()` manages long-running CLI processes with
162
+ timeout enforcement, process-group cleanup, transcript mirroring, and
163
+ automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
164
+ required — observability labels like `job_name` and `phase_tag` default to
165
+ sensible values so external callers don't need to invent them.
166
+
167
+ ## Prerequisites
168
+
169
+ This package does **not** bundle any CLI binaries or credentials. You must
170
+ install and authenticate the relevant provider CLI yourself before using the
171
+ execution helpers.
172
+
173
+ ## Status
174
+
175
+ Pre-1.0. API may change between minor versions.
176
+
177
+ ## License
178
+
179
+ MIT
@@ -0,0 +1,153 @@
1
+ # coding-cli-runtime
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
5
+ [![Build](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml/badge.svg)](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
6
+ [![License](https://img.shields.io/pypi/l/coding-cli-runtime)](https://github.com/pj-ms/llm-eval/blob/main/packages/coding-cli-runtime/LICENSE)
7
+
8
+ A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
9
+
10
+ These CLIs each have different invocation patterns, output formats, error
11
+ shapes, and timeout behaviors. This library normalizes all of that behind
12
+ a common `CliRunRequest` → `CliRunResult` contract, so your automation
13
+ code doesn't need provider-specific subprocess handling.
14
+
15
+ **What it does (and why not just `subprocess.run`):**
16
+
17
+ - Unified request/result types across all four CLIs
18
+ - Timeout enforcement with graceful process termination
19
+ - Provider-aware failure classification (retryable vs fatal)
20
+ - Built-in model catalog with defaults, reasoning levels, and capabilities
21
+ - Interactive session management for long-running generation tasks
22
+ - Zero runtime dependencies
23
+
24
+ ## Installation
25
+
26
+ ```bash
27
+ pip install coding-cli-runtime
28
+ ```
29
+
30
+ Requires Python 3.10+.
31
+
32
+ ## Examples
33
+
34
+ ### Execute a provider CLI
35
+
36
+ ```python
37
+ import asyncio
38
+ from pathlib import Path
39
+ from coding_cli_runtime import CliRunRequest, run_cli_command
40
+
41
+ request = CliRunRequest(
42
+ cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
43
+ cwd=Path("/tmp/my-project"),
44
+ timeout_seconds=120,
45
+ )
46
+ result = asyncio.run(run_cli_command(request))
47
+
48
+ print(result.returncode) # 0
49
+ print(result.error_code) # "none"
50
+ print(result.duration_seconds) # 14.2
51
+ print(result.stdout_text[:200])
52
+ ```
53
+
54
+ Swap in the `claude`, `gemini`, or `copilot` command line (each CLI takes its
55
+ own flags) — the request/result shape stays the same. A synchronous variant,
56
+ `run_cli_command_sync`, is also available.
57
+
58
+ ### Pick a model from the provider catalog
59
+
60
+ ```python
61
+ from coding_cli_runtime import get_provider_spec
62
+
63
+ codex = get_provider_spec("codex")
64
+ print(codex.default_model) # "gpt-5.3-codex"
65
+ print(codex.model_source) # "codex_cli_cache", "override", or "code"
66
+
67
+ for model in codex.models:
68
+ print(f" {model.name}: {model.description}")
69
+ ```
70
+
71
+ The catalog covers all four providers — each with model names, reasoning
72
+ levels, default settings, and visibility flags.
73
+
74
+ Model lists are resolved with a three-tier fallback:
75
+
76
+ 1. **User override** — drop a JSON file at
77
+ `~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
78
+ model list immediately, without waiting for a package update.
79
+ 2. **Live CLI cache** — for Codex, the library reads
80
+ `~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
81
+ present. Other providers fall through because their CLIs don't expose a
82
+ machine-readable model list.
83
+ 3. **Hardcoded fallback** — the model list shipped with the package.
84
+
85
+ Override file format:
86
+
87
+ ```json
88
+ {
89
+ "default_model": "claude-sonnet-4-7",
90
+ "models": [
91
+ "claude-sonnet-4-7",
92
+ {
93
+ "name": "claude-opus-5",
94
+ "description": "Latest opus model",
95
+ "controls": [
96
+ { "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
97
+ ]
98
+ }
99
+ ]
100
+ }
101
+ ```
102
+
103
+ Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
104
+ (default: `~/.config/coding-cli-runtime`).
105
+
106
+ ### Decide whether to retry a failed run
107
+
108
+ ```python
109
+ from coding_cli_runtime import classify_provider_failure
110
+
111
+ classification = classify_provider_failure(
112
+ provider="gemini",
113
+ stderr_text="429 Resource exhausted: rate limit exceeded",
114
+ )
115
+
116
+ if classification.retryable:
117
+ print(f"Retryable ({classification.category}) — will retry")
118
+ else:
119
+ print(f"Fatal ({classification.category}) — giving up")
120
+ ```
121
+
122
+ Works for all four providers. Recognizes auth failures, rate limits,
123
+ network transients, and other provider-specific error patterns.
124
+
125
+ ## Key types
126
+
127
+ | Type | Purpose |
128
+ |------|---------|
129
+ | `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
130
+ | `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
131
+ | `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
132
+ | `ProviderSpec` | Provider catalog entry with models, controls, defaults |
133
+ | `FailureClassification` | Classified error with retryable flag and category |
134
+
135
+ `run_interactive_session()` manages long-running CLI processes with
136
+ timeout enforcement, process-group cleanup, transcript mirroring, and
137
+ automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
138
+ required — observability labels like `job_name` and `phase_tag` default to
139
+ sensible values so external callers don't need to invent them.
140
+
141
+ ## Prerequisites
142
+
143
+ This package does **not** bundle any CLI binaries or credentials. You must
144
+ install and authenticate the relevant provider CLI yourself before using the
145
+ execution helpers.
146
+
147
+ ## Status
148
+
149
+ Pre-1.0. API may change between minor versions.
150
+
151
+ ## License
152
+
153
+ MIT
@@ -0,0 +1,112 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3", "wheel"] # >=77.0.3 required for SPDX license + license-files
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "coding-cli-runtime"
7
+ version = "0.1.0"
8
+ description = "Reusable CLI runtime primitives for provider-backed automation workflows"
9
+ readme = {file = "README.md", content-type = "text/markdown"}
10
+ license = "MIT"
11
+ license-files = ["LICENSE"]
12
+ requires-python = ">=3.10"
13
+ authors = [
14
+ {name = "LLM Eval maintainers", email = "llm-eval-maintainers@users.noreply.github.com"},
15
+ ]
16
+ dependencies = []
17
+ keywords = [
18
+ "cli",
19
+ "runtime",
20
+ "llm",
21
+ "automation",
22
+ "schema-validation",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Intended Audience :: Developers",
27
+ "Operating System :: OS Independent",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3.10",
30
+ "Programming Language :: Python :: 3.11",
31
+ "Programming Language :: Python :: 3.12",
32
+ "Programming Language :: Python :: 3.13",
33
+ "Topic :: Software Development :: Libraries :: Python Modules",
34
+ "Typing :: Typed",
35
+ ]
36
+
37
+ [project.urls]
38
+ Homepage = "https://github.com/pj-ms/llm-eval/tree/main/packages/coding-cli-runtime"
39
+ Repository = "https://github.com/pj-ms/llm-eval"
40
+ Issues = "https://github.com/pj-ms/llm-eval/issues"
41
+ Changelog = "https://github.com/pj-ms/llm-eval/blob/main/packages/coding-cli-runtime/CHANGELOG.md"
42
+
43
+ [dependency-groups]
44
+ dev = [
45
+ "build>=1.2.2",
46
+ "bump-my-version>=0.26.0",
47
+ "mypy>=1.13.0",
48
+ "pre-commit>=4.0.0",
49
+ "pytest==8.3.3",
50
+ "pytest-cov>=4.0",
51
+ "ruff>=0.8.0",
52
+ "setuptools>=77.0.3",
53
+ "twine>=5.1.1",
54
+ "wheel",
55
+ ]
56
+
57
+ [tool.setuptools]
58
+ package-dir = {"" = "src"}
59
+
60
+ [tool.setuptools.packages.find]
61
+ where = ["src"]
62
+
63
+ [tool.setuptools.package-data]
64
+ coding_cli_runtime = [
65
+ "py.typed",
66
+ "copilot_reasoning_baseline.json",
67
+ "schemas/*.json",
68
+ ]
69
+
70
+ [tool.pytest.ini_options]
71
+ testpaths = ["tests"]
72
+
73
+ [tool.ruff]
74
+ target-version = "py310"
75
+ line-length = 100
76
+
77
+ [tool.ruff.lint]
78
+ select = ["E", "F", "I", "UP", "B"]
79
+
80
+ [tool.ruff.lint.isort]
81
+ known-first-party = ["coding_cli_runtime"]
82
+
83
+ [tool.mypy]
84
+ python_version = "3.10"
85
+ strict = true
86
+
87
+ [[tool.mypy.overrides]]
88
+ module = [
89
+ "coding_cli_runtime.subprocess_runner",
90
+ "coding_cli_runtime.session_execution",
91
+ ]
92
+ disallow_any_generics = false
93
+ disallow_untyped_defs = false
94
+ warn_return_any = false
95
+
96
+ [tool.bumpversion]
97
+ current_version = "0.1.0"
98
+ parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
99
+ serialize = ["{major}.{minor}.{patch}"]
100
+ commit = true
101
+ tag = true
102
+ tag_name = "coding-cli-runtime-v{new_version}"
103
+
104
+ [[tool.bumpversion.files]]
105
+ filename = "pyproject.toml"
106
+ search = 'version = "{current_version}"'
107
+ replace = 'version = "{new_version}"'
108
+
109
+ [[tool.bumpversion.files]]
110
+ filename = "src/coding_cli_runtime/__init__.py"
111
+ search = '__version__ = "{current_version}"'
112
+ replace = '__version__ = "{new_version}"'
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,108 @@
1
+ """Runtime primitives for orchestrating LLM provider CLIs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.1.0"
6
+
7
+ from .auth import AuthResolution, resolve_auth
8
+ from .codex_cli import CodexExecSpec, build_codex_exec_spec
9
+ from .contracts import (
10
+ AuthMode,
11
+ ClaudeReasoningPolicy,
12
+ CliLaunchSpec,
13
+ CliRunRequest,
14
+ CliRunResult,
15
+ ErrorCode,
16
+ )
17
+ from .failure_classification import FailureClassification, classify_provider_failure
18
+ from .provider_controls import build_model_id, resolve_provider_model_controls
19
+ from .provider_specs import (
20
+ ChoiceSpec,
21
+ ControlSpec,
22
+ ModelSpec,
23
+ ProviderSpec,
24
+ get_claude_default_model,
25
+ get_claude_effort_levels,
26
+ get_claude_model_candidates,
27
+ get_claude_output_suffixes,
28
+ get_claude_permission_modes,
29
+ get_codex_supported_models,
30
+ get_copilot_default_model,
31
+ get_copilot_model_catalog,
32
+ get_gemini_default_model,
33
+ get_gemini_model_options,
34
+ get_provider_spec,
35
+ list_provider_specs,
36
+ serialize_provider_specs,
37
+ )
38
+ from .reasoning import resolve_claude_reasoning_policy
39
+ from .redaction import redact_text
40
+ from .schema_validation import SchemaValidationError, load_schema, validate_payload
41
+ from .session_execution import (
42
+ InteractiveCliRunResult,
43
+ SessionExecutionTimeoutError,
44
+ SessionProgressEvent,
45
+ SessionRetryDecision,
46
+ TranscriptMirrorStrategy,
47
+ mirror_session_transcript,
48
+ run_interactive_session,
49
+ )
50
+ from .session_logs import (
51
+ claude_project_key,
52
+ find_claude_session,
53
+ find_codex_session,
54
+ normalize_path_str,
55
+ )
56
+ from .subprocess_runner import run_cli_command, run_cli_command_sync
57
+
58
+ __all__ = [
59
+ "AuthMode",
60
+ "AuthResolution",
61
+ "CliRunRequest",
62
+ "CliRunResult",
63
+ "CodexExecSpec",
64
+ "ChoiceSpec",
65
+ "ClaudeReasoningPolicy",
66
+ "CliLaunchSpec",
67
+ "ControlSpec",
68
+ "ErrorCode",
69
+ "FailureClassification",
70
+ "ModelSpec",
71
+ "ProviderSpec",
72
+ "SchemaValidationError",
73
+ "InteractiveCliRunResult",
74
+ "SessionProgressEvent",
75
+ "SessionRetryDecision",
76
+ "SessionExecutionTimeoutError",
77
+ "TranscriptMirrorStrategy",
78
+ "get_claude_default_model",
79
+ "get_claude_effort_levels",
80
+ "get_claude_model_candidates",
81
+ "get_claude_output_suffixes",
82
+ "get_claude_permission_modes",
83
+ "get_codex_supported_models",
84
+ "get_copilot_default_model",
85
+ "get_copilot_model_catalog",
86
+ "get_gemini_default_model",
87
+ "get_gemini_model_options",
88
+ "get_provider_spec",
89
+ "list_provider_specs",
90
+ "build_model_id",
91
+ "build_codex_exec_spec",
92
+ "classify_provider_failure",
93
+ "load_schema",
94
+ "resolve_auth",
95
+ "resolve_claude_reasoning_policy",
96
+ "resolve_provider_model_controls",
97
+ "redact_text",
98
+ "claude_project_key",
99
+ "find_claude_session",
100
+ "find_codex_session",
101
+ "normalize_path_str",
102
+ "mirror_session_transcript",
103
+ "run_cli_command",
104
+ "run_cli_command_sync",
105
+ "run_interactive_session",
106
+ "serialize_provider_specs",
107
+ "validate_payload",
108
+ ]