coding-cli-runtime 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coding_cli_runtime-0.1.0/CHANGELOG.md +57 -0
- coding_cli_runtime-0.1.0/CONTRIBUTING.md +38 -0
- coding_cli_runtime-0.1.0/LICENSE +21 -0
- coding_cli_runtime-0.1.0/MANIFEST.in +9 -0
- coding_cli_runtime-0.1.0/PKG-INFO +179 -0
- coding_cli_runtime-0.1.0/README.md +153 -0
- coding_cli_runtime-0.1.0/pyproject.toml +112 -0
- coding_cli_runtime-0.1.0/setup.cfg +4 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/__init__.py +108 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/auth.py +55 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/codex_cli.py +95 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/contracts.py +72 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/copilot_reasoning_baseline.json +66 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/copilot_reasoning_logs.py +81 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/failure_classification.py +183 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/json_io.py +81 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/provider_controls.py +101 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/provider_specs.py +749 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/py.typed +1 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/reasoning.py +95 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/redaction.py +20 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/schema_validation.py +101 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/schemas/normalized_run_result.v1.json +37 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/schemas/reasoning_metadata.v1.json +14 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/session_execution.py +604 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/session_logs.py +129 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime/subprocess_runner.py +346 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/PKG-INFO +179 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/SOURCES.txt +35 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/dependency_links.txt +1 -0
- coding_cli_runtime-0.1.0/src/coding_cli_runtime.egg-info/top_level.txt +1 -0
- coding_cli_runtime-0.1.0/tests/test_copilot_reasoning_logs.py +87 -0
- coding_cli_runtime-0.1.0/tests/test_package_resources.py +60 -0
- coding_cli_runtime-0.1.0/tests/test_packaging.py +89 -0
- coding_cli_runtime-0.1.0/tests/test_playground_probe_smoke.py +244 -0
- coding_cli_runtime-0.1.0/tests/test_provider_catalog_resolution.py +304 -0
- coding_cli_runtime-0.1.0/tests/test_runtime_parity.py +22 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
- Consolidated `shared_cli_runtime` into `coding_cli_runtime`. The package now
|
|
11
|
+
ships a single top-level package; the `shared_cli_runtime` directory is removed.
|
|
12
|
+
- `MANIFEST.in` and docs updated to reference `coding_cli_runtime` paths.
|
|
13
|
+
- Minimum Python version remains `>=3.10`.
|
|
14
|
+
- `run_interactive_session()` observability kwargs (`provider_label`, `job_name`,
|
|
15
|
+
`phase_tag`, `process_label`, `timeout_seconds`) now have sensible defaults so
|
|
16
|
+
external callers don't need to supply internal batch-system labels.
|
|
17
|
+
- Provider model catalogs are now resolved with a three-tier fallback:
|
|
18
|
+
user override file > live CLI discovery > hardcoded fallback.
|
|
19
|
+
- **Codex**: reads `~/.codex/models_cache.json` (auto-refreshed by the CLI)
|
|
20
|
+
when present; falls back to the hardcoded catalog on any parse failure.
|
|
21
|
+
- **All providers**: place a JSON file at
|
|
22
|
+
`~/.config/coding-cli-runtime/providers/<provider>.json` (or set
|
|
23
|
+
`CODING_CLI_RUNTIME_CONFIG_DIR`) to override the model list and default
|
|
24
|
+
model without waiting for a package update.
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
- `__version__` attribute in `coding_cli_runtime` (and in `shared_cli_runtime` until it was consolidated into `coding_cli_runtime`).
|
|
28
|
+
- `CONTRIBUTING.md`, `MANIFEST.in`, `.pre-commit-config.yaml`.
|
|
29
|
+
- PyPI / Python / Build / License badges in `README.md`.
|
|
30
|
+
- `bump-my-version` configuration syncing `pyproject.toml` and the package `__init__.py` (originally both `__init__.py` files, before `shared_cli_runtime` was consolidated).
|
|
31
|
+
- `ruff`, `mypy` (strict), and `pytest-cov` added to dev dependencies.
|
|
32
|
+
- CI quality gates: ruff check, ruff format, mypy, pytest-cov.
|
|
33
|
+
|
|
34
|
+
### Changed
|
|
35
|
+
- `CliRunResult.command` type widened from `tuple[str, ...]` to `Sequence[str]`.
|
|
36
|
+
- Publish workflow path corrected (`shared-cli-runtime` → `coding-cli-runtime`).
|
|
37
|
+
|
|
38
|
+
### Fixed
|
|
39
|
+
- mypy strict compliance: return-type annotations, per-module overrides, targeted type-ignore comments.
|
|
40
|
+
- ruff lint and format compliance across all source and test files.
|
|
41
|
+
|
|
42
|
+
## [0.1.0] - 2026-04-07
|
|
43
|
+
|
|
44
|
+
### Added
|
|
45
|
+
- Initial extraction from `llm-eval` monorepo.
|
|
46
|
+
- Provider metadata and controls for Claude, Codex, Copilot, and Gemini CLIs.
|
|
47
|
+
- Shared request/result contracts (`CliRunRequest`, `CliRunResult`, `CliLaunchSpec`).
|
|
48
|
+
- Schema loading and payload validation (`load_schema`, `validate_payload`).
|
|
49
|
+
- Synchronous and asynchronous subprocess execution helpers.
|
|
50
|
+
- Interactive session execution with transcript mirroring.
|
|
51
|
+
- Session log discovery and parsing utilities.
|
|
52
|
+
- Claude reasoning policy resolution.
|
|
53
|
+
- Log redaction helpers.
|
|
54
|
+
- Copilot reasoning log parsing and classification.
|
|
55
|
+
- PEP 561 `py.typed` markers for both `coding_cli_runtime` and `shared_cli_runtime`.
|
|
56
|
+
- Packaged JSON schemas and Copilot reasoning baseline data.
|
|
57
|
+
- Playground knowledge base with probing guides and experiment templates.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Contributing to coding-cli-runtime
|
|
2
|
+
|
|
3
|
+
## Development setup
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
cd packages/coding-cli-runtime
|
|
7
|
+
|
|
8
|
+
# Install dependencies with uv (recommended)
|
|
9
|
+
uv sync --group dev
|
|
10
|
+
|
|
11
|
+
# Or use pip
|
|
12
|
+
python -m venv .venv
|
|
13
|
+
source .venv/bin/activate
|
|
14
|
+
pip install -e . --group dev  # pip >= 25.1; dev tools are a dependency group, not a ".[dev]" extra
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quality checks
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Run all checks (same as CI)
|
|
21
|
+
uv run --project . --group dev python -m ruff check src tests
|
|
22
|
+
uv run --project . --group dev python -m ruff format --check src tests
|
|
23
|
+
uv run --project . --group dev python -m mypy src
|
|
24
|
+
uv run --project . --group dev python -m pytest -v
|
|
25
|
+
|
|
26
|
+
# Pre-commit (if installed)
|
|
27
|
+
uv run --project . --group dev pre-commit run --all-files
|
|
28
|
+
|
|
29
|
+
# Build and validate
|
|
30
|
+
uv run --project . --group dev python -m build --no-isolation --sdist --wheel
|
|
31
|
+
uv run --project . --group dev python -m twine check dist/*
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Pull request process
|
|
35
|
+
|
|
36
|
+
1. Run all quality checks locally before pushing.
|
|
37
|
+
2. Update `CHANGELOG.md` under `[Unreleased]` for any user-visible changes.
|
|
38
|
+
3. CI enforces the same checks — PRs must pass before merge.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 LLM Eval contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: coding-cli-runtime
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Reusable CLI runtime primitives for provider-backed automation workflows
|
|
5
|
+
Author-email: LLM Eval maintainers <llm-eval-maintainers@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/pj-ms/llm-eval/tree/main/packages/coding-cli-runtime
|
|
8
|
+
Project-URL: Repository, https://github.com/pj-ms/llm-eval
|
|
9
|
+
Project-URL: Issues, https://github.com/pj-ms/llm-eval/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/pj-ms/llm-eval/blob/main/packages/coding-cli-runtime/CHANGELOG.md
|
|
11
|
+
Keywords: cli,runtime,llm,automation,schema-validation
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# coding-cli-runtime
|
|
28
|
+
|
|
29
|
+
[](https://pypi.org/project/coding-cli-runtime/)
|
|
30
|
+
[](https://pypi.org/project/coding-cli-runtime/)
|
|
31
|
+
[](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
|
|
32
|
+
[](LICENSE)
|
|
33
|
+
|
|
34
|
+
A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
|
|
35
|
+
|
|
36
|
+
These CLIs each have different invocation patterns, output formats, error
|
|
37
|
+
shapes, and timeout behaviors. This library normalizes all of that behind
|
|
38
|
+
a common `CliRunRequest` → `CliRunResult` contract, so your automation
|
|
39
|
+
code doesn't need provider-specific subprocess handling.
|
|
40
|
+
|
|
41
|
+
**What it does (and why not just `subprocess.run`):**
|
|
42
|
+
|
|
43
|
+
- Unified request/result types across all four CLIs
|
|
44
|
+
- Timeout enforcement with graceful process termination
|
|
45
|
+
- Provider-aware failure classification (retryable vs fatal)
|
|
46
|
+
- Built-in model catalog with defaults, reasoning levels, and capabilities
|
|
47
|
+
- Interactive session management for long-running generation tasks
|
|
48
|
+
- Zero runtime dependencies
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install coding-cli-runtime
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Requires Python 3.10+.
|
|
57
|
+
|
|
58
|
+
## Examples
|
|
59
|
+
|
|
60
|
+
### Execute a provider CLI
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
import asyncio
|
|
64
|
+
from pathlib import Path
|
|
65
|
+
from coding_cli_runtime import CliRunRequest, run_cli_command
|
|
66
|
+
|
|
67
|
+
request = CliRunRequest(
|
|
68
|
+
cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
|
|
69
|
+
cwd=Path("/tmp/my-project"),
|
|
70
|
+
timeout_seconds=120,
|
|
71
|
+
)
|
|
72
|
+
result = asyncio.run(run_cli_command(request))
|
|
73
|
+
|
|
74
|
+
print(result.returncode) # 0
|
|
75
|
+
print(result.error_code) # "none"
|
|
76
|
+
print(result.duration_seconds) # 14.2
|
|
77
|
+
print(result.stdout_text[:200])
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
|
|
81
|
+
shape stays the same. A synchronous variant `run_cli_command_sync` is also
|
|
82
|
+
available.
|
|
83
|
+
|
|
84
|
+
### Pick a model from the provider catalog
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from coding_cli_runtime import get_provider_spec
|
|
88
|
+
|
|
89
|
+
codex = get_provider_spec("codex")
|
|
90
|
+
print(codex.default_model) # "gpt-5.3-codex"
|
|
91
|
+
print(codex.model_source) # "codex_cli_cache", "override", or "code"
|
|
92
|
+
|
|
93
|
+
for model in codex.models:
|
|
94
|
+
print(f" {model.name}: {model.description}")
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The catalog covers all four providers — each with model names, reasoning
|
|
98
|
+
levels, default settings, and visibility flags.
|
|
99
|
+
|
|
100
|
+
Model lists are resolved with a three-tier fallback:
|
|
101
|
+
|
|
102
|
+
1. **User override** — drop a JSON file at
|
|
103
|
+
`~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
|
|
104
|
+
model list immediately, without waiting for a package update.
|
|
105
|
+
2. **Live CLI cache** — for Codex, the library reads
|
|
106
|
+
`~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
|
|
107
|
+
present. Other providers fall through because their CLIs don't expose a
|
|
108
|
+
machine-readable model list.
|
|
109
|
+
3. **Hardcoded fallback** — the model list shipped with the package.
|
|
110
|
+
|
|
111
|
+
Override file format:
|
|
112
|
+
|
|
113
|
+
```json
|
|
114
|
+
{
|
|
115
|
+
"default_model": "claude-sonnet-4-7",
|
|
116
|
+
"models": [
|
|
117
|
+
"claude-sonnet-4-7",
|
|
118
|
+
{
|
|
119
|
+
"name": "claude-opus-5",
|
|
120
|
+
"description": "Latest opus model",
|
|
121
|
+
"controls": [
|
|
122
|
+
{ "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
|
|
123
|
+
]
|
|
124
|
+
}
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
|
|
130
|
+
(default: `~/.config/coding-cli-runtime`).
|
|
131
|
+
|
|
132
|
+
### Decide whether to retry a failed run
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from coding_cli_runtime import classify_provider_failure
|
|
136
|
+
|
|
137
|
+
classification = classify_provider_failure(
|
|
138
|
+
provider="gemini",
|
|
139
|
+
stderr_text="429 Resource exhausted: rate limit exceeded",
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
if classification.retryable:
|
|
143
|
+
print(f"Retryable ({classification.category}) — will retry")
|
|
144
|
+
else:
|
|
145
|
+
print(f"Fatal ({classification.category}) — giving up")
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Works for all four providers. Recognizes auth failures, rate limits,
|
|
149
|
+
network transients, and other provider-specific error patterns.
|
|
150
|
+
|
|
151
|
+
## Key types
|
|
152
|
+
|
|
153
|
+
| Type | Purpose |
|
|
154
|
+
|------|---------|
|
|
155
|
+
| `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
|
|
156
|
+
| `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
|
|
157
|
+
| `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
|
|
158
|
+
| `ProviderSpec` | Provider catalog entry with models, controls, defaults |
|
|
159
|
+
| `FailureClassification` | Classified error with retryable flag and category |
|
|
160
|
+
|
|
161
|
+
`run_interactive_session()` manages long-running CLI processes with
|
|
162
|
+
timeout enforcement, process-group cleanup, transcript mirroring, and
|
|
163
|
+
automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
|
|
164
|
+
required — observability labels like `job_name` and `phase_tag` default to
|
|
165
|
+
sensible values so external callers don't need to invent them.
|
|
166
|
+
|
|
167
|
+
## Prerequisites
|
|
168
|
+
|
|
169
|
+
This package does **not** bundle any CLI binaries or credentials. You must
|
|
170
|
+
install and authenticate the relevant provider CLI yourself before using the
|
|
171
|
+
execution helpers.
|
|
172
|
+
|
|
173
|
+
## Status
|
|
174
|
+
|
|
175
|
+
Pre-1.0. API may change between minor versions.
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
MIT
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# coding-cli-runtime
|
|
2
|
+
|
|
3
|
+
[![PyPI](https://img.shields.io/pypi/v/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
|
|
4
|
+
[![Python](https://img.shields.io/pypi/pyversions/coding-cli-runtime)](https://pypi.org/project/coding-cli-runtime/)
|
|
5
|
+
[![Build](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml/badge.svg)](https://github.com/pj-ms/llm-eval/actions/workflows/ci.yml)
|
|
6
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
|
|
7
|
+
|
|
8
|
+
A Python library for orchestrating LLM coding agent CLIs — [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Codex](https://github.com/openai/codex), [Gemini CLI](https://github.com/google-gemini/gemini-cli), and [GitHub Copilot](https://docs.github.com/en/copilot).
|
|
9
|
+
|
|
10
|
+
These CLIs each have different invocation patterns, output formats, error
|
|
11
|
+
shapes, and timeout behaviors. This library normalizes all of that behind
|
|
12
|
+
a common `CliRunRequest` → `CliRunResult` contract, so your automation
|
|
13
|
+
code doesn't need provider-specific subprocess handling.
|
|
14
|
+
|
|
15
|
+
**What it does (and why not just `subprocess.run`):**
|
|
16
|
+
|
|
17
|
+
- Unified request/result types across all four CLIs
|
|
18
|
+
- Timeout enforcement with graceful process termination
|
|
19
|
+
- Provider-aware failure classification (retryable vs fatal)
|
|
20
|
+
- Built-in model catalog with defaults, reasoning levels, and capabilities
|
|
21
|
+
- Interactive session management for long-running generation tasks
|
|
22
|
+
- Zero runtime dependencies
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install coding-cli-runtime
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Requires Python 3.10+.
|
|
31
|
+
|
|
32
|
+
## Examples
|
|
33
|
+
|
|
34
|
+
### Execute a provider CLI
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import asyncio
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
from coding_cli_runtime import CliRunRequest, run_cli_command
|
|
40
|
+
|
|
41
|
+
request = CliRunRequest(
|
|
42
|
+
cmd_parts=("codex", "--model", "o4-mini", "--quiet", "exec", "fix the tests"),
|
|
43
|
+
cwd=Path("/tmp/my-project"),
|
|
44
|
+
timeout_seconds=120,
|
|
45
|
+
)
|
|
46
|
+
result = asyncio.run(run_cli_command(request))
|
|
47
|
+
|
|
48
|
+
print(result.returncode) # 0
|
|
49
|
+
print(result.error_code) # "none"
|
|
50
|
+
print(result.duration_seconds) # 14.2
|
|
51
|
+
print(result.stdout_text[:200])
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Swap `codex` for `claude`, `gemini`, or `copilot` — the request/result
|
|
55
|
+
shape stays the same. A synchronous variant `run_cli_command_sync` is also
|
|
56
|
+
available.
|
|
57
|
+
|
|
58
|
+
### Pick a model from the provider catalog
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from coding_cli_runtime import get_provider_spec
|
|
62
|
+
|
|
63
|
+
codex = get_provider_spec("codex")
|
|
64
|
+
print(codex.default_model) # "gpt-5.3-codex"
|
|
65
|
+
print(codex.model_source) # "codex_cli_cache", "override", or "code"
|
|
66
|
+
|
|
67
|
+
for model in codex.models:
|
|
68
|
+
    print(f" {model.name}: {model.description}")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
The catalog covers all four providers — each with model names, reasoning
|
|
72
|
+
levels, default settings, and visibility flags.
|
|
73
|
+
|
|
74
|
+
Model lists are resolved with a three-tier fallback:
|
|
75
|
+
|
|
76
|
+
1. **User override** — drop a JSON file at
|
|
77
|
+
`~/.config/coding-cli-runtime/providers/<provider>.json` to use your own
|
|
78
|
+
model list immediately, without waiting for a package update.
|
|
79
|
+
2. **Live CLI cache** — for Codex, the library reads
|
|
80
|
+
`~/.codex/models_cache.json` (auto-refreshed by the Codex CLI) when
|
|
81
|
+
present. Other providers fall through because their CLIs don't expose a
|
|
82
|
+
machine-readable model list.
|
|
83
|
+
3. **Hardcoded fallback** — the model list shipped with the package.
|
|
84
|
+
|
|
85
|
+
Override file format:
|
|
86
|
+
|
|
87
|
+
```json
|
|
88
|
+
{
|
|
89
|
+
"default_model": "claude-sonnet-4-7",
|
|
90
|
+
"models": [
|
|
91
|
+
"claude-sonnet-4-7",
|
|
92
|
+
{
|
|
93
|
+
"name": "claude-opus-5",
|
|
94
|
+
"description": "Latest opus model",
|
|
95
|
+
"controls": [
|
|
96
|
+
{ "name": "effort", "kind": "choice", "choices": ["low", "high"], "default": "low" }
|
|
97
|
+
]
|
|
98
|
+
}
|
|
99
|
+
]
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Set `CODING_CLI_RUNTIME_CONFIG_DIR` to change the config directory
|
|
104
|
+
(default: `~/.config/coding-cli-runtime`).
|
|
105
|
+
|
|
106
|
+
### Decide whether to retry a failed run
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from coding_cli_runtime import classify_provider_failure
|
|
110
|
+
|
|
111
|
+
classification = classify_provider_failure(
|
|
112
|
+
provider="gemini",
|
|
113
|
+
stderr_text="429 Resource exhausted: rate limit exceeded",
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
if classification.retryable:
|
|
117
|
+
    print(f"Retryable ({classification.category}) — will retry")
|
|
118
|
+
else:
|
|
119
|
+
    print(f"Fatal ({classification.category}) — giving up")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Works for all four providers. Recognizes auth failures, rate limits,
|
|
123
|
+
network transients, and other provider-specific error patterns.
|
|
124
|
+
|
|
125
|
+
## Key types
|
|
126
|
+
|
|
127
|
+
| Type | Purpose |
|
|
128
|
+
|------|---------|
|
|
129
|
+
| `CliRunRequest` | Command spec: cmd, cwd, env, timeout, stream paths |
|
|
130
|
+
| `CliRunResult` | Result: returncode, stdout/stderr, duration, error code |
|
|
131
|
+
| `ErrorCode` | `none` · `spawn_failed` · `timed_out` · `non_zero_exit` |
|
|
132
|
+
| `ProviderSpec` | Provider catalog entry with models, controls, defaults |
|
|
133
|
+
| `FailureClassification` | Classified error with retryable flag and category |
|
|
134
|
+
|
|
135
|
+
`run_interactive_session()` manages long-running CLI processes with
|
|
136
|
+
timeout enforcement, process-group cleanup, transcript mirroring, and
|
|
137
|
+
automatic retries. Only `cmd_parts`, `cwd`, `stdin_text`, and `logger` are
|
|
138
|
+
required — observability labels like `job_name` and `phase_tag` default to
|
|
139
|
+
sensible values so external callers don't need to invent them.
|
|
140
|
+
|
|
141
|
+
## Prerequisites
|
|
142
|
+
|
|
143
|
+
This package does **not** bundle any CLI binaries or credentials. You must
|
|
144
|
+
install and authenticate the relevant provider CLI yourself before using the
|
|
145
|
+
execution helpers.
|
|
146
|
+
|
|
147
|
+
## Status
|
|
148
|
+
|
|
149
|
+
Pre-1.0. API may change between minor versions.
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
MIT
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"] # >=77.0.3 required for SPDX license + license-files
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "coding-cli-runtime"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Reusable CLI runtime primitives for provider-backed automation workflows"
|
|
9
|
+
readme = {file = "README.md", content-type = "text/markdown"}
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
authors = [
|
|
14
|
+
{name = "LLM Eval maintainers", email = "llm-eval-maintainers@users.noreply.github.com"},
|
|
15
|
+
]
|
|
16
|
+
dependencies = []
|
|
17
|
+
keywords = [
|
|
18
|
+
"cli",
|
|
19
|
+
"runtime",
|
|
20
|
+
"llm",
|
|
21
|
+
"automation",
|
|
22
|
+
"schema-validation",
|
|
23
|
+
]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 3 - Alpha",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.10",
|
|
30
|
+
"Programming Language :: Python :: 3.11",
|
|
31
|
+
"Programming Language :: Python :: 3.12",
|
|
32
|
+
"Programming Language :: Python :: 3.13",
|
|
33
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
34
|
+
"Typing :: Typed",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/pj-ms/llm-eval/tree/main/packages/coding-cli-runtime"
|
|
39
|
+
Repository = "https://github.com/pj-ms/llm-eval"
|
|
40
|
+
Issues = "https://github.com/pj-ms/llm-eval/issues"
|
|
41
|
+
Changelog = "https://github.com/pj-ms/llm-eval/blob/main/packages/coding-cli-runtime/CHANGELOG.md"
|
|
42
|
+
|
|
43
|
+
[dependency-groups]
|
|
44
|
+
dev = [
|
|
45
|
+
"build>=1.2.2",
|
|
46
|
+
"bump-my-version>=0.26.0",
|
|
47
|
+
"mypy>=1.13.0",
|
|
48
|
+
"pre-commit>=4.0.0",
|
|
49
|
+
"pytest==8.3.3",
|
|
50
|
+
"pytest-cov>=4.0",
|
|
51
|
+
"ruff>=0.8.0",
|
|
52
|
+
"setuptools>=77.0.3",
|
|
53
|
+
"twine>=5.1.1",
|
|
54
|
+
"wheel",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[tool.setuptools]
|
|
58
|
+
package-dir = {"" = "src"}
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
where = ["src"]
|
|
62
|
+
|
|
63
|
+
[tool.setuptools.package-data]
|
|
64
|
+
coding_cli_runtime = [
|
|
65
|
+
"py.typed",
|
|
66
|
+
"copilot_reasoning_baseline.json",
|
|
67
|
+
"schemas/*.json",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
[tool.pytest.ini_options]
|
|
71
|
+
testpaths = ["tests"]
|
|
72
|
+
|
|
73
|
+
[tool.ruff]
|
|
74
|
+
target-version = "py310"
|
|
75
|
+
line-length = 100
|
|
76
|
+
|
|
77
|
+
[tool.ruff.lint]
|
|
78
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
79
|
+
|
|
80
|
+
[tool.ruff.lint.isort]
|
|
81
|
+
known-first-party = ["coding_cli_runtime"]
|
|
82
|
+
|
|
83
|
+
[tool.mypy]
|
|
84
|
+
python_version = "3.10"
|
|
85
|
+
strict = true
|
|
86
|
+
|
|
87
|
+
[[tool.mypy.overrides]]
|
|
88
|
+
module = [
|
|
89
|
+
"coding_cli_runtime.subprocess_runner",
|
|
90
|
+
"coding_cli_runtime.session_execution",
|
|
91
|
+
]
|
|
92
|
+
disallow_any_generics = false
|
|
93
|
+
disallow_untyped_defs = false
|
|
94
|
+
warn_return_any = false
|
|
95
|
+
|
|
96
|
+
[tool.bumpversion]
|
|
97
|
+
current_version = "0.1.0"
|
|
98
|
+
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
|
99
|
+
serialize = ["{major}.{minor}.{patch}"]
|
|
100
|
+
commit = true
|
|
101
|
+
tag = true
|
|
102
|
+
tag_name = "coding-cli-runtime-v{new_version}"
|
|
103
|
+
|
|
104
|
+
[[tool.bumpversion.files]]
|
|
105
|
+
filename = "pyproject.toml"
|
|
106
|
+
search = 'version = "{current_version}"'
|
|
107
|
+
replace = 'version = "{new_version}"'
|
|
108
|
+
|
|
109
|
+
[[tool.bumpversion.files]]
|
|
110
|
+
filename = "src/coding_cli_runtime/__init__.py"
|
|
111
|
+
search = '__version__ = "{current_version}"'
|
|
112
|
+
replace = '__version__ = "{new_version}"'
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Runtime primitives for orchestrating LLM provider CLIs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
|
+
|
|
7
|
+
from .auth import AuthResolution, resolve_auth
|
|
8
|
+
from .codex_cli import CodexExecSpec, build_codex_exec_spec
|
|
9
|
+
from .contracts import (
|
|
10
|
+
AuthMode,
|
|
11
|
+
ClaudeReasoningPolicy,
|
|
12
|
+
CliLaunchSpec,
|
|
13
|
+
CliRunRequest,
|
|
14
|
+
CliRunResult,
|
|
15
|
+
ErrorCode,
|
|
16
|
+
)
|
|
17
|
+
from .failure_classification import FailureClassification, classify_provider_failure
|
|
18
|
+
from .provider_controls import build_model_id, resolve_provider_model_controls
|
|
19
|
+
from .provider_specs import (
|
|
20
|
+
ChoiceSpec,
|
|
21
|
+
ControlSpec,
|
|
22
|
+
ModelSpec,
|
|
23
|
+
ProviderSpec,
|
|
24
|
+
get_claude_default_model,
|
|
25
|
+
get_claude_effort_levels,
|
|
26
|
+
get_claude_model_candidates,
|
|
27
|
+
get_claude_output_suffixes,
|
|
28
|
+
get_claude_permission_modes,
|
|
29
|
+
get_codex_supported_models,
|
|
30
|
+
get_copilot_default_model,
|
|
31
|
+
get_copilot_model_catalog,
|
|
32
|
+
get_gemini_default_model,
|
|
33
|
+
get_gemini_model_options,
|
|
34
|
+
get_provider_spec,
|
|
35
|
+
list_provider_specs,
|
|
36
|
+
serialize_provider_specs,
|
|
37
|
+
)
|
|
38
|
+
from .reasoning import resolve_claude_reasoning_policy
|
|
39
|
+
from .redaction import redact_text
|
|
40
|
+
from .schema_validation import SchemaValidationError, load_schema, validate_payload
|
|
41
|
+
from .session_execution import (
|
|
42
|
+
InteractiveCliRunResult,
|
|
43
|
+
SessionExecutionTimeoutError,
|
|
44
|
+
SessionProgressEvent,
|
|
45
|
+
SessionRetryDecision,
|
|
46
|
+
TranscriptMirrorStrategy,
|
|
47
|
+
mirror_session_transcript,
|
|
48
|
+
run_interactive_session,
|
|
49
|
+
)
|
|
50
|
+
from .session_logs import (
|
|
51
|
+
claude_project_key,
|
|
52
|
+
find_claude_session,
|
|
53
|
+
find_codex_session,
|
|
54
|
+
normalize_path_str,
|
|
55
|
+
)
|
|
56
|
+
from .subprocess_runner import run_cli_command, run_cli_command_sync
|
|
57
|
+
|
|
58
|
+
__all__ = [
|
|
59
|
+
"AuthMode",
|
|
60
|
+
"AuthResolution",
|
|
61
|
+
"CliRunRequest",
|
|
62
|
+
"CliRunResult",
|
|
63
|
+
"CodexExecSpec",
|
|
64
|
+
"ChoiceSpec",
|
|
65
|
+
"ClaudeReasoningPolicy",
|
|
66
|
+
"CliLaunchSpec",
|
|
67
|
+
"ControlSpec",
|
|
68
|
+
"ErrorCode",
|
|
69
|
+
"FailureClassification",
|
|
70
|
+
"ModelSpec",
|
|
71
|
+
"ProviderSpec",
|
|
72
|
+
"SchemaValidationError",
|
|
73
|
+
"InteractiveCliRunResult",
|
|
74
|
+
"SessionProgressEvent",
|
|
75
|
+
"SessionRetryDecision",
|
|
76
|
+
"SessionExecutionTimeoutError",
|
|
77
|
+
"TranscriptMirrorStrategy",
|
|
78
|
+
"get_claude_default_model",
|
|
79
|
+
"get_claude_effort_levels",
|
|
80
|
+
"get_claude_model_candidates",
|
|
81
|
+
"get_claude_output_suffixes",
|
|
82
|
+
"get_claude_permission_modes",
|
|
83
|
+
"get_codex_supported_models",
|
|
84
|
+
"get_copilot_default_model",
|
|
85
|
+
"get_copilot_model_catalog",
|
|
86
|
+
"get_gemini_default_model",
|
|
87
|
+
"get_gemini_model_options",
|
|
88
|
+
"get_provider_spec",
|
|
89
|
+
"list_provider_specs",
|
|
90
|
+
"build_model_id",
|
|
91
|
+
"build_codex_exec_spec",
|
|
92
|
+
"classify_provider_failure",
|
|
93
|
+
"load_schema",
|
|
94
|
+
"resolve_auth",
|
|
95
|
+
"resolve_claude_reasoning_policy",
|
|
96
|
+
"resolve_provider_model_controls",
|
|
97
|
+
"redact_text",
|
|
98
|
+
"claude_project_key",
|
|
99
|
+
"find_claude_session",
|
|
100
|
+
"find_codex_session",
|
|
101
|
+
"normalize_path_str",
|
|
102
|
+
"mirror_session_transcript",
|
|
103
|
+
"run_cli_command",
|
|
104
|
+
"run_cli_command_sync",
|
|
105
|
+
"run_interactive_session",
|
|
106
|
+
"serialize_provider_specs",
|
|
107
|
+
"validate_payload",
|
|
108
|
+
]
|