coding-cli-runtime 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coding_cli_runtime/__init__.py +108 -0
- coding_cli_runtime/auth.py +55 -0
- coding_cli_runtime/codex_cli.py +95 -0
- coding_cli_runtime/contracts.py +72 -0
- coding_cli_runtime/copilot_reasoning_baseline.json +66 -0
- coding_cli_runtime/copilot_reasoning_logs.py +81 -0
- coding_cli_runtime/failure_classification.py +183 -0
- coding_cli_runtime/json_io.py +81 -0
- coding_cli_runtime/provider_controls.py +101 -0
- coding_cli_runtime/provider_specs.py +749 -0
- coding_cli_runtime/py.typed +1 -0
- coding_cli_runtime/reasoning.py +95 -0
- coding_cli_runtime/redaction.py +20 -0
- coding_cli_runtime/schema_validation.py +101 -0
- coding_cli_runtime/schemas/normalized_run_result.v1.json +37 -0
- coding_cli_runtime/schemas/reasoning_metadata.v1.json +14 -0
- coding_cli_runtime/session_execution.py +604 -0
- coding_cli_runtime/session_logs.py +129 -0
- coding_cli_runtime/subprocess_runner.py +346 -0
- coding_cli_runtime-0.1.0.dist-info/METADATA +179 -0
- coding_cli_runtime-0.1.0.dist-info/RECORD +24 -0
- coding_cli_runtime-0.1.0.dist-info/WHEEL +5 -0
- coding_cli_runtime-0.1.0.dist-info/licenses/LICENSE +21 -0
- coding_cli_runtime-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Runtime primitives for orchestrating LLM provider CLIs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
|
+
|
|
7
|
+
from .auth import AuthResolution, resolve_auth
|
|
8
|
+
from .codex_cli import CodexExecSpec, build_codex_exec_spec
|
|
9
|
+
from .contracts import (
|
|
10
|
+
AuthMode,
|
|
11
|
+
ClaudeReasoningPolicy,
|
|
12
|
+
CliLaunchSpec,
|
|
13
|
+
CliRunRequest,
|
|
14
|
+
CliRunResult,
|
|
15
|
+
ErrorCode,
|
|
16
|
+
)
|
|
17
|
+
from .failure_classification import FailureClassification, classify_provider_failure
|
|
18
|
+
from .provider_controls import build_model_id, resolve_provider_model_controls
|
|
19
|
+
from .provider_specs import (
|
|
20
|
+
ChoiceSpec,
|
|
21
|
+
ControlSpec,
|
|
22
|
+
ModelSpec,
|
|
23
|
+
ProviderSpec,
|
|
24
|
+
get_claude_default_model,
|
|
25
|
+
get_claude_effort_levels,
|
|
26
|
+
get_claude_model_candidates,
|
|
27
|
+
get_claude_output_suffixes,
|
|
28
|
+
get_claude_permission_modes,
|
|
29
|
+
get_codex_supported_models,
|
|
30
|
+
get_copilot_default_model,
|
|
31
|
+
get_copilot_model_catalog,
|
|
32
|
+
get_gemini_default_model,
|
|
33
|
+
get_gemini_model_options,
|
|
34
|
+
get_provider_spec,
|
|
35
|
+
list_provider_specs,
|
|
36
|
+
serialize_provider_specs,
|
|
37
|
+
)
|
|
38
|
+
from .reasoning import resolve_claude_reasoning_policy
|
|
39
|
+
from .redaction import redact_text
|
|
40
|
+
from .schema_validation import SchemaValidationError, load_schema, validate_payload
|
|
41
|
+
from .session_execution import (
|
|
42
|
+
InteractiveCliRunResult,
|
|
43
|
+
SessionExecutionTimeoutError,
|
|
44
|
+
SessionProgressEvent,
|
|
45
|
+
SessionRetryDecision,
|
|
46
|
+
TranscriptMirrorStrategy,
|
|
47
|
+
mirror_session_transcript,
|
|
48
|
+
run_interactive_session,
|
|
49
|
+
)
|
|
50
|
+
from .session_logs import (
|
|
51
|
+
claude_project_key,
|
|
52
|
+
find_claude_session,
|
|
53
|
+
find_codex_session,
|
|
54
|
+
normalize_path_str,
|
|
55
|
+
)
|
|
56
|
+
from .subprocess_runner import run_cli_command, run_cli_command_sync
|
|
57
|
+
|
|
58
|
+
__all__ = [
|
|
59
|
+
"AuthMode",
|
|
60
|
+
"AuthResolution",
|
|
61
|
+
"CliRunRequest",
|
|
62
|
+
"CliRunResult",
|
|
63
|
+
"CodexExecSpec",
|
|
64
|
+
"ChoiceSpec",
|
|
65
|
+
"ClaudeReasoningPolicy",
|
|
66
|
+
"CliLaunchSpec",
|
|
67
|
+
"ControlSpec",
|
|
68
|
+
"ErrorCode",
|
|
69
|
+
"FailureClassification",
|
|
70
|
+
"ModelSpec",
|
|
71
|
+
"ProviderSpec",
|
|
72
|
+
"SchemaValidationError",
|
|
73
|
+
"InteractiveCliRunResult",
|
|
74
|
+
"SessionProgressEvent",
|
|
75
|
+
"SessionRetryDecision",
|
|
76
|
+
"SessionExecutionTimeoutError",
|
|
77
|
+
"TranscriptMirrorStrategy",
|
|
78
|
+
"get_claude_default_model",
|
|
79
|
+
"get_claude_effort_levels",
|
|
80
|
+
"get_claude_model_candidates",
|
|
81
|
+
"get_claude_output_suffixes",
|
|
82
|
+
"get_claude_permission_modes",
|
|
83
|
+
"get_codex_supported_models",
|
|
84
|
+
"get_copilot_default_model",
|
|
85
|
+
"get_copilot_model_catalog",
|
|
86
|
+
"get_gemini_default_model",
|
|
87
|
+
"get_gemini_model_options",
|
|
88
|
+
"get_provider_spec",
|
|
89
|
+
"list_provider_specs",
|
|
90
|
+
"build_model_id",
|
|
91
|
+
"build_codex_exec_spec",
|
|
92
|
+
"classify_provider_failure",
|
|
93
|
+
"load_schema",
|
|
94
|
+
"resolve_auth",
|
|
95
|
+
"resolve_claude_reasoning_policy",
|
|
96
|
+
"resolve_provider_model_controls",
|
|
97
|
+
"redact_text",
|
|
98
|
+
"claude_project_key",
|
|
99
|
+
"find_claude_session",
|
|
100
|
+
"find_codex_session",
|
|
101
|
+
"normalize_path_str",
|
|
102
|
+
"mirror_session_transcript",
|
|
103
|
+
"run_cli_command",
|
|
104
|
+
"run_cli_command_sync",
|
|
105
|
+
"run_interactive_session",
|
|
106
|
+
"serialize_provider_specs",
|
|
107
|
+
"validate_payload",
|
|
108
|
+
]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Provider auth resolution contract for shared runtime."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from .contracts import AuthMode
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class AuthResolution:
    """Immutable snapshot of how a provider is expected to authenticate.

    Instances are produced by ``resolve_auth`` and carry both the decision
    (``mode``) and the evidence behind it (which env vars were looked for and
    which of them were actually set).
    """

    # Provider key the resolution was computed for.
    provider: str
    # Selected authentication mode.
    mode: AuthMode
    # Env var names that can carry credentials for this provider.
    required_env: tuple[str, ...]
    # Subset of ``required_env`` that had a non-empty value.
    present_env: tuple[str, ...]
    # Subset of ``required_env`` that was absent or empty.
    missing_env: tuple[str, ...]
    # Human-readable guidance on how to authenticate.
    hint: str
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Per-provider auth metadata: (env vars that can carry credentials, login hint).
# An empty tuple means the provider has no env-var credential path here.
# NOTE(review): Claude Code documents ANTHROPIC_API_KEY as its API-key
# variable — confirm CLAUDE_API_KEY is the name callers are expected to set.
_PROVIDER_ENV_HINTS: dict[str, tuple[tuple[str, ...], str]] = {
    "claude": (
        ("CLAUDE_API_KEY",),
        "Use `claude login` or set CLAUDE_API_KEY.",
    ),
    "copilot": (
        (),
        "Use `copilot auth login` (CLI login is the default authentication mode).",
    ),
    "codex": (
        ("OPENAI_API_KEY",),
        "Use Codex login flow or set OPENAI_API_KEY.",
    ),
    "gemini": (
        ("GEMINI_API_KEY",),
        "Use `gemini auth login` or set GEMINI_API_KEY.",
    ),
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def resolve_auth(provider: str, env: dict[str, str] | None = None) -> AuthResolution:
    """Work out which authentication mode applies to *provider*.

    Args:
        provider: Provider name; surrounding whitespace and case are ignored.
        env: Environment mapping to inspect. ``os.environ`` is used when None.

    Returns:
        An ``AuthResolution`` whose mode is ``AuthMode.ENV`` when at least one
        of the provider's credential env vars has a non-empty value, and
        ``AuthMode.CLI_LOGIN`` otherwise (including for unknown providers).
    """
    normalized = provider.strip().lower()
    unknown_entry = ((), "Unknown provider; configure CLI login or provider token env vars.")
    required_env, hint = _PROVIDER_ENV_HINTS.get(normalized, unknown_entry)
    lookup = os.environ if env is None else env

    # Split the candidate variables by whether they hold a non-empty value.
    found: list[str] = []
    absent: list[str] = []
    for name in required_env:
        bucket = found if lookup.get(name) else absent
        bucket.append(name)

    return AuthResolution(
        provider=normalized,
        mode=AuthMode.ENV if found else AuthMode.CLI_LOGIN,
        required_env=required_env,
        present_env=tuple(found),
        missing_env=tuple(absent),
        hint=hint,
    )
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Reusable Codex CLI launch helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import shlex
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .provider_controls import resolve_provider_model_controls
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class CodexExecSpec:
    """Immutable description of a prepared ``codex exec`` launch."""

    # Argument vector, starting with the Codex binary.
    cmd_parts: tuple[str, ...]
    # Shell-quoted rendering of the command intended for display.
    display_text: str
    # Provider controls that were applied when resolving the launch.
    applied_controls: dict[str, Any]
    # Reasoning effort passed to Codex via ``--config``.
    effective_reasoning: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _resolve_codex_reasoning(
    *,
    model_reasoning: str | None = None,
    model_controls: dict[str, Any] | None = None,
) -> tuple[dict[str, Any], str]:
    """Resolve the applied Codex controls and the reasoning effort to launch with.

    The effort is taken from the resolved controls' ``reasoning_effort`` entry
    when that is a non-blank string, otherwise from *model_reasoning*.

    Args:
        model_reasoning: Caller-supplied reasoning effort, used as a fallback.
        model_controls: Raw provider controls forwarded to the resolver.

    Returns:
        ``(applied_controls, effective_reasoning)``. The effort is returned
        with surrounding whitespace removed so the value that passed the
        blank check is the value that reaches the command line.

    Raises:
        ValueError: If neither source yields a non-blank reasoning effort.
    """
    controls = resolve_provider_model_controls(
        provider="codex",
        model_reasoning=model_reasoning,
        model_controls=model_controls,
    )
    applied_raw = controls.get("applied")
    # Anything other than a dict is treated as "no controls applied".
    applied_controls = applied_raw if isinstance(applied_raw, dict) else {}

    # Resolved controls win over the caller's raw request.
    for candidate in (applied_controls.get("reasoning_effort"), model_reasoning):
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned:
            return applied_controls, cleaned
    raise ValueError("Codex launch requires a reasoning effort")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def build_codex_exec_spec(
    *,
    codex_bin: str,
    model: str,
    cwd: Path,
    output_schema_path: Path,
    output_path: Path,
    model_reasoning: str | None = None,
    model_controls: dict[str, Any] | None = None,
    json_output: bool,
    sandbox: str,
    full_auto: bool = True,
    skip_git_repo_check: bool = True,
    extra_args: tuple[str, ...] | list[str] | None = None,
    stdin_placeholder: str | None = "<prompt>",
) -> CodexExecSpec:
    """Assemble the argument vector and display string for ``codex exec``.

    Args:
        codex_bin: Codex executable; becomes the first argv element.
        model: Value for ``--model``.
        cwd: Value for ``-C``.
        output_schema_path: Value for ``--output-schema``.
        output_path: Value for ``-o``.
        model_reasoning: Requested reasoning effort (see the resolver helper).
        model_controls: Raw provider controls forwarded to the resolver.
        json_output: Emit ``--json`` when true.
        sandbox: Value for ``--sandbox`` (always emitted).
        full_auto: Emit ``--full-auto`` when true.
        skip_git_repo_check: Emit ``--skip-git-repo-check`` when true.
        extra_args: Extra arguments appended verbatim (stringified) at the end.
        stdin_placeholder: Token appended, unquoted, to the display text only;
            pass None to omit it.

    Returns:
        A ``CodexExecSpec`` holding the argv, its shell-quoted display form,
        the applied controls, and the effective reasoning effort.

    Raises:
        ValueError: Propagated from reasoning resolution when no effort is set.
    """
    applied_controls, effective_reasoning = _resolve_codex_reasoning(
        model_reasoning=model_reasoning,
        model_controls=model_controls,
    )

    # Switches in emission order; each entry is (enabled, argv tokens).
    switch_table = (
        (json_output, ["--json"]),
        (full_auto, ["--full-auto"]),
        (True, ["--sandbox", sandbox]),
        (skip_git_repo_check, ["--skip-git-repo-check"]),
    )
    argv: list[str] = [str(codex_bin), "exec"]
    for enabled, tokens in switch_table:
        if enabled:
            argv.extend(tokens)

    # The effort is JSON-encoded so it arrives as a quoted string value.
    effort_literal = json.dumps(effective_reasoning)
    argv += [
        "--model",
        model,
        "--config",
        f"model_reasoning_effort={effort_literal}",
        "-C",
        str(cwd),
        "--output-schema",
        str(output_schema_path),
        "-o",
        str(output_path),
    ]
    if extra_args:
        argv.extend(str(extra) for extra in extra_args)

    rendered = [shlex.quote(token) for token in argv]
    if stdin_placeholder is not None:
        rendered.append(stdin_placeholder)

    return CodexExecSpec(
        cmd_parts=tuple(argv),
        display_text=" ".join(rendered),
        applied_controls=applied_controls,
        effective_reasoning=effective_reasoning,
    )
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Core contracts for shared CLI runtime execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ErrorCode(str, Enum):
    """String-valued outcome codes attached to a CLI run result."""

    # No error recorded.
    NONE = "none"
    # The process could not be started.
    SPAWN_FAILED = "spawn_failed"
    # The run hit its timeout.
    TIMED_OUT = "timed_out"
    # The process exited with a non-zero status.
    NON_ZERO_EXIT = "non_zero_exit"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AuthMode(str, Enum):
    """String-valued identifiers for the ways a provider CLI can authenticate."""

    # No authentication.
    NONE = "none"
    # Credentials supplied through environment variables.
    ENV = "env"
    # Credentials established through the CLI's own login flow.
    CLI_LOGIN = "cli_login"
    # Credentials read from a token file.
    TOKEN_FILE = "token_file"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class CliRunRequest:
    """Immutable description of one CLI invocation.

    On construction ``cmd_parts`` is coerced to a tuple and a non-None ``env``
    is shallow-copied into a fresh dict, so later mutation of the caller's
    containers does not leak into the request.
    """

    # Argument vector to execute.
    cmd_parts: tuple[str, ...]
    # Working directory for the process.
    cwd: Path
    # Text to feed on stdin, if any.
    stdin_text: str | None = None
    # Environment mapping for the process, if any.
    env: dict[str, str] | None = None
    # Timeout in seconds, if any.
    timeout_seconds: float | None = None
    # Optional file paths associated with the stdout / stderr streams.
    stdout_stream_path: Path | None = None
    stderr_stream_path: Path | None = None

    def __post_init__(self) -> None:
        """Normalise container fields; bypasses ``frozen`` via object.__setattr__."""
        normalised: dict[str, object] = {"cmd_parts": tuple(self.cmd_parts)}
        if self.env is not None:
            normalised["env"] = dict(self.env)
        for field_name, value in normalised.items():
            object.__setattr__(self, field_name, value)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class CliLaunchSpec:
    """Immutable launch description: argv, display string, and optional stdin.

    ``cmd_parts`` is coerced to a tuple on construction.
    """

    # Argument vector to execute.
    cmd_parts: tuple[str, ...]
    # Rendering of the command intended for display.
    display_text: str
    # Text to feed on stdin, if any.
    stdin_text: str | None = None

    def __post_init__(self) -> None:
        """Freeze ``cmd_parts`` as a tuple regardless of the sequence type given."""
        frozen_parts = tuple(self.cmd_parts)
        object.__setattr__(self, "cmd_parts", frozen_parts)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class CliRunResult:
    """Immutable outcome of one CLI invocation.

    ``command`` is coerced to a tuple on construction.
    """

    # Command that was run.
    command: Sequence[str]
    # Process exit status; may be None.
    returncode: int | None
    # Captured stdout / stderr text.
    stdout_text: str
    stderr_text: str
    # Duration of the run in seconds.
    duration_seconds: float
    # Whether the run timed out.
    timed_out: bool
    # Outcome code for the run.
    error_code: ErrorCode
    # Optional human-readable error detail.
    error_message: str | None

    def __post_init__(self) -> None:
        """Freeze ``command`` as a tuple regardless of the sequence type given."""
        frozen_command = tuple(self.command)
        object.__setattr__(self, "command", frozen_command)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class ClaudeReasoningPolicy:
    """Immutable record of a resolved Claude reasoning configuration.

    NOTE(review): the permitted values of ``mode`` and ``effort_source`` are
    set by the resolver that builds this object and are not visible here.
    """

    # Policy mode label.
    mode: str
    # Effort level in effect, if any.
    effective_effort: str | None
    # Where the effort value came from, if known.
    effort_source: str | None
    # Thinking-token count, if any.
    thinking_tokens: int | None
    # Flag for disabling via an environment setting; off by default.
    disable_via_env_setting: bool = False
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"copilot_cli_version": "1.0.15-0",
|
|
3
|
+
"source_probe_run": "packages/coding-cli-runtime/playground/copilot-cli/experiments/results/reasoning_probe_20260331_165652",
|
|
4
|
+
"models": {
|
|
5
|
+
"claude-sonnet-4.6": {
|
|
6
|
+
"schema": "reasoning_effort",
|
|
7
|
+
"value": "medium",
|
|
8
|
+
"default_reasoning": "medium"
|
|
9
|
+
},
|
|
10
|
+
"claude-sonnet-4.5": {
|
|
11
|
+
"schema": "thinking_budget",
|
|
12
|
+
"value": "1024"
|
|
13
|
+
},
|
|
14
|
+
"claude-haiku-4.5": {
|
|
15
|
+
"schema": "thinking_budget",
|
|
16
|
+
"value": "1024"
|
|
17
|
+
},
|
|
18
|
+
"claude-opus-4.6": {
|
|
19
|
+
"schema": "reasoning_effort",
|
|
20
|
+
"value": "high",
|
|
21
|
+
"default_reasoning": "high"
|
|
22
|
+
},
|
|
23
|
+
"claude-opus-4.5": {
|
|
24
|
+
"schema": "thinking_budget",
|
|
25
|
+
"value": "1024"
|
|
26
|
+
},
|
|
27
|
+
"claude-sonnet-4": {
|
|
28
|
+
"schema": "thinking_budget",
|
|
29
|
+
"value": "1024"
|
|
30
|
+
},
|
|
31
|
+
"goldeneye": {
|
|
32
|
+
"schema": "reasoning_effort",
|
|
33
|
+
"value": "medium",
|
|
34
|
+
"default_reasoning": "medium",
|
|
35
|
+
"supported_reasoning_efforts": ["low", "medium", "high"]
|
|
36
|
+
},
|
|
37
|
+
"gpt-5.4": {
|
|
38
|
+
"schema": "reasoning_effort",
|
|
39
|
+
"value": "medium",
|
|
40
|
+
"default_reasoning": "medium"
|
|
41
|
+
},
|
|
42
|
+
"gpt-5.3-codex": {
|
|
43
|
+
"schema": "reasoning_effort",
|
|
44
|
+
"value": "medium",
|
|
45
|
+
"default_reasoning": "medium"
|
|
46
|
+
},
|
|
47
|
+
"gpt-5.2": {
|
|
48
|
+
"schema": "reasoning_effort",
|
|
49
|
+
"value": "medium",
|
|
50
|
+
"default_reasoning": "medium"
|
|
51
|
+
},
|
|
52
|
+
"gpt-5.1": {
|
|
53
|
+
"schema": "reasoning_effort",
|
|
54
|
+
"value": "medium",
|
|
55
|
+
"default_reasoning": "medium"
|
|
56
|
+
},
|
|
57
|
+
"gpt-5-mini": {
|
|
58
|
+
"schema": "reasoning_effort",
|
|
59
|
+
"value": "medium",
|
|
60
|
+
"default_reasoning": "medium"
|
|
61
|
+
},
|
|
62
|
+
"gpt-4.1": {
|
|
63
|
+
"schema": "none"
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Copilot share-export and process-log parsing helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from collections.abc import Mapping
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
# Matches a literal "**Session ID:**" label followed by optional whitespace and
# a backtick-quoted value; group 1 is the text between the backticks.
SESSION_ID_RE = re.compile(r"\*\*Session ID:\*\*\s*`([^`]+)`")

# Matches "defaultReasoningEffort=<value>"; group 1 is the value, limited to
# ASCII letters, digits, underscore and hyphen.
DEFAULT_REASONING_RE = re.compile(r"defaultReasoningEffort=([A-Za-z0-9_-]+)")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def extract_session_id(markdown_text: str) -> str | None:
    """Return the first backtick-quoted session id in *markdown_text*.

    Returns None when no ``**Session ID:**`` label with a quoted value is found.
    """
    found = SESSION_ID_RE.search(markdown_text)
    if found is None:
        return None
    return found.group(1)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extract_final_request_options(log_text: str) -> dict[str, Any]:
    """Parse the JSON object that follows the first "Final request options:" marker.

    Decoding starts at the first ``{`` after the marker and stops at the end of
    that JSON value, so any log text after it is ignored.

    Returns:
        The decoded object as a plain dict, or an empty dict when the marker is
        missing, no ``{`` follows it, the JSON is malformed, or the decoded
        value is not a mapping.
    """
    _, marker, after_marker = log_text.partition("Final request options:")
    if not marker:
        return {}

    brace_at = after_marker.find("{")
    if brace_at < 0:
        return {}

    try:
        # raw_decode tolerates trailing non-JSON text after the object.
        payload, _ = json.JSONDecoder().raw_decode(after_marker[brace_at:])
    except json.JSONDecodeError:
        return {}
    return dict(payload) if isinstance(payload, Mapping) else {}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def classify_reasoning_fields(options: Mapping[str, Any]) -> tuple[str, str, str, str]:
    """Summarise which reasoning field a request-options mapping carries.

    Returns:
        ``(schema, value, reasoning_effort, thinking_budget)`` where *schema*
        is ``"reasoning_effort"``, ``"thinking_budget"`` or ``"none"``.
        ``reasoning_effort`` takes precedence when both keys are set. Values
        are stringified; slots that do not apply are empty strings.
    """
    effort = options.get("reasoning_effort")
    if effort is not None:
        effort_text = str(effort)
        return ("reasoning_effort", effort_text, effort_text, "")

    budget = options.get("thinking_budget")
    if budget is not None:
        budget_text = str(budget)
        return ("thinking_budget", budget_text, "", budget_text)

    return ("none", "", "", "")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def extract_default_reasoning(log_text: str) -> str:
    """Return the first ``defaultReasoningEffort=<value>`` value in *log_text*.

    Returns an empty string when the token does not occur.
    """
    found = DEFAULT_REASONING_RE.search(log_text)
    if found is None:
        return ""
    return found.group(1)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def find_process_log_for_session(
    session_id: str,
    logs_dir: Path,
    *,
    max_candidates: int = 400,
) -> Path | None:
    """Find the newest ``process-*.log`` in *logs_dir* that mentions *session_id*.

    Args:
        session_id: Text to look for inside each log file.
        logs_dir: Directory to scan (non-recursively).
        max_candidates: Only this many most-recently-modified logs are read.

    Returns:
        Path of the first matching log in newest-first order, or None when
        *session_id* is empty, *logs_dir* does not exist, or nothing matches.
        Files that cannot be stat'ed or read are skipped.
    """
    if not (session_id and logs_dir.exists()):
        return None

    dated_logs: list[tuple[float, Path]] = []
    for log_path in logs_dir.glob("process-*.log"):
        try:
            modified_at = log_path.stat().st_mtime
        except OSError:
            # The file may have vanished or be unreadable; ignore it.
            continue
        dated_logs.append((modified_at, log_path))

    newest_first = sorted(dated_logs, key=lambda entry: entry[0], reverse=True)
    for _, log_path in newest_first[:max_candidates]:
        try:
            contents = log_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        if session_id in contents:
            return log_path
    return None
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Provider stderr classification for retry and failure-injection handling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class FailureClassification:
    """Immutable verdict describing a provider CLI failure."""

    # Provider the verdict applies to.
    provider: str
    # Whether the failure is considered worth retrying.
    retryable: bool
    # Short category label such as "rate_limited" or "unknown".
    category: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _classify_gemini(stderr_text: str) -> FailureClassification:
    """Classify Gemini CLI stderr text into a retry verdict.

    Matching is a case-insensitive substring search. The ordered rules are
    checked first and the first hit wins. Generic network wording on its own
    is not trusted: it only counts as transient when a corroborating signal
    marker is also present.
    """
    haystack = stderr_text.lower()

    # (category, retryable, markers) — evaluated top to bottom.
    ordered_rules: tuple[tuple[str, bool, tuple[str, ...]], ...] = (
        (
            "auth_expired",
            False,
            (
                "authentication timed out",
                "token expired",
                "refresh token",
                "please run gemini auth login",
                "reauthenticate",
            ),
        ),
        (
            "auth_or_permission",
            False,
            (
                "invalid api key",
                "permission denied",
                "unauthorized",
                "forbidden",
            ),
        ),
        (
            "rate_limited",
            False,
            (
                "quota",
                "rate limit",
                "resource exhausted",
                "insufficient_quota",
                "exhausted your capacity",
                "billing",
            ),
        ),
        (
            "network_transient",
            True,
            (
                "eai_again",
                "getaddrinfo",
                "temporary failure in name resolution",
                "enotfound",
                "etimedout",
                "econnreset",
                "econnrefused",
                "socket hang up",
            ),
        ),
        (
            "cli_stdin_transient",
            True,
            ("no input provided via stdin",),
        ),
    )
    for category, retryable, markers in ordered_rules:
        for marker in markers:
            if marker in haystack:
                return FailureClassification(
                    provider="gemini", retryable=retryable, category=category
                )

    # Weak markers are too generic to act on without a corroborating signal.
    weak_markers = ("network error", "failed, reason:")
    corroborating_markers = (
        "cloudcode-pa.googleapis.com",
        "streamgeneratecontent",
        "loadcodeassist",
        "gaxioserror",
        "fetcherror",
    )
    has_weak = any(marker in haystack for marker in weak_markers)
    if has_weak and any(marker in haystack for marker in corroborating_markers):
        return FailureClassification(
            provider="gemini", retryable=True, category="network_transient"
        )

    return FailureClassification(provider="gemini", retryable=False, category="unknown")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _classify_copilot(stderr_text: str) -> FailureClassification:
    """Classify Copilot CLI failure text into a retry verdict.

    Matching is a case-insensitive substring search, checked in this order:
    missing authentication (not retryable), startup rate limiting (retryable),
    then a locked session store (retryable, and only when *both* lock markers
    appear). Anything else is ``unknown`` and not retryable.
    """
    haystack = stderr_text.lower()

    def verdict(category: str, retryable: bool) -> FailureClassification:
        return FailureClassification(provider="copilot", retryable=retryable, category=category)

    missing_auth = (
        "no authentication information found",
        "copilot can be authenticated",
        "gh auth login",
    )
    for marker in missing_auth:
        if marker in haystack:
            return verdict("auth_missing", False)

    # Copilot CLI sometimes exits before producing stderr and only logs this in
    # ~/.copilot/logs/process-*.log. Treat this startup path as transient.
    startup_throttled = (
        "failed to list models: 429",
        "error posting to endpoint: too many requests",
        "failed to open sse stream",
        "error loading models",
    )
    for marker in startup_throttled:
        if marker in haystack:
            return verdict("startup_rate_limited", True)

    # Both phrases must be present: either one alone is not specific enough.
    if "database is locked" in haystack and "session store" in haystack:
        return verdict("session_store_locked", True)

    return verdict("unknown", False)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _classify_claude(stderr_text: str) -> FailureClassification:
    """Classify Claude CLI stderr text into a retry verdict.

    Matching is a case-insensitive substring search. Rule groups are checked
    in order — authentication, rate limiting, transient/network — and the first
    group with a matching marker decides the verdict. Only the transient
    group is retryable; unmatched text is ``unknown`` and not retryable.
    """
    haystack = stderr_text.lower()

    # (category, retryable, markers) — evaluated top to bottom.
    rule_groups: tuple[tuple[str, bool, tuple[str, ...]], ...] = (
        (
            "auth_missing",
            False,
            (
                "not authenticated",
                "authentication required",
                "please run claude auth login",
                "please run /login",
                "invalid api key",
                "unauthorized",
                "forbidden",
            ),
        ),
        (
            "rate_limited",
            False,
            (
                "rate limit",
                "too many requests",
                "quota",
                "insufficient_quota",
            ),
        ),
        (
            "network_transient",
            True,
            (
                "overloaded",
                "temporarily unavailable",
                "please try again",
                "eai_again",
                "getaddrinfo",
                "temporary failure in name resolution",
                "enotfound",
                "etimedout",
                "econnreset",
                "econnrefused",
                "socket hang up",
                "network error",
            ),
        ),
    )

    matched = next(
        (
            (category, retryable)
            for category, retryable, markers in rule_groups
            if any(marker in haystack for marker in markers)
        ),
        ("unknown", False),
    )
    category, retryable = matched
    return FailureClassification(provider="claude", retryable=retryable, category=category)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def classify_provider_failure(*, provider: str, stderr_text: str) -> FailureClassification:
    """Classify *stderr_text* using the classifier for *provider*.

    Args:
        provider: Provider name; surrounding whitespace and case are ignored.
        stderr_text: Failure text to inspect.

    Returns:
        The provider-specific verdict for ``gemini``, ``copilot`` and
        ``claude``. Any other provider yields a non-retryable ``unknown``
        verdict carrying the normalised provider name.
    """
    provider_key = provider.strip().lower()
    classifiers = {
        "gemini": _classify_gemini,
        "copilot": _classify_copilot,
        "claude": _classify_claude,
    }
    classifier = classifiers.get(provider_key)
    if classifier is None:
        return FailureClassification(provider=provider_key, retryable=False, category="unknown")
    return classifier(stderr_text)
|