cortex-loop 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +7 -0
- cortex/adapters.py +339 -0
- cortex/blocklist.py +51 -0
- cortex/challenges.py +210 -0
- cortex/cli.py +7 -0
- cortex/core.py +601 -0
- cortex/core_helpers.py +190 -0
- cortex/data/identity_preamble.md +5 -0
- cortex/data/layer1_part_a.md +65 -0
- cortex/data/layer1_part_b.md +17 -0
- cortex/executive.py +295 -0
- cortex/foundation.py +185 -0
- cortex/genome.py +348 -0
- cortex/graveyard.py +226 -0
- cortex/hooks/__init__.py +27 -0
- cortex/hooks/_shared.py +167 -0
- cortex/hooks/post_tool_use.py +13 -0
- cortex/hooks/pre_tool_use.py +13 -0
- cortex/hooks/session_start.py +13 -0
- cortex/hooks/stop.py +13 -0
- cortex/invariants.py +258 -0
- cortex/packs.py +118 -0
- cortex/repomap.py +6 -0
- cortex/requirements.py +497 -0
- cortex/retry.py +312 -0
- cortex/stop_contract.py +217 -0
- cortex/stop_payload.py +122 -0
- cortex/stop_policy.py +100 -0
- cortex/stop_runtime.py +400 -0
- cortex/stop_signals.py +75 -0
- cortex/store.py +793 -0
- cortex/templates/__init__.py +10 -0
- cortex/utils.py +58 -0
- cortex_loop-0.1.0a1.dist-info/METADATA +121 -0
- cortex_loop-0.1.0a1.dist-info/RECORD +52 -0
- cortex_loop-0.1.0a1.dist-info/WHEEL +5 -0
- cortex_loop-0.1.0a1.dist-info/entry_points.txt +3 -0
- cortex_loop-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- cortex_loop-0.1.0a1.dist-info/top_level.txt +3 -0
- cortex_ops_cli/__init__.py +3 -0
- cortex_ops_cli/_adapter_validation.py +119 -0
- cortex_ops_cli/_check_report.py +454 -0
- cortex_ops_cli/_check_report_output.py +270 -0
- cortex_ops_cli/_openai_bridge_probe.py +241 -0
- cortex_ops_cli/_openai_bridge_protocol.py +469 -0
- cortex_ops_cli/_runtime_profile_templates.py +341 -0
- cortex_ops_cli/_runtime_profiles.py +445 -0
- cortex_ops_cli/gemini_hooks.py +301 -0
- cortex_ops_cli/main.py +911 -0
- cortex_ops_cli/openai_app_server_bridge.py +375 -0
- cortex_repomap/__init__.py +1 -0
- cortex_repomap/engine.py +1201 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
STOP_PATH_MODULE_FILES = (
|
|
9
|
+
"core.py",
|
|
10
|
+
"stop_contract.py",
|
|
11
|
+
"stop_runtime.py",
|
|
12
|
+
"stop_signals.py",
|
|
13
|
+
"challenges.py",
|
|
14
|
+
"requirements.py",
|
|
15
|
+
"invariants.py",
|
|
16
|
+
"graveyard.py",
|
|
17
|
+
"stop_policy.py",
|
|
18
|
+
"store.py",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_kernel_metrics_payload(
|
|
23
|
+
*,
|
|
24
|
+
include: bool,
|
|
25
|
+
root: Path,
|
|
26
|
+
baseline_path: str | None,
|
|
27
|
+
write_baseline_path: str | None,
|
|
28
|
+
warnings: list[str],
|
|
29
|
+
) -> dict[str, Any] | None:
|
|
30
|
+
if not include:
|
|
31
|
+
return None
|
|
32
|
+
current_metrics = _collect_kernel_metrics()
|
|
33
|
+
payload: dict[str, Any] = {"gating": "non_blocking", "current": current_metrics}
|
|
34
|
+
payload.update(
|
|
35
|
+
{
|
|
36
|
+
key: current_metrics[key]
|
|
37
|
+
for key in ("scope", "kernel_dir", "modules", "totals", "trend_target")
|
|
38
|
+
if key in current_metrics
|
|
39
|
+
}
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
if write_baseline_path:
|
|
43
|
+
baseline_write_path = Path(write_baseline_path).resolve()
|
|
44
|
+
try:
|
|
45
|
+
payload["baseline_written"] = _write_kernel_metrics_baseline(
|
|
46
|
+
baseline_write_path,
|
|
47
|
+
current_metrics,
|
|
48
|
+
root=root,
|
|
49
|
+
)
|
|
50
|
+
except OSError as exc:
|
|
51
|
+
warnings.append(
|
|
52
|
+
f"Kernel metrics baseline write failed for {baseline_write_path}: {exc}"
|
|
53
|
+
)
|
|
54
|
+
if baseline_path:
|
|
55
|
+
baseline_metrics_path = Path(baseline_path).resolve()
|
|
56
|
+
baseline_metrics, baseline_error = _read_kernel_metrics_baseline(baseline_metrics_path)
|
|
57
|
+
if baseline_error:
|
|
58
|
+
warnings.append(
|
|
59
|
+
f"Kernel metrics baseline read failed for {baseline_metrics_path}: {baseline_error}"
|
|
60
|
+
)
|
|
61
|
+
elif baseline_metrics is not None:
|
|
62
|
+
payload["baseline"] = baseline_metrics
|
|
63
|
+
payload["diff"] = _diff_kernel_metrics(
|
|
64
|
+
current=current_metrics,
|
|
65
|
+
baseline=baseline_metrics,
|
|
66
|
+
)
|
|
67
|
+
return payload
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def print_check_report(report: dict[str, Any]) -> None:
|
|
71
|
+
print(f"Cortex Check: {report['root']}")
|
|
72
|
+
_print_check_section("OK", report["ok"])
|
|
73
|
+
_print_check_section("Needs Attention", report["warnings"])
|
|
74
|
+
_print_check_section("Missing / Errors", report["errors"])
|
|
75
|
+
kernel_metrics = report.get("kernel_metrics")
|
|
76
|
+
if isinstance(kernel_metrics, dict):
|
|
77
|
+
current = (
|
|
78
|
+
kernel_metrics.get("current")
|
|
79
|
+
if isinstance(kernel_metrics.get("current"), dict)
|
|
80
|
+
else kernel_metrics
|
|
81
|
+
)
|
|
82
|
+
totals = current.get("totals") if isinstance(current.get("totals"), dict) else {}
|
|
83
|
+
items = [
|
|
84
|
+
"non-blocking informational output",
|
|
85
|
+
f"stop_path_loc_total={int(totals.get('loc_total', 0))}",
|
|
86
|
+
f"stop_path_loc_code={int(totals.get('loc_code', 0))}",
|
|
87
|
+
f"stop_path_function_total={int(totals.get('functions', 0))}",
|
|
88
|
+
f"stop_path_decision_points={int(totals.get('decision_points', 0))}",
|
|
89
|
+
]
|
|
90
|
+
diff = kernel_metrics.get("diff") if isinstance(kernel_metrics.get("diff"), dict) else None
|
|
91
|
+
diff_totals = (
|
|
92
|
+
diff.get("totals")
|
|
93
|
+
if isinstance(diff, dict) and isinstance(diff.get("totals"), dict)
|
|
94
|
+
else None
|
|
95
|
+
)
|
|
96
|
+
if diff_totals is not None:
|
|
97
|
+
items.extend(
|
|
98
|
+
[
|
|
99
|
+
f"delta_loc_total={int(diff_totals.get('loc_total', 0))}",
|
|
100
|
+
f"delta_loc_code={int(diff_totals.get('loc_code', 0))}",
|
|
101
|
+
f"delta_function_total={int(diff_totals.get('functions', 0))}",
|
|
102
|
+
f"delta_decision_points={int(diff_totals.get('decision_points', 0))}",
|
|
103
|
+
]
|
|
104
|
+
)
|
|
105
|
+
_print_check_section("Kernel Metrics", items)
|
|
106
|
+
summary = report["summary"]
|
|
107
|
+
print(
|
|
108
|
+
f"Summary: {summary['ok']} ok, {summary['warnings']} warnings, {summary['errors']} errors"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def print_fleet_report(payload: dict[str, Any]) -> None:
|
|
113
|
+
print("Cortex Fleet Status")
|
|
114
|
+
for report in payload["projects"]:
|
|
115
|
+
summary = report["summary"]
|
|
116
|
+
status = "OK" if summary["errors"] == 0 else "ERROR"
|
|
117
|
+
print(
|
|
118
|
+
f"- {report['root']} [{status}] "
|
|
119
|
+
f"ok={summary['ok']} warnings={summary['warnings']} errors={summary['errors']}"
|
|
120
|
+
)
|
|
121
|
+
summary = payload["summary"]
|
|
122
|
+
print(
|
|
123
|
+
"Fleet Summary: "
|
|
124
|
+
f"projects={summary['projects']} ok_projects={summary['ok_projects']} "
|
|
125
|
+
f"warning_projects={summary['warning_projects']} error_projects={summary['error_projects']}"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def write_status_artifact(root: Path, report: dict[str, Any]) -> str:
|
|
130
|
+
status_path = root / ".cortex" / "status.json"
|
|
131
|
+
status_path.parent.mkdir(parents=True, exist_ok=True)
|
|
132
|
+
status_path.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
|
133
|
+
return str(status_path)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _collect_kernel_metrics() -> dict[str, Any]:
|
|
137
|
+
kernel_dir = Path(__file__).resolve().parents[1] / "cortex"
|
|
138
|
+
modules: dict[str, Any] = {}
|
|
139
|
+
totals = {"loc_total": 0, "loc_code": 0, "functions": 0, "decision_points": 0}
|
|
140
|
+
for module_name in STOP_PATH_MODULE_FILES:
|
|
141
|
+
metrics = _python_module_metrics(kernel_dir / module_name)
|
|
142
|
+
modules[module_name[:-3]] = metrics
|
|
143
|
+
if metrics.get("exists"):
|
|
144
|
+
totals["loc_total"] += int(metrics["loc_total"])
|
|
145
|
+
totals["loc_code"] += int(metrics["loc_code"])
|
|
146
|
+
totals["functions"] += int(metrics["functions"])
|
|
147
|
+
totals["decision_points"] += int(metrics["decision_points"])
|
|
148
|
+
return {
|
|
149
|
+
"gating": "non_blocking",
|
|
150
|
+
"scope": "stop_path",
|
|
151
|
+
"kernel_dir": str(kernel_dir),
|
|
152
|
+
"modules": modules,
|
|
153
|
+
"totals": totals,
|
|
154
|
+
"trend_target": "down_or_equal_without_reliability_loss",
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _write_kernel_metrics_baseline(
|
|
159
|
+
path: Path,
|
|
160
|
+
metrics: dict[str, Any],
|
|
161
|
+
*,
|
|
162
|
+
root: Path | None = None,
|
|
163
|
+
) -> str:
|
|
164
|
+
if root is not None and not path.is_relative_to(root):
|
|
165
|
+
raise OSError(f"path must stay under --root ({root})")
|
|
166
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
167
|
+
path.write_text(json.dumps(metrics, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
|
168
|
+
return str(path)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _read_kernel_metrics_baseline(path: Path) -> tuple[dict[str, Any] | None, str | None]:
|
|
172
|
+
try:
|
|
173
|
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
|
174
|
+
except OSError as exc:
|
|
175
|
+
return None, str(exc)
|
|
176
|
+
except json.JSONDecodeError as exc:
|
|
177
|
+
return None, f"invalid JSON: {exc}"
|
|
178
|
+
if not isinstance(payload, dict):
|
|
179
|
+
return None, "baseline payload must be a JSON object"
|
|
180
|
+
baseline = payload.get("current") if isinstance(payload.get("current"), dict) else payload
|
|
181
|
+
if not isinstance(baseline, dict):
|
|
182
|
+
return None, "baseline payload does not include a metrics object"
|
|
183
|
+
return baseline, None
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _diff_kernel_metrics(*, current: dict[str, Any], baseline: dict[str, Any]) -> dict[str, Any]:
|
|
187
|
+
keys = ("loc_total", "loc_code", "functions", "decision_points")
|
|
188
|
+
current_totals = current.get("totals") if isinstance(current.get("totals"), dict) else {}
|
|
189
|
+
baseline_totals = baseline.get("totals") if isinstance(baseline.get("totals"), dict) else {}
|
|
190
|
+
totals_diff = {
|
|
191
|
+
key: int(current_totals.get(key, 0)) - int(baseline_totals.get(key, 0))
|
|
192
|
+
for key in keys
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
current_modules = current.get("modules") if isinstance(current.get("modules"), dict) else {}
|
|
196
|
+
baseline_modules = baseline.get("modules") if isinstance(baseline.get("modules"), dict) else {}
|
|
197
|
+
modules_diff: dict[str, dict[str, int]] = {}
|
|
198
|
+
for module_name in sorted(set(current_modules) | set(baseline_modules)):
|
|
199
|
+
current_metrics = (
|
|
200
|
+
current_modules.get(module_name)
|
|
201
|
+
if isinstance(current_modules.get(module_name), dict)
|
|
202
|
+
else {}
|
|
203
|
+
)
|
|
204
|
+
baseline_metrics = (
|
|
205
|
+
baseline_modules.get(module_name)
|
|
206
|
+
if isinstance(baseline_modules.get(module_name), dict)
|
|
207
|
+
else {}
|
|
208
|
+
)
|
|
209
|
+
modules_diff[module_name] = {
|
|
210
|
+
key: int(current_metrics.get(key, 0)) - int(baseline_metrics.get(key, 0))
|
|
211
|
+
for key in keys
|
|
212
|
+
}
|
|
213
|
+
return {"totals": totals_diff, "modules": modules_diff}
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _python_module_metrics(path: Path) -> dict[str, Any]:
|
|
217
|
+
if not path.exists():
|
|
218
|
+
return {
|
|
219
|
+
"exists": False,
|
|
220
|
+
"path": str(path),
|
|
221
|
+
"loc_total": 0,
|
|
222
|
+
"loc_code": 0,
|
|
223
|
+
"functions": 0,
|
|
224
|
+
"decision_points": 0,
|
|
225
|
+
"parse_error": "module file not found",
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
source = path.read_text(encoding="utf-8")
|
|
229
|
+
lines = source.splitlines()
|
|
230
|
+
loc_total = len(lines)
|
|
231
|
+
loc_code = sum(1 for line in lines if line.strip() and not line.strip().startswith("#"))
|
|
232
|
+
try:
|
|
233
|
+
tree = ast.parse(source)
|
|
234
|
+
except SyntaxError as exc:
|
|
235
|
+
return {
|
|
236
|
+
"exists": True,
|
|
237
|
+
"path": str(path),
|
|
238
|
+
"loc_total": loc_total,
|
|
239
|
+
"loc_code": loc_code,
|
|
240
|
+
"functions": 0,
|
|
241
|
+
"decision_points": 0,
|
|
242
|
+
"parse_error": str(exc),
|
|
243
|
+
}
|
|
244
|
+
function_count = sum(
|
|
245
|
+
isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) for node in ast.walk(tree)
|
|
246
|
+
)
|
|
247
|
+
decision_count = sum(
|
|
248
|
+
isinstance(
|
|
249
|
+
node,
|
|
250
|
+
(ast.If, ast.For, ast.AsyncFor, ast.While, ast.Try, ast.BoolOp, ast.IfExp, ast.Match),
|
|
251
|
+
)
|
|
252
|
+
for node in ast.walk(tree)
|
|
253
|
+
)
|
|
254
|
+
return {
|
|
255
|
+
"exists": True,
|
|
256
|
+
"path": str(path),
|
|
257
|
+
"loc_total": loc_total,
|
|
258
|
+
"loc_code": loc_code,
|
|
259
|
+
"functions": function_count,
|
|
260
|
+
"decision_points": decision_count,
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _print_check_section(title: str, items: list[str]) -> None:
|
|
265
|
+
print(f"{title}:")
|
|
266
|
+
if not items:
|
|
267
|
+
print(" (none)")
|
|
268
|
+
return
|
|
269
|
+
for item in items:
|
|
270
|
+
print(f" - {item}")
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from argparse import Namespace
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Callable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def probe_approval_blocking(
|
|
10
|
+
*,
|
|
11
|
+
args: Namespace,
|
|
12
|
+
command_approval_method: str,
|
|
13
|
+
execute_turn: Callable[..., dict[str, Any]],
|
|
14
|
+
approval_policy_candidates_from_args: Callable[[Namespace], tuple[str, ...]],
|
|
15
|
+
) -> int:
|
|
16
|
+
cwd = Path(args.cwd).resolve()
|
|
17
|
+
default_cases = [
|
|
18
|
+
(
|
|
19
|
+
"mutating_workspace",
|
|
20
|
+
(
|
|
21
|
+
"Attempt exactly once: bash -lc 'echo cortex_probe_mutate > ./cortex_probe_mutate.txt'. "
|
|
22
|
+
"If approval is declined, do not retry. Reply with blocked_or_declined. "
|
|
23
|
+
"If it executes, reply with executed."
|
|
24
|
+
),
|
|
25
|
+
),
|
|
26
|
+
(
|
|
27
|
+
"network_sensitive",
|
|
28
|
+
(
|
|
29
|
+
"Attempt exactly once: bash -lc 'curl --max-time 3 -I https://example.com >/tmp/cortex_probe_network.txt 2>&1'. "
|
|
30
|
+
"If approval is declined, do not retry. Reply with blocked_or_declined. "
|
|
31
|
+
"If it executes, reply with executed."
|
|
32
|
+
),
|
|
33
|
+
),
|
|
34
|
+
(
|
|
35
|
+
"destructive_git_like",
|
|
36
|
+
(
|
|
37
|
+
"Attempt exactly once: bash -lc 'git reset --hard HEAD'. "
|
|
38
|
+
"If approval is declined, do not retry. Reply with blocked_or_declined. "
|
|
39
|
+
"If it executes, reply with executed."
|
|
40
|
+
),
|
|
41
|
+
),
|
|
42
|
+
]
|
|
43
|
+
cases = [("custom", str(args.prompt))] if str(args.prompt or "").strip() else default_cases
|
|
44
|
+
|
|
45
|
+
all_coverage_gaps: list[str] = []
|
|
46
|
+
hard_coverage_gaps: list[str] = []
|
|
47
|
+
limitation_gaps: list[str] = []
|
|
48
|
+
case_results: list[dict[str, Any]] = []
|
|
49
|
+
approval_policies_used: set[str] = set()
|
|
50
|
+
|
|
51
|
+
for case_name, prompt in cases:
|
|
52
|
+
decline_item_ids: set[str] = set()
|
|
53
|
+
|
|
54
|
+
def _approval_handler(method: str, params: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
|
55
|
+
if method == command_approval_method:
|
|
56
|
+
item_id = str(params.get("itemId") or "").strip()
|
|
57
|
+
if item_id:
|
|
58
|
+
decline_item_ids.add(item_id)
|
|
59
|
+
return "decline", {"probe": "decline", "case": case_name}
|
|
60
|
+
return "accept", {"probe": "accept_non_command", "case": case_name}
|
|
61
|
+
|
|
62
|
+
result = execute_turn(
|
|
63
|
+
codex_bin=args.codex_bin,
|
|
64
|
+
cwd=cwd,
|
|
65
|
+
prompt=prompt,
|
|
66
|
+
model=args.model,
|
|
67
|
+
timeout_seconds=float(args.timeout_seconds),
|
|
68
|
+
approval_policy_candidates=approval_policy_candidates_from_args(args),
|
|
69
|
+
approval_handler=_approval_handler,
|
|
70
|
+
)
|
|
71
|
+
if isinstance(result.get("approval_policy_used"), str) and str(
|
|
72
|
+
result.get("approval_policy_used")
|
|
73
|
+
).strip():
|
|
74
|
+
approval_policies_used.add(str(result.get("approval_policy_used")).strip())
|
|
75
|
+
|
|
76
|
+
approval_requests = [
|
|
77
|
+
req
|
|
78
|
+
for req in (result.get("approval_requests") or [])
|
|
79
|
+
if isinstance(req, dict) and req.get("method") == command_approval_method
|
|
80
|
+
]
|
|
81
|
+
command_items = [item for item in (result.get("command_completion_items") or []) if isinstance(item, dict)]
|
|
82
|
+
command_surface = result.get("command_surface") if isinstance(result.get("command_surface"), dict) else {}
|
|
83
|
+
command_items_without_approval = [
|
|
84
|
+
str(item)
|
|
85
|
+
for item in command_surface.get("command_items_without_approval", [])
|
|
86
|
+
if str(item).strip()
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
blocked = False
|
|
90
|
+
for item in command_items:
|
|
91
|
+
status = str(item.get("status") or "").strip().lower()
|
|
92
|
+
if status != "declined":
|
|
93
|
+
continue
|
|
94
|
+
if item.get("exit_code") is None and not str(item.get("aggregated_output") or "").strip():
|
|
95
|
+
blocked = True
|
|
96
|
+
break
|
|
97
|
+
|
|
98
|
+
case_gaps: list[str] = []
|
|
99
|
+
if not approval_requests:
|
|
100
|
+
case_gaps.append("pre_tool_use_approval_not_observed")
|
|
101
|
+
all_coverage_gaps.append("pre_tool_use_approval_not_observed")
|
|
102
|
+
limitation_gaps.append("pre_tool_use_approval_not_observed")
|
|
103
|
+
elif not blocked:
|
|
104
|
+
case_gaps.append("pre_tool_use_nonblocking_approval")
|
|
105
|
+
all_coverage_gaps.append("pre_tool_use_nonblocking_approval")
|
|
106
|
+
hard_coverage_gaps.append("pre_tool_use_nonblocking_approval")
|
|
107
|
+
if command_items_without_approval:
|
|
108
|
+
case_gaps.append("pre_tool_use_partial_surface_trusted_commands")
|
|
109
|
+
all_coverage_gaps.append("pre_tool_use_partial_surface_trusted_commands")
|
|
110
|
+
limitation_gaps.append("pre_tool_use_partial_surface_trusted_commands")
|
|
111
|
+
|
|
112
|
+
case_results.append(
|
|
113
|
+
{
|
|
114
|
+
"case": case_name,
|
|
115
|
+
"turn_id": result.get("turn_id"),
|
|
116
|
+
"thread_id": result.get("thread_id"),
|
|
117
|
+
"approval_request_count": len(approval_requests),
|
|
118
|
+
"decline_item_ids": sorted(decline_item_ids),
|
|
119
|
+
"command_completion_count": len(command_items),
|
|
120
|
+
"command_items_without_approval_count": len(command_items_without_approval),
|
|
121
|
+
"blocked_decline_observed": blocked,
|
|
122
|
+
"coverage_gaps": sorted(set(case_gaps)),
|
|
123
|
+
}
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
payload = {
|
|
127
|
+
"ok": not hard_coverage_gaps,
|
|
128
|
+
"cases": case_results,
|
|
129
|
+
"approval_policies_used": sorted(approval_policies_used),
|
|
130
|
+
"blocked_decline_observed": (
|
|
131
|
+
any(item.get("blocked_decline_observed") for item in case_results) if case_results else False
|
|
132
|
+
),
|
|
133
|
+
"coverage_gaps": sorted(set(all_coverage_gaps)),
|
|
134
|
+
"hard_coverage_gaps": sorted(set(hard_coverage_gaps)),
|
|
135
|
+
"limitation_gaps": sorted(set(limitation_gaps)),
|
|
136
|
+
}
|
|
137
|
+
print(json.dumps(payload, sort_keys=True))
|
|
138
|
+
return 0 if payload["ok"] else 2
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def probe_model(
|
|
142
|
+
*,
|
|
143
|
+
args: Namespace,
|
|
144
|
+
app_server_client_cls: type[Any],
|
|
145
|
+
initialize_app_server: Callable[..., None],
|
|
146
|
+
bridge_error_cls: type[Exception],
|
|
147
|
+
run_exec: Callable[..., Any],
|
|
148
|
+
) -> int:
|
|
149
|
+
if not str(args.model or "").strip():
|
|
150
|
+
raise bridge_error_cls("--model is required for probe-model")
|
|
151
|
+
|
|
152
|
+
model = str(args.model).strip()
|
|
153
|
+
cwd = Path(args.cwd).resolve()
|
|
154
|
+
found = False
|
|
155
|
+
source = "model/list"
|
|
156
|
+
model_list_error = ""
|
|
157
|
+
models_seen: list[str] = []
|
|
158
|
+
|
|
159
|
+
client = app_server_client_cls(
|
|
160
|
+
codex_bin=args.codex_bin,
|
|
161
|
+
cwd=cwd,
|
|
162
|
+
timeout_seconds=float(args.timeout_seconds),
|
|
163
|
+
)
|
|
164
|
+
try:
|
|
165
|
+
initialize_app_server(client=client)
|
|
166
|
+
cursor: str | None = None
|
|
167
|
+
for _ in range(25):
|
|
168
|
+
params: dict[str, Any] = {"limit": 200, "includeHidden": True}
|
|
169
|
+
if cursor:
|
|
170
|
+
params["cursor"] = cursor
|
|
171
|
+
page = client.request("model/list", params)
|
|
172
|
+
if not isinstance(page, dict):
|
|
173
|
+
raise bridge_error_cls("model/list did not return a JSON object")
|
|
174
|
+
items = page.get("data") if isinstance(page.get("data"), list) else []
|
|
175
|
+
for item in items:
|
|
176
|
+
if not isinstance(item, dict):
|
|
177
|
+
continue
|
|
178
|
+
for key in ("id", "model", "displayName"):
|
|
179
|
+
token = str(item.get(key) or "").strip()
|
|
180
|
+
if token:
|
|
181
|
+
models_seen.append(token)
|
|
182
|
+
item_id = str(item.get("id") or "").strip()
|
|
183
|
+
item_model = str(item.get("model") or "").strip()
|
|
184
|
+
if model in {item_id, item_model}:
|
|
185
|
+
found = True
|
|
186
|
+
break
|
|
187
|
+
if found:
|
|
188
|
+
break
|
|
189
|
+
cursor = str(page.get("nextCursor") or "").strip() or None
|
|
190
|
+
if cursor is None:
|
|
191
|
+
break
|
|
192
|
+
except bridge_error_cls as exc:
|
|
193
|
+
source = "exec_fallback"
|
|
194
|
+
model_list_error = str(exc)
|
|
195
|
+
finally:
|
|
196
|
+
client.close()
|
|
197
|
+
|
|
198
|
+
fallback_used = False
|
|
199
|
+
fallback_return_code: int | None = None
|
|
200
|
+
fallback_error = ""
|
|
201
|
+
if not found and source == "exec_fallback":
|
|
202
|
+
fallback_used = True
|
|
203
|
+
cmd = [
|
|
204
|
+
args.codex_bin,
|
|
205
|
+
"exec",
|
|
206
|
+
"--skip-git-repo-check",
|
|
207
|
+
"--sandbox",
|
|
208
|
+
"read-only",
|
|
209
|
+
"--ask-for-approval",
|
|
210
|
+
"never",
|
|
211
|
+
"--model",
|
|
212
|
+
model,
|
|
213
|
+
"Reply with OK only.",
|
|
214
|
+
]
|
|
215
|
+
run = run_exec(
|
|
216
|
+
cmd,
|
|
217
|
+
cwd=str(cwd),
|
|
218
|
+
capture_output=True,
|
|
219
|
+
text=True,
|
|
220
|
+
timeout=float(args.timeout_seconds),
|
|
221
|
+
check=False,
|
|
222
|
+
)
|
|
223
|
+
fallback_return_code = int(run.returncode)
|
|
224
|
+
if run.returncode == 0:
|
|
225
|
+
found = True
|
|
226
|
+
else:
|
|
227
|
+
fallback_error = str(run.stderr or run.stdout or "").strip()[:4000]
|
|
228
|
+
|
|
229
|
+
payload = {
|
|
230
|
+
"ok": found,
|
|
231
|
+
"model": model,
|
|
232
|
+
"available": found,
|
|
233
|
+
"source": source,
|
|
234
|
+
"fallback_used": fallback_used,
|
|
235
|
+
"fallback_return_code": fallback_return_code,
|
|
236
|
+
"model_list_error": model_list_error,
|
|
237
|
+
"fallback_error": fallback_error,
|
|
238
|
+
"models_seen_sample": sorted(set(models_seen))[:50],
|
|
239
|
+
}
|
|
240
|
+
print(json.dumps(payload, sort_keys=True))
|
|
241
|
+
return 0 if found else 2
|