@tt-a1i/mco 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +190 -0
- package/bin/mco.js +19 -0
- package/mco +16 -0
- package/package.json +21 -0
- package/runtime/__init__.py +8 -0
- package/runtime/adapters/__init__.py +7 -0
- package/runtime/adapters/claude.py +60 -0
- package/runtime/adapters/codex.py +84 -0
- package/runtime/adapters/gemini.py +48 -0
- package/runtime/adapters/opencode.py +38 -0
- package/runtime/adapters/parsing.py +305 -0
- package/runtime/adapters/qwen.py +38 -0
- package/runtime/adapters/shim.py +251 -0
- package/runtime/artifacts.py +40 -0
- package/runtime/cli.py +341 -0
- package/runtime/config.py +189 -0
- package/runtime/contracts.py +127 -0
- package/runtime/errors.py +43 -0
- package/runtime/orchestrator.py +241 -0
- package/runtime/retry.py +15 -0
- package/runtime/review_engine.py +806 -0
- package/runtime/schemas/review_findings.schema.json +94 -0
- package/runtime/types.py +71 -0
|
@@ -0,0 +1,806 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import asdict, dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, List, Mapping, Optional, Set, Tuple
|
|
10
|
+
|
|
11
|
+
from .adapters import ClaudeAdapter, CodexAdapter, GeminiAdapter, OpenCodeAdapter, QwenAdapter
|
|
12
|
+
from .adapters.parsing import inspect_contract_output
|
|
13
|
+
from .artifacts import expected_paths, task_artifact_root
|
|
14
|
+
from .config import ReviewPolicy
|
|
15
|
+
from .contracts import Evidence, NormalizeContext, NormalizedFinding, ProviderAdapter, ProviderId, TaskInput
|
|
16
|
+
from .orchestrator import OrchestratorRuntime
|
|
17
|
+
from .retry import RetryPolicy
|
|
18
|
+
from .types import AttemptResult, ErrorKind, TaskState
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Instruction appended to every review prompt so providers emit output in the
# machine-parseable findings contract (mirrors schemas/review_findings.schema.json).
STRICT_JSON_CONTRACT = (
    "Return JSON only. Use this exact shape: "
    '{"findings":[{"finding_id":"<id>","severity":"critical|high|medium|low","category":"bug|security|performance|maintainability|test-gap","title":"<title>",'
    '"evidence":{"file":"<path>","line":null,"symbol":null,"snippet":"<snippet>"},'
    '"recommendation":"<fix>","confidence":0.0,"fingerprint":"<stable-hash>"}]}. '
    "If no findings, return {\"findings\":[]}."
)
# Schema file used for providers that accept an output schema (see the codex
# branch in _run_provider); resolved relative to this module so the lookup is
# independent of the process working directory.
REVIEW_FINDINGS_SCHEMA_PATH = Path(__file__).resolve().parent / "schemas" / "review_findings.schema.json"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class ReviewRequest:
    """Immutable description of a single review (or run-mode) invocation."""

    repo_root: str  # path to the repository under review
    prompt: str  # user-supplied prompt; scope/contract text is appended later
    providers: List[ProviderId]  # providers to dispatch (deduplicated downstream)
    artifact_base: str  # base directory under which task artifacts are written
    state_file: str  # orchestrator state file used for idempotent dispatch
    policy: ReviewPolicy  # timeouts, permissions, and enforcement settings
    task_id: Optional[str] = None  # derived from repo_root+prompt when omitted
    idempotency_key: Optional[str] = None  # derived from request+policy when omitted
    target_paths: Optional[List[str]] = None  # review scope; defaults to ["."]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class ReviewResult:
    """Aggregate outcome of a review/run across all requested providers."""

    task_id: str  # resolved orchestrator task id (may reuse a prior task)
    artifact_root: str  # directory holding this task's artifacts
    decision: str  # PASS | PARTIAL | FAIL | ESCALATE | INCONCLUSIVE
    terminal_state: str  # overall TaskState value for the task
    provider_results: Dict[str, Dict[str, object]]  # per-provider detail payloads
    findings_count: int  # total normalized findings across providers
    parse_success_count: int  # providers whose output satisfied the JSON contract
    parse_failure_count: int  # providers whose output failed contract parsing
    schema_valid_count: int  # findings counted as schema-valid by contract inspection
    dropped_findings_count: int  # findings dropped during contract inspection
    created_new_task: bool  # False when a previous run's result was reused
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _sha(value: str) -> str:
|
|
60
|
+
return hashlib.sha256(value.encode("utf-8")).hexdigest()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _stable_payload_hash(payload: object) -> str:
|
|
64
|
+
serialized = json.dumps(payload, ensure_ascii=True, sort_keys=True, separators=(",", ":"))
|
|
65
|
+
return _sha(serialized)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _default_task_id(repo_root: str, prompt: str) -> str:
|
|
69
|
+
return f"task-{_sha(f'{repo_root}:{prompt}')[:16]}"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _default_idempotency_key(
|
|
73
|
+
repo_root: str,
|
|
74
|
+
prompt: str,
|
|
75
|
+
providers: List[ProviderId],
|
|
76
|
+
review_mode: bool,
|
|
77
|
+
policy: ReviewPolicy,
|
|
78
|
+
) -> str:
|
|
79
|
+
mode = "review" if review_mode else "run"
|
|
80
|
+
policy_fingerprint = json.dumps(
|
|
81
|
+
{
|
|
82
|
+
"mode": mode,
|
|
83
|
+
"allow_paths": policy.allow_paths,
|
|
84
|
+
"enforcement_mode": policy.enforcement_mode,
|
|
85
|
+
"provider_permissions": policy.provider_permissions,
|
|
86
|
+
"provider_timeouts": policy.provider_timeouts,
|
|
87
|
+
},
|
|
88
|
+
sort_keys=True,
|
|
89
|
+
ensure_ascii=True,
|
|
90
|
+
)
|
|
91
|
+
return _sha(f"{repo_root}|{prompt}|{','.join(providers)}|{policy_fingerprint}|stage-b-v1")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _build_prompt(user_prompt: str, target_paths: List[str]) -> str:
    """Compose the review prompt: user text, scope line, strict JSON contract."""
    if target_paths:
        scope = ", ".join(target_paths)
    else:
        # An empty scope collapses to the repository root.
        scope = "."
    return f"{user_prompt}\n\nScope: {scope}\n\n{STRICT_JSON_CONTRACT}"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _build_run_prompt(user_prompt: str, target_paths: List[str], allow_paths: List[str]) -> str:
|
|
100
|
+
scope = ", ".join(target_paths) if target_paths else "."
|
|
101
|
+
allowed = ", ".join(allow_paths) if allow_paths else "."
|
|
102
|
+
return f"{user_prompt}\n\nScope: {scope}\nAllowed Paths: {allowed}"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _adapter_registry() -> Mapping[str, ProviderAdapter]:
    """Build the default provider-name -> adapter mapping.

    Fresh adapter instances are constructed on every call; callers can supply
    their own mapping via the ``adapters`` argument of ``run_review``.
    """
    return {
        "claude": ClaudeAdapter(),
        "codex": CodexAdapter(),
        "gemini": GeminiAdapter(),
        "opencode": OpenCodeAdapter(),
        "qwen": QwenAdapter(),
    }
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _read_text(path: Path) -> str:
|
|
116
|
+
return path.read_text(encoding="utf-8") if path.exists() else ""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _write_json(path: Path, payload: object) -> None:
|
|
120
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
121
|
+
path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _write_text(path: Path, content: str) -> None:
|
|
125
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
126
|
+
path.write_text(content, encoding="utf-8")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _output_excerpt(stdout_text: str, stderr_text: str, limit: int = 240) -> str:
|
|
130
|
+
source = stdout_text.strip() or stderr_text.strip()
|
|
131
|
+
if not source:
|
|
132
|
+
return ""
|
|
133
|
+
compact = " ".join(source.split())
|
|
134
|
+
return compact[:limit]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass(frozen=True)
class _ProviderExecutionOutcome:
    """Internal per-provider result collected by ``_run_provider``."""

    provider: str  # provider name as requested
    success: bool  # overall attempt success after retries and contract checks
    parse_ok: bool  # whether stdout satisfied the findings JSON contract
    schema_valid_count: int  # schema-valid findings from contract inspection
    dropped_count: int  # findings dropped during contract inspection
    findings: List[NormalizedFinding]  # normalized findings (review mode only)
    provider_result: Dict[str, object]  # serializable detail for summaries
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _safe_resolve(repo_root: Path, raw_path: str) -> Path:
|
|
149
|
+
candidate_raw = Path(raw_path)
|
|
150
|
+
base = candidate_raw if candidate_raw.is_absolute() else (repo_root / candidate_raw)
|
|
151
|
+
resolved = base.resolve(strict=False)
|
|
152
|
+
repo_resolved = repo_root.resolve(strict=False)
|
|
153
|
+
try:
|
|
154
|
+
resolved.relative_to(repo_resolved)
|
|
155
|
+
except Exception as exc:
|
|
156
|
+
raise ValueError(f"path_outside_repo: {raw_path}") from exc
|
|
157
|
+
return resolved
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _normalize_scopes(repo_root: str, target_paths: List[str], allow_paths: List[str]) -> Tuple[List[str], List[str]]:
|
|
161
|
+
root = Path(repo_root).resolve(strict=False)
|
|
162
|
+
raw_allow = allow_paths if allow_paths else ["."]
|
|
163
|
+
raw_target = target_paths if target_paths else ["."]
|
|
164
|
+
|
|
165
|
+
normalized_allow: List[str] = []
|
|
166
|
+
allow_resolved: List[Path] = []
|
|
167
|
+
for raw_path in raw_allow:
|
|
168
|
+
resolved = _safe_resolve(root, raw_path)
|
|
169
|
+
rel = resolved.relative_to(root).as_posix()
|
|
170
|
+
rel_value = rel if rel else "."
|
|
171
|
+
normalized_allow.append(rel_value)
|
|
172
|
+
allow_resolved.append(resolved)
|
|
173
|
+
|
|
174
|
+
normalized_target: List[str] = []
|
|
175
|
+
for raw_path in raw_target:
|
|
176
|
+
resolved = _safe_resolve(root, raw_path)
|
|
177
|
+
in_allow = False
|
|
178
|
+
for allow_root in allow_resolved:
|
|
179
|
+
if resolved == allow_root or allow_root in resolved.parents:
|
|
180
|
+
in_allow = True
|
|
181
|
+
break
|
|
182
|
+
if not in_allow:
|
|
183
|
+
raise ValueError(f"target_path_outside_allow_paths: {raw_path}")
|
|
184
|
+
rel = resolved.relative_to(root).as_posix()
|
|
185
|
+
normalized_target.append(rel if rel else ".")
|
|
186
|
+
|
|
187
|
+
return normalized_target, normalized_allow
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _supported_permission_keys(adapter: ProviderAdapter) -> Set[str]:
|
|
191
|
+
fn = getattr(adapter, "supported_permission_keys", None)
|
|
192
|
+
if not callable(fn):
|
|
193
|
+
return set()
|
|
194
|
+
try:
|
|
195
|
+
keys = fn()
|
|
196
|
+
except Exception:
|
|
197
|
+
return set()
|
|
198
|
+
if not isinstance(keys, list):
|
|
199
|
+
return set()
|
|
200
|
+
return {str(item).strip() for item in keys if str(item).strip()}
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _provider_stall_timeout_seconds(policy: ReviewPolicy, provider: str) -> int:
|
|
204
|
+
timeout = policy.provider_timeouts.get(provider, policy.stall_timeout_seconds)
|
|
205
|
+
try:
|
|
206
|
+
value = int(timeout)
|
|
207
|
+
except Exception:
|
|
208
|
+
value = policy.stall_timeout_seconds
|
|
209
|
+
return value if value > 0 else policy.stall_timeout_seconds
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _poll_interval_seconds(policy: ReviewPolicy) -> float:
|
|
213
|
+
try:
|
|
214
|
+
value = float(policy.poll_interval_seconds)
|
|
215
|
+
except Exception:
|
|
216
|
+
value = 1.0
|
|
217
|
+
return value if value > 0 else 1.0
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _timestamp_to_iso(timestamp: float) -> str:
|
|
221
|
+
return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(timestamp))
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _raw_output_size_snapshot(artifact_path: str, provider: str) -> Tuple[int, int]:
|
|
225
|
+
root = Path(artifact_path) / "raw"
|
|
226
|
+
stdout_path = root / f"{provider}.stdout.log"
|
|
227
|
+
stderr_path = root / f"{provider}.stderr.log"
|
|
228
|
+
stdout_size = stdout_path.stat().st_size if stdout_path.exists() else 0
|
|
229
|
+
stderr_size = stderr_path.stat().st_size if stderr_path.exists() else 0
|
|
230
|
+
return (stdout_size, stderr_size)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _ensure_provider_artifacts(artifact_base: str, task_id: str, provider: str) -> None:
    """Backfill the provider's expected artifact files when they are missing.

    Ensures providers/<p>.json and raw/<p>.{stdout,stderr}.log all exist,
    writing placeholder content so later readers never hit a missing file.
    """
    paths = expected_paths(artifact_base, task_id, (provider,))
    result_path = paths[f"providers/{provider}.json"]
    if not result_path.exists():
        _write_json(result_path, {"provider": provider, "note": "provider result fallback"})
    for log_key in (f"raw/{provider}.stdout.log", f"raw/{provider}.stderr.log"):
        log_path = paths[log_key]
        if not log_path.exists():
            _write_text(log_path, "")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _deserialize_findings(payload: object) -> List[NormalizedFinding]:
    """Rebuild ``NormalizedFinding`` objects from a JSON-ish list of dicts.

    Non-list payloads, non-dict items, and items that fail any field lookup
    or coercion are silently skipped, so a partially malformed payload still
    yields its valid subset.
    """
    findings: List[NormalizedFinding] = []
    # Anything that is not a list counts as "no findings".
    findings_payload = payload if isinstance(payload, list) else []
    serialized_findings = [item for item in findings_payload if isinstance(item, dict)]
    for item in serialized_findings:
        try:
            evidence_raw = item.get("evidence", {})
            if not isinstance(evidence_raw, dict):
                continue
            # Optional evidence fields are type-checked and replaced with None
            # rather than coerced, to avoid inventing values.
            evidence = Evidence(
                file=str(evidence_raw.get("file", "")),
                line=evidence_raw.get("line") if isinstance(evidence_raw.get("line"), int) else None,
                snippet=str(evidence_raw.get("snippet", "")),
                symbol=evidence_raw.get("symbol") if isinstance(evidence_raw.get("symbol"), str) else None,
            )
            # Required fields use direct indexing: a missing key raises and the
            # item is dropped by the except below. Optional fields default.
            finding = NormalizedFinding(
                task_id=str(item["task_id"]),
                provider=item["provider"],
                finding_id=str(item["finding_id"]),
                severity=item["severity"],
                category=item["category"],
                title=str(item["title"]),
                evidence=evidence,
                recommendation=str(item.get("recommendation", "")),
                confidence=float(item.get("confidence", 0.0)),
                fingerprint=str(item.get("fingerprint", "")),
                raw_ref=str(item.get("raw_ref", "")),
            )
        except Exception:
            # Malformed item: skip it and keep deserializing the rest.
            continue
        findings.append(finding)
    return findings
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _run_provider(
    request: ReviewRequest,
    runtime: OrchestratorRuntime,
    adapter_map: Mapping[str, ProviderAdapter],
    resolved_task_id: str,
    idempotency_key: str,
    full_prompt: str,
    target_paths: List[str],
    allow_paths: List[str],
    review_mode: bool,
    provider: str,
) -> _ProviderExecutionOutcome:
    """Execute one provider end to end and collect its outcome.

    Flow: resolve the adapter, check availability/auth, enforce permission
    policy, then dispatch through the orchestrator's retry loop. The inner
    ``runner`` starts the provider, polls it while watching raw-log growth as
    a progress signal, cancels on stall or hard deadline, and (in review
    mode) normalizes output and checks the findings JSON contract.
    """
    adapter = adapter_map.get(provider)
    if adapter is None:
        # Unknown provider: emit placeholder artifacts so downstream readers
        # still find the expected files.
        _ensure_provider_artifacts(request.artifact_base, resolved_task_id, provider)
        return _ProviderExecutionOutcome(
            provider=provider,
            success=False,
            parse_ok=False,
            schema_valid_count=0,
            dropped_count=0,
            findings=[],
            provider_result={"success": False, "reason": "adapter_not_implemented"},
        )

    # Skip providers whose CLI is missing or not authenticated.
    presence = adapter.detect()
    if not presence.detected or not presence.auth_ok:
        _ensure_provider_artifacts(request.artifact_base, resolved_task_id, provider)
        return _ProviderExecutionOutcome(
            provider=provider,
            success=False,
            parse_ok=False,
            schema_valid_count=0,
            dropped_count=0,
            findings=[],
            provider_result={
                "success": False,
                "reason": "provider_unavailable",
                "detected": presence.detected,
                "auth_ok": presence.auth_ok,
            },
        )

    requested_permissions = request.policy.provider_permissions.get(provider, {})
    requested_permissions = requested_permissions if isinstance(requested_permissions, dict) else {}
    supported_keys = _supported_permission_keys(adapter)
    # NOTE(review): the unknown-key check uses the RAW key (`key not in
    # supported_keys`) while the applied-permission filter uses the STRIPPED
    # key, so a key with surrounding whitespace can be both "applied" and
    # "unknown" — in strict mode that fails enforcement. Confirm intended.
    unknown_permission_keys = sorted(
        key for key in requested_permissions.keys() if str(key).strip() and key not in supported_keys
    )
    effective_permissions = {
        str(key): str(value)
        for key, value in requested_permissions.items()
        if str(key).strip() in supported_keys
    }
    if unknown_permission_keys and request.policy.enforcement_mode == "strict":
        # Strict mode refuses to run with permissions the adapter can't honor.
        _ensure_provider_artifacts(request.artifact_base, resolved_task_id, provider)
        return _ProviderExecutionOutcome(
            provider=provider,
            success=False,
            parse_ok=False,
            schema_valid_count=0,
            dropped_count=0,
            findings=[],
            provider_result={
                "success": False,
                "reason": "permission_enforcement_failed",
                "enforcement_mode": request.policy.enforcement_mode,
                "requested_permissions": requested_permissions,
                "supported_permission_keys": sorted(supported_keys),
                "unknown_permission_keys": unknown_permission_keys,
            },
        )

    # Per-provider dedupe key for the orchestrator's retry/dispatch ledger.
    dispatch_key = _sha(f"{idempotency_key}:{provider}:dispatch-v2")
    provider_stall_timeout = _provider_stall_timeout_seconds(request.policy, provider)
    poll_interval_seconds = _poll_interval_seconds(request.policy)
    # Hard wall-clock ceiling applies only in review mode (0 disables it).
    review_hard_timeout_seconds = request.policy.review_hard_timeout_seconds if review_mode else 0

    def runner(_attempt: int) -> AttemptResult:
        # One dispatch attempt: start the provider, poll to completion or
        # cancellation, then normalize/inspect the output.
        run_ref = None
        try:
            metadata = {
                "artifact_root": request.artifact_base,
                "allow_paths": allow_paths,
                "provider_permissions": effective_permissions,
                "enforcement_mode": request.policy.enforcement_mode,
            }
            # Codex supports a structured-output schema file in review mode.
            if review_mode and provider == "codex" and REVIEW_FINDINGS_SCHEMA_PATH.exists():
                metadata["output_schema_path"] = str(REVIEW_FINDINGS_SCHEMA_PATH)
            input_task = TaskInput(
                task_id=resolved_task_id,
                prompt=full_prompt,
                repo_root=request.repo_root,
                target_paths=target_paths,
                timeout_seconds=provider_stall_timeout,
                metadata=metadata,
            )
            run_ref = adapter.run(input_task)
            started = time.time()
            last_progress_at = started
            # Raw-log byte sizes serve as the liveness signal between polls.
            last_snapshot = _raw_output_size_snapshot(run_ref.artifact_path, provider)
            status = None
            while True:
                status = adapter.poll(run_ref)
                now = time.time()
                if status.completed:
                    break

                current_snapshot = _raw_output_size_snapshot(run_ref.artifact_path, provider)
                if current_snapshot != last_snapshot:
                    last_snapshot = current_snapshot
                    last_progress_at = now

                cancel_reason = ""
                if review_hard_timeout_seconds > 0 and (now - started) > review_hard_timeout_seconds:
                    cancel_reason = "hard_deadline_exceeded"
                elif (now - last_progress_at) > provider_stall_timeout:
                    cancel_reason = "stall_timeout"

                if cancel_reason:
                    # Best-effort cancel, then report a retryable timeout with
                    # whatever output the provider managed to produce.
                    if run_ref is not None:
                        try:
                            adapter.cancel(run_ref)
                        except Exception:
                            pass
                    raw_dir = Path(run_ref.artifact_path) / "raw"
                    timeout_stdout = _read_text(raw_dir / f"{provider}.stdout.log")
                    timeout_stderr = _read_text(raw_dir / f"{provider}.stderr.log")
                    timeout_payload = {
                        "cancel_reason": cancel_reason,
                        "wall_clock_seconds": round(now - started, 3),
                        "last_progress_at": _timestamp_to_iso(last_progress_at),
                        "output_excerpt": _output_excerpt(timeout_stdout, timeout_stderr),
                        "parse_ok": False,
                        "parse_reason": "",
                        "schema_valid_count": 0,
                        "dropped_count": 0,
                        "findings": [],
                        "run_ref": asdict(run_ref),
                        "status": asdict(status),
                    }
                    return AttemptResult(
                        success=False,
                        output=timeout_payload,
                        error_kind=ErrorKind.RETRYABLE_TIMEOUT,
                        stderr=cancel_reason,
                    )

                time.sleep(poll_interval_seconds)

            # Defensive re-check after the loop (the loop only exits via
            # `break` on completion or `return` on cancellation).
            if status is None or not status.completed:
                if run_ref is not None:
                    try:
                        adapter.cancel(run_ref)
                    except Exception:
                        pass
                fallback_payload = {
                    "cancel_reason": "provider_poll_timeout",
                    "wall_clock_seconds": round(time.time() - started, 3),
                    "last_progress_at": _timestamp_to_iso(last_progress_at),
                    "output_excerpt": "",
                    "parse_ok": False,
                    "parse_reason": "",
                    "schema_valid_count": 0,
                    "dropped_count": 0,
                    "findings": [],
                    "run_ref": asdict(run_ref) if run_ref is not None else None,
                    "status": asdict(status) if status is not None else None,
                }
                return AttemptResult(
                    success=False,
                    output=fallback_payload,
                    error_kind=ErrorKind.RETRYABLE_TIMEOUT,
                    stderr="provider_poll_timeout",
                )

            raw_dir = Path(run_ref.artifact_path) / "raw"
            raw_stdout = _read_text(raw_dir / f"{provider}.stdout.log")
            raw_stderr = _read_text(raw_dir / f"{provider}.stderr.log")
            findings: List[NormalizedFinding] = []
            parse_ok = False
            parse_reason = "not_applicable"
            schema_valid_count = 0
            dropped_count = 0
            success = status.attempt_state == "SUCCEEDED"
            if review_mode:
                # Normalize provider stdout into findings, then inspect it
                # against the strict JSON contract independently.
                findings = adapter.normalize(
                    raw_stdout,
                    NormalizeContext(
                        task_id=resolved_task_id,
                        provider=provider,
                        repo_root=request.repo_root,
                        raw_ref=f"raw/{provider}.stdout.log",
                    ),
                )
                contract_info = inspect_contract_output(raw_stdout)
                parse_ok = bool(contract_info["parse_ok"])
                parse_reason = str(contract_info.get("parse_reason", ""))
                schema_valid_count = int(contract_info["schema_valid_count"])
                dropped_count = int(contract_info["dropped_count"])
                if request.policy.enforce_findings_contract:
                    success = status.attempt_state == "SUCCEEDED" and parse_ok
                if request.policy.require_non_empty_findings and success and len(findings) == 0:
                    success = False

            payload = {
                "provider": provider,
                "status": asdict(status),
                "run_ref": asdict(run_ref),
                "cancel_reason": "",
                "wall_clock_seconds": round(time.time() - started, 3),
                "last_progress_at": _timestamp_to_iso(last_progress_at),
                "output_excerpt": _output_excerpt(raw_stdout, raw_stderr),
                "parse_ok": parse_ok,
                "parse_reason": parse_reason,
                "schema_valid_count": schema_valid_count,
                "dropped_count": dropped_count,
                "findings": [asdict(item) for item in findings],
            }
            if success:
                return AttemptResult(success=True, output=payload)
            if status.error_kind:
                return AttemptResult(success=False, output=payload, error_kind=status.error_kind)
            return AttemptResult(success=False, output=payload, error_kind=ErrorKind.NORMALIZATION_ERROR)
        except Exception as exc:  # pragma: no cover - guarded by contract tests
            return AttemptResult(success=False, error_kind=ErrorKind.NORMALIZATION_ERROR, stderr=str(exc))

    run_result = runtime.run_with_retry(resolved_task_id, provider, dispatch_key, runner)
    # The final attempt's payload (may be {} if the runner raised early).
    output = run_result.output if isinstance(run_result.output, dict) else {}
    parse_ok = bool(output.get("parse_ok", False))
    provider_schema_valid = int(output.get("schema_valid_count", 0))
    provider_dropped = int(output.get("dropped_count", 0))
    findings = _deserialize_findings(output.get("findings"))

    wall_clock_value = output.get("wall_clock_seconds")
    try:
        wall_clock_seconds = float(wall_clock_value) if wall_clock_value is not None else 0.0
    except Exception:
        wall_clock_seconds = 0.0

    # Serializable per-provider summary persisted alongside the task.
    provider_result = {
        "success": run_result.success,
        "attempts": run_result.attempts,
        "final_error": run_result.final_error.value if run_result.final_error else None,
        "deduped_dispatch": run_result.deduped_dispatch,
        "cancel_reason": str(output.get("cancel_reason", "")),
        "wall_clock_seconds": wall_clock_seconds,
        "last_progress_at": str(output.get("last_progress_at", "")),
        "output_excerpt": str(output.get("output_excerpt", "")),
        "parse_ok": parse_ok,
        "parse_reason": str(output.get("parse_reason", "")),
        "schema_valid_count": provider_schema_valid,
        "dropped_count": provider_dropped,
        "findings_count": len(findings),
        "output_path": output.get("status", {}).get("output_path") if isinstance(output.get("status"), dict) else None,
        "requested_permissions": requested_permissions,
        "applied_permissions": effective_permissions,
        "unknown_permission_keys": unknown_permission_keys,
        "enforcement_mode": request.policy.enforcement_mode,
    }
    _ensure_provider_artifacts(request.artifact_base, resolved_task_id, provider)
    return _ProviderExecutionOutcome(
        provider=provider,
        success=run_result.success,
        parse_ok=parse_ok,
        schema_valid_count=provider_schema_valid,
        dropped_count=provider_dropped,
        findings=findings,
        provider_result=provider_result,
    )
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def run_review(
|
|
551
|
+
request: ReviewRequest,
|
|
552
|
+
adapters: Optional[Mapping[str, ProviderAdapter]] = None,
|
|
553
|
+
review_mode: bool = True,
|
|
554
|
+
write_artifacts: bool = True,
|
|
555
|
+
) -> ReviewResult:
|
|
556
|
+
adapter_map = dict(adapters or _adapter_registry())
|
|
557
|
+
task_id = request.task_id or _default_task_id(request.repo_root, request.prompt)
|
|
558
|
+
idempotency_key = request.idempotency_key or _default_idempotency_key(
|
|
559
|
+
request.repo_root,
|
|
560
|
+
request.prompt,
|
|
561
|
+
request.providers,
|
|
562
|
+
review_mode,
|
|
563
|
+
request.policy,
|
|
564
|
+
)
|
|
565
|
+
|
|
566
|
+
runtime = OrchestratorRuntime(
|
|
567
|
+
retry_policy=RetryPolicy(max_retries=request.policy.max_retries, base_delay_seconds=1.0, backoff_multiplier=2.0),
|
|
568
|
+
state_file=request.state_file,
|
|
569
|
+
)
|
|
570
|
+
created_new_task, resolved_task_id = runtime.submit(task_id, idempotency_key)
|
|
571
|
+
artifact_root = str(task_artifact_root(request.artifact_base, resolved_task_id))
|
|
572
|
+
root_path = Path(artifact_root)
|
|
573
|
+
root_path.mkdir(parents=True, exist_ok=True)
|
|
574
|
+
|
|
575
|
+
if not created_new_task:
|
|
576
|
+
run_file = root_path / "run.json"
|
|
577
|
+
if run_file.exists():
|
|
578
|
+
existing = json.loads(run_file.read_text(encoding="utf-8"))
|
|
579
|
+
return ReviewResult(
|
|
580
|
+
task_id=resolved_task_id,
|
|
581
|
+
artifact_root=artifact_root,
|
|
582
|
+
decision=str(existing.get("decision", "INCONCLUSIVE")),
|
|
583
|
+
terminal_state=str(existing.get("terminal_state", TaskState.FAILED.value)),
|
|
584
|
+
provider_results=dict(existing.get("provider_results", {})),
|
|
585
|
+
findings_count=int(existing.get("findings_count", 0)),
|
|
586
|
+
parse_success_count=int(existing.get("parse_success_count", 0)),
|
|
587
|
+
parse_failure_count=int(existing.get("parse_failure_count", 0)),
|
|
588
|
+
schema_valid_count=int(existing.get("schema_valid_count", 0)),
|
|
589
|
+
dropped_findings_count=int(existing.get("dropped_findings_count", 0)),
|
|
590
|
+
created_new_task=False,
|
|
591
|
+
)
|
|
592
|
+
|
|
593
|
+
normalized_targets, normalized_allow_paths = _normalize_scopes(
|
|
594
|
+
request.repo_root,
|
|
595
|
+
request.target_paths or ["."],
|
|
596
|
+
request.policy.allow_paths or ["."],
|
|
597
|
+
)
|
|
598
|
+
full_prompt = (
|
|
599
|
+
_build_prompt(request.prompt, normalized_targets)
|
|
600
|
+
if review_mode
|
|
601
|
+
else _build_run_prompt(request.prompt, normalized_targets, normalized_allow_paths)
|
|
602
|
+
)
|
|
603
|
+
provider_order: List[str] = []
|
|
604
|
+
provider_seen = set()
|
|
605
|
+
for provider in request.providers:
|
|
606
|
+
if provider in provider_seen:
|
|
607
|
+
continue
|
|
608
|
+
provider_seen.add(provider)
|
|
609
|
+
provider_order.append(provider)
|
|
610
|
+
provider_order = sorted(provider_order)
|
|
611
|
+
|
|
612
|
+
if request.policy.max_provider_parallelism <= 0:
|
|
613
|
+
max_workers = max(1, len(provider_order))
|
|
614
|
+
else:
|
|
615
|
+
max_workers = max(1, min(len(provider_order), request.policy.max_provider_parallelism))
|
|
616
|
+
outcomes: Dict[str, _ProviderExecutionOutcome] = {}
|
|
617
|
+
if max_workers <= 1:
|
|
618
|
+
for provider in provider_order:
|
|
619
|
+
outcomes[provider] = _run_provider(
|
|
620
|
+
request,
|
|
621
|
+
runtime,
|
|
622
|
+
adapter_map,
|
|
623
|
+
resolved_task_id,
|
|
624
|
+
idempotency_key,
|
|
625
|
+
full_prompt,
|
|
626
|
+
normalized_targets,
|
|
627
|
+
normalized_allow_paths,
|
|
628
|
+
review_mode,
|
|
629
|
+
provider,
|
|
630
|
+
)
|
|
631
|
+
else:
|
|
632
|
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
633
|
+
futures = {
|
|
634
|
+
executor.submit(
|
|
635
|
+
_run_provider,
|
|
636
|
+
request,
|
|
637
|
+
runtime,
|
|
638
|
+
adapter_map,
|
|
639
|
+
resolved_task_id,
|
|
640
|
+
idempotency_key,
|
|
641
|
+
full_prompt,
|
|
642
|
+
normalized_targets,
|
|
643
|
+
normalized_allow_paths,
|
|
644
|
+
review_mode,
|
|
645
|
+
provider,
|
|
646
|
+
): provider
|
|
647
|
+
for provider in provider_order
|
|
648
|
+
}
|
|
649
|
+
for future in as_completed(futures):
|
|
650
|
+
provider = futures[future]
|
|
651
|
+
try:
|
|
652
|
+
outcomes[provider] = future.result()
|
|
653
|
+
except Exception as exc: # pragma: no cover - protective guard
|
|
654
|
+
_ensure_provider_artifacts(request.artifact_base, resolved_task_id, provider)
|
|
655
|
+
outcomes[provider] = _ProviderExecutionOutcome(
|
|
656
|
+
provider=provider,
|
|
657
|
+
success=False,
|
|
658
|
+
parse_ok=False,
|
|
659
|
+
schema_valid_count=0,
|
|
660
|
+
dropped_count=0,
|
|
661
|
+
findings=[],
|
|
662
|
+
provider_result={"success": False, "reason": "internal_error", "error": str(exc)},
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
provider_results: Dict[str, Dict[str, object]] = {}
|
|
666
|
+
required_provider_success: Dict[str, bool] = {}
|
|
667
|
+
aggregated_findings: List[NormalizedFinding] = []
|
|
668
|
+
parse_success_count = 0
|
|
669
|
+
parse_failure_count = 0
|
|
670
|
+
schema_valid_count = 0
|
|
671
|
+
dropped_findings_count = 0
|
|
672
|
+
|
|
673
|
+
for provider in provider_order:
|
|
674
|
+
outcome = outcomes[provider]
|
|
675
|
+
provider_results[provider] = outcome.provider_result
|
|
676
|
+
required_provider_success[provider] = outcome.success
|
|
677
|
+
aggregated_findings.extend(outcome.findings)
|
|
678
|
+
if review_mode:
|
|
679
|
+
if outcome.parse_ok:
|
|
680
|
+
parse_success_count += 1
|
|
681
|
+
else:
|
|
682
|
+
parse_failure_count += 1
|
|
683
|
+
schema_valid_count += outcome.schema_valid_count
|
|
684
|
+
dropped_findings_count += outcome.dropped_count
|
|
685
|
+
|
|
686
|
+
terminal_state = runtime.evaluate_terminal_state(required_provider_success)
|
|
687
|
+
aggregated_findings.sort(key=lambda item: (item.provider, item.finding_id, item.fingerprint))
|
|
688
|
+
|
|
689
|
+
counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
|
|
690
|
+
for finding in aggregated_findings:
|
|
691
|
+
counts[finding.severity] = counts.get(finding.severity, 0) + 1
|
|
692
|
+
|
|
693
|
+
if review_mode and counts.get("critical", 0) > 0:
|
|
694
|
+
decision = "FAIL"
|
|
695
|
+
elif review_mode and counts.get("high", 0) >= request.policy.high_escalation_threshold:
|
|
696
|
+
decision = "ESCALATE"
|
|
697
|
+
elif review_mode and request.policy.enforce_findings_contract and len(aggregated_findings) == 0:
|
|
698
|
+
decision = "INCONCLUSIVE"
|
|
699
|
+
elif review_mode and terminal_state == TaskState.FAILED:
|
|
700
|
+
decision = "FAIL"
|
|
701
|
+
elif review_mode and terminal_state == TaskState.PARTIAL_SUCCESS:
|
|
702
|
+
decision = "PARTIAL"
|
|
703
|
+
elif not review_mode and terminal_state == TaskState.FAILED:
|
|
704
|
+
decision = "FAIL"
|
|
705
|
+
elif not review_mode and terminal_state == TaskState.PARTIAL_SUCCESS:
|
|
706
|
+
decision = "PARTIAL"
|
|
707
|
+
else:
|
|
708
|
+
decision = "PASS"
|
|
709
|
+
|
|
710
|
+
findings_json = [
|
|
711
|
+
asdict(item)
|
|
712
|
+
for item in aggregated_findings
|
|
713
|
+
]
|
|
714
|
+
|
|
715
|
+
if review_mode and write_artifacts:
|
|
716
|
+
_write_json(root_path / "findings.json", findings_json)
|
|
717
|
+
|
|
718
|
+
summary = [
|
|
719
|
+
f"# {'Review' if review_mode else 'Run'} Summary ({resolved_task_id})",
|
|
720
|
+
"",
|
|
721
|
+
f"- Decision: {decision}",
|
|
722
|
+
f"- Terminal state: {terminal_state.value}",
|
|
723
|
+
f"- Providers: {', '.join(provider_order)}",
|
|
724
|
+
f"- Findings total: {len(aggregated_findings)}",
|
|
725
|
+
f"- Parse success count: {parse_success_count}",
|
|
726
|
+
f"- Parse failure count: {parse_failure_count}",
|
|
727
|
+
f"- Schema valid finding count: {schema_valid_count}",
|
|
728
|
+
f"- Dropped finding count: {dropped_findings_count}",
|
|
729
|
+
f"- Allow paths: {', '.join(normalized_allow_paths)}",
|
|
730
|
+
f"- Enforcement mode: {request.policy.enforcement_mode}",
|
|
731
|
+
f"- Strict contract: {request.policy.enforce_findings_contract}",
|
|
732
|
+
"",
|
|
733
|
+
"## Severity Counts",
|
|
734
|
+
f"- critical: {counts['critical']}",
|
|
735
|
+
f"- high: {counts['high']}",
|
|
736
|
+
f"- medium: {counts['medium']}",
|
|
737
|
+
f"- low: {counts['low']}",
|
|
738
|
+
"",
|
|
739
|
+
"## Provider Results",
|
|
740
|
+
]
|
|
741
|
+
for provider in provider_order:
|
|
742
|
+
details = provider_results.get(provider, {})
|
|
743
|
+
success = bool(details.get("success"))
|
|
744
|
+
parse_reason = str(details.get("parse_reason", ""))
|
|
745
|
+
cancel_reason = str(details.get("cancel_reason", ""))
|
|
746
|
+
excerpt = str(details.get("output_excerpt", ""))
|
|
747
|
+
summary.append(
|
|
748
|
+
f"- {provider}: success={success}, final_error={details.get('final_error')}, parse_reason={parse_reason or '-'}, cancel_reason={cancel_reason or '-'}"
|
|
749
|
+
)
|
|
750
|
+
if excerpt:
|
|
751
|
+
summary.append(f" excerpt: {excerpt}")
|
|
752
|
+
if write_artifacts:
|
|
753
|
+
_write_text(root_path / "summary.md", "\n".join(summary))
|
|
754
|
+
|
|
755
|
+
decision_lines = [f"# {'Review' if review_mode else 'Run'} Decision ({resolved_task_id})", ""]
|
|
756
|
+
decision_lines.append(f"- decision: {decision}")
|
|
757
|
+
decision_lines.append(f"- terminal_state: {terminal_state.value}")
|
|
758
|
+
if review_mode:
|
|
759
|
+
decision_lines.append(
|
|
760
|
+
f"- rule_trace: critical={counts['critical']}, high={counts['high']}, findings={len(aggregated_findings)}"
|
|
761
|
+
)
|
|
762
|
+
else:
|
|
763
|
+
success_count = sum(1 for value in required_provider_success.values() if value)
|
|
764
|
+
decision_lines.append(
|
|
765
|
+
f"- run_trace: providers={len(required_provider_success)}, success={success_count}, failed={len(required_provider_success) - success_count}"
|
|
766
|
+
)
|
|
767
|
+
if write_artifacts:
|
|
768
|
+
_write_text(root_path / "decision.md", "\n".join(decision_lines))
|
|
769
|
+
|
|
770
|
+
run_payload = {
|
|
771
|
+
"task_id": resolved_task_id,
|
|
772
|
+
"mode": "review" if review_mode else "run",
|
|
773
|
+
"created_new_task": created_new_task,
|
|
774
|
+
"terminal_state": terminal_state.value,
|
|
775
|
+
"decision": decision,
|
|
776
|
+
"effective_cwd": str(Path(request.repo_root).resolve(strict=False)),
|
|
777
|
+
"allow_paths": normalized_allow_paths,
|
|
778
|
+
"allow_paths_hash": _stable_payload_hash(normalized_allow_paths),
|
|
779
|
+
"target_paths": normalized_targets,
|
|
780
|
+
"enforcement_mode": request.policy.enforcement_mode,
|
|
781
|
+
"enforce_findings_contract": request.policy.enforce_findings_contract,
|
|
782
|
+
"provider_permissions": request.policy.provider_permissions,
|
|
783
|
+
"permissions_hash": _stable_payload_hash(request.policy.provider_permissions),
|
|
784
|
+
"provider_results": provider_results,
|
|
785
|
+
"findings_count": len(aggregated_findings),
|
|
786
|
+
"parse_success_count": parse_success_count,
|
|
787
|
+
"parse_failure_count": parse_failure_count,
|
|
788
|
+
"schema_valid_count": schema_valid_count,
|
|
789
|
+
"dropped_findings_count": dropped_findings_count,
|
|
790
|
+
}
|
|
791
|
+
if write_artifacts:
|
|
792
|
+
_write_json(root_path / "run.json", run_payload)
|
|
793
|
+
|
|
794
|
+
return ReviewResult(
|
|
795
|
+
task_id=resolved_task_id,
|
|
796
|
+
artifact_root=artifact_root,
|
|
797
|
+
decision=decision,
|
|
798
|
+
terminal_state=terminal_state.value,
|
|
799
|
+
provider_results=provider_results,
|
|
800
|
+
findings_count=len(aggregated_findings),
|
|
801
|
+
parse_success_count=parse_success_count,
|
|
802
|
+
parse_failure_count=parse_failure_count,
|
|
803
|
+
schema_valid_count=schema_valid_count,
|
|
804
|
+
dropped_findings_count=dropped_findings_count,
|
|
805
|
+
created_new_task=created_new_task,
|
|
806
|
+
)
|