@tt-a1i/mco 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +190 -0
- package/bin/mco.js +19 -0
- package/mco +16 -0
- package/package.json +21 -0
- package/runtime/__init__.py +8 -0
- package/runtime/adapters/__init__.py +7 -0
- package/runtime/adapters/claude.py +60 -0
- package/runtime/adapters/codex.py +84 -0
- package/runtime/adapters/gemini.py +48 -0
- package/runtime/adapters/opencode.py +38 -0
- package/runtime/adapters/parsing.py +305 -0
- package/runtime/adapters/qwen.py +38 -0
- package/runtime/adapters/shim.py +251 -0
- package/runtime/artifacts.py +40 -0
- package/runtime/cli.py +341 -0
- package/runtime/config.py +189 -0
- package/runtime/contracts.py +127 -0
- package/runtime/errors.py +43 -0
- package/runtime/orchestrator.py +241 -0
- package/runtime/retry.py +15 -0
- package/runtime/review_engine.py +806 -0
- package/runtime/schemas/review_findings.schema.json +94 -0
- package/runtime/types.py +71 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from ..contracts import Evidence, NormalizedFinding, NormalizeContext, ProviderId
|
|
8
|
+
|
|
9
|
+
# Closed vocabularies for the review-findings contract; anything outside
# these sets is rejected during validation and normalization.
ALLOWED_SEVERITY = {"critical", "high", "medium", "low"}
ALLOWED_CATEGORY = {"bug", "security", "performance", "maintainability", "test-gap"}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _decode_json_fragments(text: str) -> List[Any]:
|
|
14
|
+
decoder = json.JSONDecoder()
|
|
15
|
+
payloads: List[Any] = []
|
|
16
|
+
index = 0
|
|
17
|
+
while index < len(text):
|
|
18
|
+
match = re.search(r"[\{\[]", text[index:])
|
|
19
|
+
if not match:
|
|
20
|
+
break
|
|
21
|
+
start = index + match.start()
|
|
22
|
+
try:
|
|
23
|
+
payload, end = decoder.raw_decode(text, start)
|
|
24
|
+
except json.JSONDecodeError:
|
|
25
|
+
index = start + 1
|
|
26
|
+
continue
|
|
27
|
+
payloads.append(payload)
|
|
28
|
+
index = end
|
|
29
|
+
return payloads
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _iter_nested_strings(payload: Any) -> List[str]:
|
|
33
|
+
nested_strings: List[str] = []
|
|
34
|
+
stack = [payload]
|
|
35
|
+
while stack:
|
|
36
|
+
node = stack.pop()
|
|
37
|
+
if isinstance(node, dict):
|
|
38
|
+
for value in node.values():
|
|
39
|
+
if isinstance(value, str):
|
|
40
|
+
nested_strings.append(value)
|
|
41
|
+
elif isinstance(value, (dict, list)):
|
|
42
|
+
stack.append(value)
|
|
43
|
+
elif isinstance(node, list):
|
|
44
|
+
for value in node:
|
|
45
|
+
if isinstance(value, str):
|
|
46
|
+
nested_strings.append(value)
|
|
47
|
+
elif isinstance(value, (dict, list)):
|
|
48
|
+
stack.append(value)
|
|
49
|
+
return nested_strings
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _looks_like_nested_json_blob(value: str) -> bool:
|
|
53
|
+
stripped = value.strip()
|
|
54
|
+
if not stripped:
|
|
55
|
+
return False
|
|
56
|
+
lowered = stripped.lower()
|
|
57
|
+
if stripped.startswith("{") or stripped.startswith("["):
|
|
58
|
+
return True
|
|
59
|
+
if "```json" in lowered:
|
|
60
|
+
return True
|
|
61
|
+
if "findings" in lowered and ("{" in stripped or "}" in stripped):
|
|
62
|
+
return True
|
|
63
|
+
return False
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def extract_json_payloads(text: str) -> List[Any]:
    """Collect every distinct JSON payload embedded in *text*.

    Discovery runs in four passes: the whole text, fenced ```json blocks,
    individual lines, and finally JSON blobs nested inside string fields of
    already-collected payloads (event streams often wrap the real contract
    output this way).  Duplicates are dropped via a canonical-JSON
    signature; first occurrence wins.
    """
    collected: List[Any] = []
    signatures = set()

    def remember(payload: Any) -> bool:
        try:
            key = json.dumps(payload, sort_keys=True, ensure_ascii=True)
        except Exception:
            # Non-serializable payloads fall back to repr for dedup only.
            key = repr(payload)
        if key in signatures:
            return False
        signatures.add(key)
        collected.append(payload)
        return True

    body = text.strip()
    if not body:
        return collected

    for found in _decode_json_fragments(body):
        remember(found)

    for fenced in re.findall(r"```json\s*(.*?)\s*```", text, flags=re.DOTALL | re.IGNORECASE):
        for found in _decode_json_fragments(fenced):
            remember(found)

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        for found in _decode_json_fragments(line):
            remember(found)

    # Recursively extract contract payloads from event-stream text fields.
    # `collected` grows while we walk it, so freshly added nested payloads
    # are themselves inspected for deeper nesting.
    cursor = 0
    while cursor < len(collected):
        current = collected[cursor]
        cursor += 1
        for inner_text in _iter_nested_strings(current):
            if not _looks_like_nested_json_blob(inner_text):
                continue
            for found in _decode_json_fragments(inner_text):
                remember(found)

    return collected
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _validate_finding_item(item: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Strictly validate one finding dict against the review contract.

    Returns ``(True, item)`` when *item* carries every required key with the
    expected type, ``(False, None)`` otherwise.  ``bool`` is explicitly
    rejected for the numeric fields (``confidence``, ``evidence.line``):
    JSON ``true``/``false`` would otherwise slip through because ``bool``
    subclasses ``int`` in Python.
    """
    if not isinstance(item, dict):
        return (False, None)
    required = {"finding_id", "severity", "category", "title", "evidence", "recommendation", "confidence", "fingerprint"}
    if not required.issubset(item.keys()):
        return (False, None)
    if item.get("severity") not in ALLOWED_SEVERITY:
        return (False, None)
    if item.get("category") not in ALLOWED_CATEGORY:
        return (False, None)
    if not isinstance(item.get("title"), str):
        return (False, None)
    if not isinstance(item.get("recommendation"), str):
        return (False, None)
    confidence = item.get("confidence")
    # Reject bool before the numeric check: isinstance(True, int) is True.
    if isinstance(confidence, bool) or not isinstance(confidence, (int, float)):
        return (False, None)
    evidence = item.get("evidence")
    if not isinstance(evidence, dict):
        return (False, None)
    if not isinstance(evidence.get("file"), str):
        return (False, None)
    if not isinstance(evidence.get("snippet"), str):
        return (False, None)
    line = evidence.get("line")
    if line is not None and (isinstance(line, bool) or not isinstance(line, int)):
        return (False, None)
    symbol = evidence.get("symbol")
    if symbol is not None and not isinstance(symbol, str):
        return (False, None)
    return (True, item)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def inspect_contract_output(text: str) -> Dict[str, Any]:
    """
    Strict contract validation for output shaped as:
    {"findings": [ ... ]}.

    Every JSON payload embedded in *text* that carries a "findings" key is
    scored as a candidate envelope; the best candidate (fully valid first,
    then most valid findings, fewest drops, latest index) is reported.
    """
    candidates: List[Dict[str, Any]] = []

    for index, payload in enumerate(extract_json_payloads(text)):
        if not isinstance(payload, dict) or "findings" not in payload:
            continue

        accepted: List[Dict[str, Any]] = []
        rejected = 0
        raw_findings = payload.get("findings")
        if isinstance(raw_findings, list):
            for entry in raw_findings:
                ok, validated = _validate_finding_item(entry)
                if ok and validated is not None:
                    accepted.append(validated)
                else:
                    rejected += 1
        else:
            # A "findings" key that is not a list counts as one drop.
            rejected += 1

        candidates.append(
            {
                "index": index,
                "valid_findings": accepted,
                "valid_count": len(accepted),
                "dropped_count": rejected,
                "parse_ok": rejected == 0,
            }
        )

    if not candidates:
        return {
            "parse_ok": False,
            "has_contract_envelope": False,
            "schema_valid_count": 0,
            "dropped_count": 0,
            "findings": [],
            "parse_reason": "no_contract_envelope",
            "candidate_count": 0,
        }

    def _rank(entry: Dict[str, Any]) -> Tuple[int, int, int, int]:
        return (
            1 if entry["parse_ok"] else 0,
            int(entry["valid_count"]),
            -int(entry["dropped_count"]),
            int(entry["index"]),
        )

    best = max(candidates, key=_rank)
    if best["parse_ok"]:
        parse_reason = "ok"
    elif best["dropped_count"] > 0:
        parse_reason = "schema_invalid"
    else:
        parse_reason = "no_valid_findings"

    return {
        "parse_ok": bool(best["parse_ok"]),
        "has_contract_envelope": True,
        "schema_valid_count": int(best["valid_count"]),
        "dropped_count": int(best["dropped_count"]),
        "findings": list(best["valid_findings"]),
        "parse_reason": parse_reason,
        "candidate_count": len(candidates),
    }
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _extract_findings(payload: Any) -> List[Dict[str, Any]]:
|
|
216
|
+
if isinstance(payload, list):
|
|
217
|
+
return [item for item in payload if isinstance(item, dict)]
|
|
218
|
+
if isinstance(payload, dict):
|
|
219
|
+
findings = payload.get("findings")
|
|
220
|
+
if isinstance(findings, list):
|
|
221
|
+
return [item for item in findings if isinstance(item, dict)]
|
|
222
|
+
if all(k in payload for k in ("severity", "category", "title")):
|
|
223
|
+
return [payload]
|
|
224
|
+
return []
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _as_optional_int(value: Any) -> Optional[int]:
|
|
228
|
+
if value is None:
|
|
229
|
+
return None
|
|
230
|
+
if isinstance(value, int):
|
|
231
|
+
return value
|
|
232
|
+
if isinstance(value, str) and value.isdigit():
|
|
233
|
+
return int(value)
|
|
234
|
+
return None
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def normalize_findings_from_text(text: str, ctx: NormalizeContext, provider: ProviderId) -> List[NormalizedFinding]:
    """Convert raw provider output text into ``NormalizedFinding`` rows.

    Strict contract findings (via ``inspect_contract_output``) are preferred;
    when the strict pass yields nothing, a lenient sweep over every embedded
    JSON payload is used instead.  Items missing core fields are skipped,
    duplicate finding ids are dropped, and confidence is clamped to [0, 1].
    """
    results: List[NormalizedFinding] = []
    used_ids = set()

    contract = inspect_contract_output(text)
    candidate_items: List[Dict[str, Any]] = (
        list(contract["findings"]) if contract["has_contract_envelope"] else []
    )
    if not candidate_items:
        # Lenient fallback: accept finding-shaped dicts from any payload.
        for payload in extract_json_payloads(text):
            candidate_items.extend(_extract_findings(payload))

    for item in candidate_items:
        severity = item.get("severity")
        category = item.get("category")
        title = item.get("title")
        evidence = item.get("evidence")

        if not (isinstance(severity, str) and isinstance(category, str) and isinstance(title, str)):
            continue
        if severity not in ALLOWED_SEVERITY or category not in ALLOWED_CATEGORY:
            continue
        if not isinstance(evidence, dict):
            continue

        recommendation = item.get("recommendation")
        if not isinstance(recommendation, str):
            recommendation = ""
        confidence = item.get("confidence")
        if not isinstance(confidence, (int, float)):
            confidence = 0.0

        file_path = evidence.get("file")
        snippet = evidence.get("snippet")
        if not (isinstance(file_path, str) and isinstance(snippet, str)):
            continue

        # Synthesize a stable-ish id when the provider gave none.
        finding_id = str(item.get("finding_id") or item.get("id") or "")
        if not finding_id:
            finding_id = f"{provider}:{len(results) + 1}"
        if finding_id in used_ids:
            continue
        used_ids.add(finding_id)

        fingerprint = str(item.get("fingerprint") or f"{provider}:{title}:{file_path}:{evidence.get('line')}")
        raw_symbol = evidence.get("symbol")

        results.append(
            NormalizedFinding(
                task_id=ctx.task_id,
                provider=provider,
                finding_id=finding_id,
                severity=severity,  # type: ignore[arg-type]
                category=category,  # type: ignore[arg-type]
                title=title,
                evidence=Evidence(
                    file=file_path,
                    line=_as_optional_int(evidence.get("line")),
                    snippet=snippet,
                    symbol=raw_symbol if isinstance(raw_symbol, str) else None,
                ),
                recommendation=recommendation,
                confidence=max(0.0, min(1.0, float(confidence))),
                fingerprint=fingerprint,
                raw_ref=ctx.raw_ref,
            )
        )

    return results
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, List
|
|
4
|
+
|
|
5
|
+
from ..contracts import CapabilitySet, NormalizeContext, NormalizedFinding, TaskInput
|
|
6
|
+
from .parsing import normalize_findings_from_text
|
|
7
|
+
from .shim import ShimAdapterBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class QwenAdapter(ShimAdapterBase):
    """Shim adapter that drives the ``qwen`` CLI using qwen-oauth auth."""

    # Flags appended to every qwen invocation.
    _AUTH_FLAGS = ["--auth-type", "qwen-oauth"]

    def __init__(self) -> None:
        capabilities = CapabilitySet(
            tiers=["C0", "C1", "C2", "C3"],
            supports_native_async=False,
            supports_poll_endpoint=False,
            supports_resume_after_restart=True,
            supports_schema_enforcement=False,
            min_supported_version="0.10.6",
            tested_os=["macos"],
        )
        super().__init__(provider_id="qwen", binary_name="qwen", capability_set=capabilities)

    def _auth_check_command(self, binary: str) -> List[str]:
        # A trivial text-mode prompt; exit code 0 implies working auth.
        return [binary, "Reply with exactly OK", "--output-format", "text", *self._AUTH_FLAGS]

    def _build_command(self, input_task: TaskInput) -> List[str]:
        return ["qwen", input_task.prompt, "--output-format", "json", *self._AUTH_FLAGS]

    def _build_command_for_record(self) -> List[str]:
        # Redacted form persisted in the provider result record.
        return ["qwen", "<prompt>", "--output-format", "json", *self._AUTH_FLAGS]

    def normalize(self, raw: Any, ctx: NormalizeContext) -> List[NormalizedFinding]:
        text = raw if isinstance(raw, str) else ""
        return normalize_findings_from_text(text, ctx, "qwen")
|
|
38
|
+
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import signal
|
|
6
|
+
import subprocess
|
|
7
|
+
import time
|
|
8
|
+
import uuid
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, List, Optional, TextIO
|
|
13
|
+
|
|
14
|
+
from ..artifacts import expected_paths
|
|
15
|
+
from ..contracts import (
|
|
16
|
+
CapabilitySet,
|
|
17
|
+
NormalizeContext,
|
|
18
|
+
NormalizedFinding,
|
|
19
|
+
ProviderId,
|
|
20
|
+
ProviderPresence,
|
|
21
|
+
TaskInput,
|
|
22
|
+
TaskRunRef,
|
|
23
|
+
TaskStatus,
|
|
24
|
+
)
|
|
25
|
+
from ..errors import classify_error, detect_warnings
|
|
26
|
+
from ..types import ErrorKind
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(timezone.utc)
    return current.isoformat()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ShimRunHandle:
    """Bookkeeping for one in-flight shim subprocess.

    Bundles the live process with its artifact paths and the open log file
    objects the process writes to; the open handles are closed once the
    process is observed to have exited.
    """

    process: subprocess.Popen[str]
    stdout_path: Path  # raw stdout log location
    stderr_path: Path  # raw stderr log location
    provider_result_path: Path  # where the result JSON record is written
    stdout_file: TextIO  # open handle backing stdout_path
    stderr_file: TextIO  # open handle backing stderr_path
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ShimAdapterBase:
    """Shared machinery for CLI "shim" adapters.

    A subclass names the provider binary and supplies command builders;
    this base implements detection (binary + auth probe), process launch
    with stdout/stderr captured to artifact logs, completion polling that
    persists a provider-result JSON record, and process-group cancellation.
    """

    id: ProviderId

    def __init__(self, provider_id: ProviderId, binary_name: str, capability_set: CapabilitySet) -> None:
        self.id = provider_id
        self.binary_name = binary_name
        self._capability_set = capability_set
        # In-memory only: a restart loses handles, and poll() then reports
        # the run as EXPIRED.
        self._runs: Dict[str, ShimRunHandle] = {}

    def detect(self) -> ProviderPresence:
        """Probe for the provider binary, its version, and working auth."""
        binary = self._resolve_binary()
        if not binary:
            return ProviderPresence(
                provider=self.id,
                detected=False,
                binary_path=None,
                version=None,
                auth_ok=False,
                reason="binary_not_found",
            )

        version = self._probe_version(binary)
        auth_ok = self._probe_auth(binary)
        return ProviderPresence(
            provider=self.id,
            detected=True,
            binary_path=binary,
            version=version,
            auth_ok=auth_ok,
            reason="ok" if auth_ok else "auth_check_failed",
        )

    def capabilities(self) -> CapabilitySet:
        """Static capability declaration supplied by the subclass."""
        return self._capability_set

    def supported_permission_keys(self) -> List[str]:
        """Shim adapters expose no permission knobs by default."""
        return []

    def run(self, input_task: TaskInput) -> TaskRunRef:
        """Launch the provider CLI for *input_task* and return a run ref.

        Honors a ``command_override`` list in task metadata; otherwise the
        subclass builds the command.  Raises ``ValueError`` when the
        resulting command is empty.
        """
        command_override = input_task.metadata.get("command_override")
        cmd = command_override if isinstance(command_override, list) else self._build_command(input_task)
        if not isinstance(cmd, list) or not cmd:
            raise ValueError("adapter run command is empty")

        artifact_root = str(input_task.metadata.get("artifact_root", "/tmp/mco"))
        paths = expected_paths(artifact_root, input_task.task_id, (self.id,))
        root = paths["root"]
        paths["providers_dir"].mkdir(parents=True, exist_ok=True)
        paths["raw_dir"].mkdir(parents=True, exist_ok=True)

        stdout_path = paths[f"raw/{self.id}.stdout.log"]
        stderr_path = paths[f"raw/{self.id}.stderr.log"]
        provider_result_path = paths[f"providers/{self.id}.json"]
        run_id = f"{self.id}-{uuid.uuid4().hex[:12]}"

        stdout_file = stdout_path.open("w", encoding="utf-8")
        stderr_file = stderr_path.open("w", encoding="utf-8")
        try:
            # start_new_session puts the child in its own process group so
            # cancel() can signal the whole group.
            process = subprocess.Popen(
                cmd,
                cwd=input_task.repo_root,
                stdout=stdout_file,
                stderr=stderr_file,
                text=True,
                start_new_session=True,
            )
        except Exception:
            # Bug fix: close the two log handles when the spawn itself
            # fails (e.g. binary vanished between detect and run);
            # previously both descriptors leaked.
            stdout_file.close()
            stderr_file.close()
            raise

        self._runs[run_id] = ShimRunHandle(
            process=process,
            stdout_path=stdout_path,
            stderr_path=stderr_path,
            provider_result_path=provider_result_path,
            stdout_file=stdout_file,
            stderr_file=stderr_file,
        )
        return TaskRunRef(
            task_id=input_task.task_id,
            provider=self.id,
            run_id=run_id,
            artifact_path=str(root),
            started_at=now_iso(),
            pid=process.pid,
            session_id=None,
        )

    def poll(self, ref: TaskRunRef) -> TaskStatus:
        """Report run progress; on completion, persist the result record."""
        handle = self._runs.get(ref.run_id)
        if handle is None:
            # Handle lost (e.g. orchestrator restarted) - cannot recover.
            return TaskStatus(
                task_id=ref.task_id,
                provider=self.id,
                run_id=ref.run_id,
                attempt_state="EXPIRED",
                completed=True,
                heartbeat_at=None,
                output_path=None,
                error_kind=ErrorKind.NON_RETRYABLE_INVALID_INPUT,
                exit_code=None,
                message="run_handle_not_found",
            )

        return_code = handle.process.poll()
        if return_code is None:
            return TaskStatus(
                task_id=ref.task_id,
                provider=self.id,
                run_id=ref.run_id,
                attempt_state="STARTED",
                completed=False,
                heartbeat_at=now_iso(),
                output_path=str(handle.provider_result_path),
                error_kind=None,
                exit_code=None,
                message="running",
            )

        # Process exited: flush/close the log handles before reading them
        # back.  Closing an already-closed file is a no-op, so repeated
        # polls after completion are safe.
        try:
            handle.stdout_file.close()
            handle.stderr_file.close()
        except Exception:
            pass

        stdout_text = handle.stdout_path.read_text(encoding="utf-8") if handle.stdout_path.exists() else ""
        stderr_text = handle.stderr_path.read_text(encoding="utf-8") if handle.stderr_path.exists() else ""
        success = self._is_success(return_code, stdout_text, stderr_text)
        error_kind = None if success else classify_error(return_code, stderr_text)
        warnings = [warning.value for warning in detect_warnings(stderr_text)]

        payload = {
            "provider": self.id,
            "task_id": ref.task_id,
            "run_id": ref.run_id,
            "pid": ref.pid,
            "command": self._build_command_for_record(),
            "started_at": ref.started_at,
            "completed_at": now_iso(),
            "exit_code": return_code,
            "success": success,
            "error_kind": error_kind.value if error_kind else None,
            "warnings": warnings,
            "stdout_path": str(handle.stdout_path),
            "stderr_path": str(handle.stderr_path),
        }
        handle.provider_result_path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8")

        return TaskStatus(
            task_id=ref.task_id,
            provider=self.id,
            run_id=ref.run_id,
            attempt_state="SUCCEEDED" if success else "FAILED",
            completed=True,
            heartbeat_at=now_iso(),
            output_path=str(handle.provider_result_path),
            error_kind=error_kind,
            exit_code=return_code,
            message="completed",
        )

    def cancel(self, ref: TaskRunRef) -> None:
        """Terminate the run's process group; escalate SIGTERM -> SIGKILL."""
        handle = self._runs.get(ref.run_id)
        if handle is None:
            return
        if handle.process.poll() is not None:
            return
        try:
            os.killpg(os.getpgid(handle.process.pid), signal.SIGTERM)
        except ProcessLookupError:
            return
        # Short grace period before escalating.
        time.sleep(0.2)
        if handle.process.poll() is None:
            try:
                os.killpg(os.getpgid(handle.process.pid), signal.SIGKILL)
            except ProcessLookupError:
                return

    def normalize(self, raw: object, ctx: NormalizeContext) -> List[NormalizedFinding]:
        """Subclasses convert raw CLI output into normalized findings."""
        raise NotImplementedError

    def _resolve_binary(self) -> Optional[str]:
        """Locate the provider binary via a login-shell PATH lookup.

        ``bash -lc`` is used deliberately so user shell profiles (nvm,
        asdf, etc.) contribute to PATH.  ``binary_name`` is
        adapter-controlled, never user input, so interpolation is safe.
        """
        result = subprocess.run(
            ["bash", "-lc", f"command -v {self.binary_name}"],
            capture_output=True,
            text=True,
            check=False,
        )
        value = result.stdout.strip()
        return value if value else None

    def _probe_version(self, binary: str) -> Optional[str]:
        """Best-effort version string: last line of ``--version`` output."""
        result = subprocess.run([binary, "--version"], capture_output=True, text=True, check=False)
        lines = (result.stdout or result.stderr).splitlines()
        return lines[-1].strip() if lines else None

    def _probe_auth(self, binary: str) -> bool:
        """Auth is considered OK when the subclass's check command exits 0."""
        cmd = self._auth_check_command(binary)
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
        return result.returncode == 0

    def _auth_check_command(self, binary: str) -> List[str]:
        """Subclasses return a cheap command that exits 0 when authed."""
        raise NotImplementedError

    def _build_command(self, input_task: TaskInput) -> List[str]:
        """Subclasses build the real CLI invocation for a task."""
        raise NotImplementedError

    def _build_command_for_record(self) -> List[str]:
        """Redacted command persisted in the result record; optional."""
        return []

    def _is_success(self, return_code: int, stdout_text: str, stderr_text: str) -> bool:
        """Default success criterion: exit code 0.  Subclasses may refine
        using the captured output."""
        _ = stdout_text
        _ = stderr_text
        return return_code == 0
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Dict, Iterable
|
|
5
|
+
|
|
6
|
+
from .contracts import ProviderId
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Version tag identifying this on-disk artifact layout.
ARTIFACT_LAYOUT_VERSION = "stage-a-v1"
# Files written directly under each per-task artifact root.
ROOT_FILES = ("summary.md", "decision.md", "findings.json", "run.json")
# Subdirectories created under each per-task artifact root.
ROOT_DIRS = ("providers", "raw")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def task_artifact_root(base_dir: str, task_id: str) -> Path:
    """Return the artifact directory for one task: ``<base_dir>/<task_id>``."""
    return Path(base_dir).joinpath(task_id)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def provider_artifact_name(provider: ProviderId) -> str:
    """Name of the per-provider result file, e.g. ``claude.json``."""
    return "{}.json".format(provider)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def expected_paths(base_dir: str, task_id: str, providers: Iterable[ProviderId]) -> Dict[str, Path]:
    """Map logical artifact names to concrete paths for one task.

    Keys produced: ``"root"``, each ``ROOT_FILES`` entry, ``"providers_dir"``,
    ``"raw_dir"``, and per provider ``"providers/<p>.json"``,
    ``"raw/<p>.stdout.log"``, ``"raw/<p>.stderr.log"``.  Nothing is created
    on disk here.
    """
    root = task_artifact_root(base_dir, task_id)
    layout: Dict[str, Path] = {"root": root}
    layout.update({name: root / name for name in ROOT_FILES})

    providers_dir = root / "providers"
    raw_dir = root / "raw"
    layout["providers_dir"] = providers_dir
    layout["raw_dir"] = raw_dir

    for provider in providers:
        layout[f"providers/{provider}.json"] = providers_dir / provider_artifact_name(provider)
        layout[f"raw/{provider}.stdout.log"] = raw_dir / f"{provider}.stdout.log"
        layout[f"raw/{provider}.stderr.log"] = raw_dir / f"{provider}.stderr.log"

    return layout
|
|
40
|
+
|