@tt-a1i/mco 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,305 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ from ..contracts import Evidence, NormalizedFinding, NormalizeContext, ProviderId
8
+
9
+ ALLOWED_SEVERITY = {"critical", "high", "medium", "low"}
10
+ ALLOWED_CATEGORY = {"bug", "security", "performance", "maintainability", "test-gap"}
11
+
12
+
13
+ def _decode_json_fragments(text: str) -> List[Any]:
14
+ decoder = json.JSONDecoder()
15
+ payloads: List[Any] = []
16
+ index = 0
17
+ while index < len(text):
18
+ match = re.search(r"[\{\[]", text[index:])
19
+ if not match:
20
+ break
21
+ start = index + match.start()
22
+ try:
23
+ payload, end = decoder.raw_decode(text, start)
24
+ except json.JSONDecodeError:
25
+ index = start + 1
26
+ continue
27
+ payloads.append(payload)
28
+ index = end
29
+ return payloads
30
+
31
+
32
+ def _iter_nested_strings(payload: Any) -> List[str]:
33
+ nested_strings: List[str] = []
34
+ stack = [payload]
35
+ while stack:
36
+ node = stack.pop()
37
+ if isinstance(node, dict):
38
+ for value in node.values():
39
+ if isinstance(value, str):
40
+ nested_strings.append(value)
41
+ elif isinstance(value, (dict, list)):
42
+ stack.append(value)
43
+ elif isinstance(node, list):
44
+ for value in node:
45
+ if isinstance(value, str):
46
+ nested_strings.append(value)
47
+ elif isinstance(value, (dict, list)):
48
+ stack.append(value)
49
+ return nested_strings
50
+
51
+
52
+ def _looks_like_nested_json_blob(value: str) -> bool:
53
+ stripped = value.strip()
54
+ if not stripped:
55
+ return False
56
+ lowered = stripped.lower()
57
+ if stripped.startswith("{") or stripped.startswith("["):
58
+ return True
59
+ if "```json" in lowered:
60
+ return True
61
+ if "findings" in lowered and ("{" in stripped or "}" in stripped):
62
+ return True
63
+ return False
64
+
65
+
66
def extract_json_payloads(text: str) -> List[Any]:
    """Extract every distinct JSON payload embedded in *text*.

    Payloads are harvested from the raw text, from ```json fenced blocks,
    from individual lines, and recursively from string fields of already
    extracted payloads (event-stream transcripts).  Duplicates — compared
    by canonical JSON serialization — are kept only once, in first-seen
    order.
    """
    collected: List[Any] = []
    signatures = set()

    def remember(candidate: Any) -> bool:
        # Canonical serialization as dedupe key; repr() as a fallback for
        # anything json.dumps cannot handle.
        try:
            key = json.dumps(candidate, sort_keys=True, ensure_ascii=True)
        except Exception:
            key = repr(candidate)
        if key in signatures:
            return False
        signatures.add(key)
        collected.append(candidate)
        return True

    body = text.strip()
    if not body:
        return collected

    # Pass 1: the whole (trimmed) text.
    for candidate in _decode_json_fragments(body):
        remember(candidate)

    # Pass 2: fenced ```json blocks (case-insensitive, multi-line).
    for fenced in re.findall(r"```json\s*(.*?)\s*```", text, flags=re.DOTALL | re.IGNORECASE):
        for candidate in _decode_json_fragments(fenced):
            remember(candidate)

    # Pass 3: each non-blank line on its own.
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        for candidate in _decode_json_fragments(line):
            remember(candidate)

    # Pass 4: breadth-first expansion of string fields inside payloads that
    # themselves look like serialized JSON; newly found payloads are
    # appended and expanded in turn.
    cursor = 0
    while cursor < len(collected):
        current = collected[cursor]
        cursor += 1
        for inner_text in _iter_nested_strings(current):
            if not _looks_like_nested_json_blob(inner_text):
                continue
            for candidate in _decode_json_fragments(inner_text):
                remember(candidate)

    return collected
111
+
112
+
113
def _validate_finding_item(item: Any) -> Tuple[bool, Optional[Dict[str, Any]]]:
    """Schema-check one contract finding.

    Returns (True, item) when *item* is a dict with all required keys,
    allowed severity/category values, and correctly-typed fields;
    otherwise (False, None).
    """
    invalid: Tuple[bool, Optional[Dict[str, Any]]] = (False, None)
    if not isinstance(item, dict):
        return invalid

    required = {
        "finding_id", "severity", "category", "title",
        "evidence", "recommendation", "confidence", "fingerprint",
    }
    if not required.issubset(item.keys()):
        return invalid

    if item.get("severity") not in ALLOWED_SEVERITY:
        return invalid
    if item.get("category") not in ALLOWED_CATEGORY:
        return invalid

    for key in ("title", "recommendation"):
        if not isinstance(item.get(key), str):
            return invalid
    if not isinstance(item.get("confidence"), (int, float)):
        return invalid

    evidence = item.get("evidence")
    if not isinstance(evidence, dict):
        return invalid
    if not isinstance(evidence.get("file"), str):
        return invalid
    if not isinstance(evidence.get("snippet"), str):
        return invalid
    # "line" and "symbol" are optional but type-checked when present.
    for key, expected_type in (("line", int), ("symbol", str)):
        optional_value = evidence.get(key)
        if optional_value is not None and not isinstance(optional_value, expected_type):
            return invalid

    return (True, item)
143
+
144
+
145
def inspect_contract_output(text: str) -> Dict[str, Any]:
    """
    Strict contract validation for output shaped as:
      {"findings": [ ... ]}.

    Every JSON payload extracted from *text* that is a dict with a
    "findings" key becomes a candidate.  Each candidate's findings are
    schema-checked item by item; the best candidate determines the
    returned summary dict (parse_ok, counts, valid findings, reason).
    """
    candidates: List[Dict[str, Any]] = []

    for index, payload in enumerate(extract_json_payloads(text)):
        if not isinstance(payload, dict):
            continue
        if "findings" not in payload:
            continue

        valid_findings: List[Dict[str, Any]] = []
        dropped_count = 0
        findings = payload.get("findings")
        if not isinstance(findings, list):
            # A non-list "findings" value counts as one dropped item.
            dropped_count += 1
        else:
            for item in findings:
                ok, normalized = _validate_finding_item(item)
                if ok and normalized is not None:
                    valid_findings.append(normalized)
                else:
                    dropped_count += 1

        candidates.append(
            {
                "index": index,
                "valid_findings": valid_findings,
                "valid_count": len(valid_findings),
                "dropped_count": dropped_count,
                # parse_ok means no item was dropped; an envelope with an
                # empty findings list therefore also counts as parse_ok.
                "parse_ok": dropped_count == 0,
            }
        )

    has_contract_envelope = len(candidates) > 0
    if not has_contract_envelope:
        return {
            "parse_ok": False,
            "has_contract_envelope": False,
            "schema_valid_count": 0,
            "dropped_count": 0,
            "findings": [],
            "parse_reason": "no_contract_envelope",
            "candidate_count": 0,
        }

    # Pick the best candidate: fully-clean parses first, then more valid
    # findings, then fewer drops; the trailing index means that among
    # exact ties the LATER payload wins (max keeps the greatest key).
    best = max(
        candidates,
        key=lambda item: (
            1 if item["parse_ok"] else 0,
            int(item["valid_count"]),
            -int(item["dropped_count"]),
            int(item["index"]),
        ),
    )
    # NOTE(review): when parse_ok is False, dropped_count is always > 0
    # (parse_ok is defined as dropped_count == 0), so the
    # "no_valid_findings" branch looks unreachable — confirm intent.
    parse_reason = "ok" if best["parse_ok"] else ("schema_invalid" if best["dropped_count"] > 0 else "no_valid_findings")

    return {
        "parse_ok": bool(best["parse_ok"]),
        "has_contract_envelope": True,
        "schema_valid_count": int(best["valid_count"]),
        "dropped_count": int(best["dropped_count"]),
        "findings": list(best["valid_findings"]),
        "parse_reason": parse_reason,
        "candidate_count": len(candidates),
    }
213
+
214
+
215
+ def _extract_findings(payload: Any) -> List[Dict[str, Any]]:
216
+ if isinstance(payload, list):
217
+ return [item for item in payload if isinstance(item, dict)]
218
+ if isinstance(payload, dict):
219
+ findings = payload.get("findings")
220
+ if isinstance(findings, list):
221
+ return [item for item in findings if isinstance(item, dict)]
222
+ if all(k in payload for k in ("severity", "category", "title")):
223
+ return [payload]
224
+ return []
225
+
226
+
227
+ def _as_optional_int(value: Any) -> Optional[int]:
228
+ if value is None:
229
+ return None
230
+ if isinstance(value, int):
231
+ return value
232
+ if isinstance(value, str) and value.isdigit():
233
+ return int(value)
234
+ return None
235
+
236
+
237
def normalize_findings_from_text(text: str, ctx: NormalizeContext, provider: ProviderId) -> List[NormalizedFinding]:
    """Convert raw provider output text into NormalizedFinding records.

    Prefers findings that pass the strict contract envelope check
    (inspect_contract_output); when that yields nothing, falls back to
    scraping loosely-shaped findings from every JSON payload in the text.
    Items failing the basic type/enum checks are silently skipped, and
    duplicate finding ids are deduped first-wins.
    """
    normalized: List[NormalizedFinding] = []
    seen_ids = set()

    contract_info = inspect_contract_output(text)
    findings_source = contract_info["findings"] if contract_info["has_contract_envelope"] else []

    if findings_source:
        source_items = findings_source
    else:
        # Fallback path: scrape findings from any JSON payload found.
        source_items = []
        for payload in extract_json_payloads(text):
            source_items.extend(_extract_findings(payload))

    for item in source_items:
        severity = item.get("severity")
        category = item.get("category")
        title = item.get("title")
        evidence = item.get("evidence")
        recommendation = item.get("recommendation")
        confidence = item.get("confidence")

        # Hard requirements: typed severity/category/title within the
        # allowed enums, plus a dict-shaped evidence block.
        if not isinstance(severity, str) or not isinstance(category, str) or not isinstance(title, str):
            continue
        if severity not in ALLOWED_SEVERITY or category not in ALLOWED_CATEGORY:
            continue
        if not isinstance(evidence, dict):
            continue
        # Soft fields: mistyped recommendation/confidence get defaults
        # rather than dropping the finding.
        if not isinstance(recommendation, str):
            recommendation = ""
        if not isinstance(confidence, (int, float)):
            confidence = 0.0

        file_path = evidence.get("file")
        snippet = evidence.get("snippet")
        if not isinstance(file_path, str) or not isinstance(snippet, str):
            continue

        # Synthesize a provider-scoped id when the item carries none;
        # duplicate ids keep the first occurrence only.
        finding_id = str(item.get("finding_id") or item.get("id") or "")
        if not finding_id:
            finding_id = f"{provider}:{len(normalized) + 1}"
        if finding_id in seen_ids:
            continue
        seen_ids.add(finding_id)

        # Fallback fingerprint derived from provider/title/location.
        fingerprint = str(item.get("fingerprint") or f"{provider}:{title}:{file_path}:{evidence.get('line')}")

        normalized.append(
            NormalizedFinding(
                task_id=ctx.task_id,
                provider=provider,
                finding_id=finding_id,
                severity=severity,  # type: ignore[arg-type]
                category=category,  # type: ignore[arg-type]
                title=title,
                evidence=Evidence(
                    file=file_path,
                    line=_as_optional_int(evidence.get("line")),
                    snippet=snippet,
                    symbol=evidence.get("symbol") if isinstance(evidence.get("symbol"), str) else None,
                ),
                recommendation=recommendation,
                # Confidence is clamped into [0.0, 1.0].
                confidence=max(0.0, min(1.0, float(confidence))),
                fingerprint=fingerprint,
                raw_ref=ctx.raw_ref,
            )
        )

    return normalized
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, List
4
+
5
+ from ..contracts import CapabilitySet, NormalizeContext, NormalizedFinding, TaskInput
6
+ from .parsing import normalize_findings_from_text
7
+ from .shim import ShimAdapterBase
8
+
9
+
10
class QwenAdapter(ShimAdapterBase):
    """Adapter for the Qwen CLI, driven through the shim subprocess base.

    Supplies the qwen-specific command lines and capability declaration;
    process lifecycle is inherited from ShimAdapterBase.
    """

    def __init__(self) -> None:
        super().__init__(
            provider_id="qwen",
            binary_name="qwen",
            capability_set=CapabilitySet(
                tiers=["C0", "C1", "C2", "C3"],
                supports_native_async=False,
                supports_poll_endpoint=False,
                supports_resume_after_restart=True,
                supports_schema_enforcement=False,
                min_supported_version="0.10.6",
                tested_os=["macos"],
            ),
        )

    def _auth_check_command(self, binary: str) -> List[str]:
        # Cheap auth probe: a trivial prompt that only succeeds when the
        # qwen-oauth credentials are valid (exit code checked by the base).
        return [binary, "Reply with exactly OK", "--output-format", "text", "--auth-type", "qwen-oauth"]

    def _build_command(self, input_task: TaskInput) -> List[str]:
        # The command line actually executed for a task.
        return ["qwen", input_task.prompt, "--output-format", "json", "--auth-type", "qwen-oauth"]

    def _build_command_for_record(self) -> List[str]:
        # Redacted command recorded in artifacts: the prompt is replaced
        # with a placeholder so task text never lands in result files.
        return ["qwen", "<prompt>", "--output-format", "json", "--auth-type", "qwen-oauth"]

    def normalize(self, raw: Any, ctx: NormalizeContext) -> List[NormalizedFinding]:
        """Parse raw CLI output text into findings; non-string raw yields []."""
        text = raw if isinstance(raw, str) else ""
        return normalize_findings_from_text(text, ctx, "qwen")
38
+
@@ -0,0 +1,251 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import signal
6
+ import subprocess
7
+ import time
8
+ import uuid
9
+ from dataclasses import dataclass
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Dict, List, Optional, TextIO
13
+
14
+ from ..artifacts import expected_paths
15
+ from ..contracts import (
16
+ CapabilitySet,
17
+ NormalizeContext,
18
+ NormalizedFinding,
19
+ ProviderId,
20
+ ProviderPresence,
21
+ TaskInput,
22
+ TaskRunRef,
23
+ TaskStatus,
24
+ )
25
+ from ..errors import classify_error, detect_warnings
26
+ from ..types import ErrorKind
27
+
28
+
29
def now_iso() -> str:
    """Current UTC time as a timezone-aware ISO-8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
31
+
32
+
33
@dataclass
class ShimRunHandle:
    """Bookkeeping for one in-flight provider subprocess."""

    # The running child process (text mode).
    process: subprocess.Popen[str]
    # On-disk capture of the child's stdout/stderr streams.
    stdout_path: Path
    stderr_path: Path
    # Where poll() writes the JSON result record once the child exits.
    provider_result_path: Path
    # Open file objects the child writes into; closed by poll() on exit.
    stdout_file: TextIO
    stderr_file: TextIO
41
+
42
+
43
class ShimAdapterBase:
    """Base adapter that shells out to a provider CLI binary.

    Subclasses supply the concrete command lines (_auth_check_command,
    _build_command, _build_command_for_record) and the normalize()
    implementation; this base handles binary detection, process lifecycle
    (run / poll / cancel) and artifact bookkeeping.
    """

    # Provider identifier; assigned from the constructor argument.
    id: ProviderId

    def __init__(self, provider_id: ProviderId, binary_name: str, capability_set: CapabilitySet) -> None:
        self.id = provider_id
        self.binary_name = binary_name
        self._capability_set = capability_set
        # Live run handles keyed by run_id.
        # NOTE(review): entries are never removed, so handles (and their
        # closed file objects) accumulate for the adapter's lifetime.
        self._runs: Dict[str, ShimRunHandle] = {}

    def detect(self) -> ProviderPresence:
        """Report whether the provider binary exists, plus version/auth probes."""
        binary = self._resolve_binary()
        if not binary:
            return ProviderPresence(
                provider=self.id,
                detected=False,
                binary_path=None,
                version=None,
                auth_ok=False,
                reason="binary_not_found",
            )

        version = self._probe_version(binary)
        auth_ok = self._probe_auth(binary)
        return ProviderPresence(
            provider=self.id,
            detected=True,
            binary_path=binary,
            version=version,
            auth_ok=auth_ok,
            reason="ok" if auth_ok else "auth_check_failed",
        )

    def capabilities(self) -> CapabilitySet:
        """Return the static capability set supplied at construction."""
        return self._capability_set

    def supported_permission_keys(self) -> List[str]:
        """Permission keys this adapter understands; none by default."""
        return []

    def run(self, input_task: TaskInput) -> TaskRunRef:
        """Start the provider process for *input_task* and return a run ref.

        stdout/stderr are redirected to log files under the task's artifact
        directory.  The child is started in its own session
        (start_new_session=True) so cancel() can signal the whole group.
        """
        # metadata may carry a full command override (must be a list).
        command_override = input_task.metadata.get("command_override")
        cmd = command_override if isinstance(command_override, list) else self._build_command(input_task)
        if not isinstance(cmd, list) or not cmd:
            raise ValueError("adapter run command is empty")

        artifact_root = str(input_task.metadata.get("artifact_root", "/tmp/mco"))
        paths = expected_paths(artifact_root, input_task.task_id, (self.id,))
        root = paths["root"]
        paths["providers_dir"].mkdir(parents=True, exist_ok=True)
        paths["raw_dir"].mkdir(parents=True, exist_ok=True)

        stdout_path = paths[f"raw/{self.id}.stdout.log"]
        stderr_path = paths[f"raw/{self.id}.stderr.log"]
        provider_result_path = paths[f"providers/{self.id}.json"]
        run_id = f"{self.id}-{uuid.uuid4().hex[:12]}"

        # NOTE(review): if Popen raises, these two file objects leak —
        # consider closing them in an except block.
        stdout_file = stdout_path.open("w", encoding="utf-8")
        stderr_file = stderr_path.open("w", encoding="utf-8")
        process = subprocess.Popen(
            cmd,
            cwd=input_task.repo_root,
            stdout=stdout_file,
            stderr=stderr_file,
            text=True,
            start_new_session=True,
        )
        self._runs[run_id] = ShimRunHandle(
            process=process,
            stdout_path=stdout_path,
            stderr_path=stderr_path,
            provider_result_path=provider_result_path,
            stdout_file=stdout_file,
            stderr_file=stderr_file,
        )
        return TaskRunRef(
            task_id=input_task.task_id,
            provider=self.id,
            run_id=run_id,
            artifact_path=str(root),
            started_at=now_iso(),
            pid=process.pid,
            session_id=None,
        )

    def poll(self, ref: TaskRunRef) -> TaskStatus:
        """Check run state; on completion, write the provider result record.

        An unknown run_id (e.g. after a restart, since handles live only in
        memory) reports EXPIRED.  A still-running child reports STARTED with
        a fresh heartbeat.  On exit, stdout/stderr are flushed and read
        back, success/error are classified, and a JSON summary is written
        to the provider result path.
        """
        handle = self._runs.get(ref.run_id)
        if handle is None:
            return TaskStatus(
                task_id=ref.task_id,
                provider=self.id,
                run_id=ref.run_id,
                attempt_state="EXPIRED",
                completed=True,
                heartbeat_at=None,
                output_path=None,
                error_kind=ErrorKind.NON_RETRYABLE_INVALID_INPUT,
                exit_code=None,
                message="run_handle_not_found",
            )

        return_code = handle.process.poll()
        if return_code is None:
            return TaskStatus(
                task_id=ref.task_id,
                provider=self.id,
                run_id=ref.run_id,
                attempt_state="STARTED",
                completed=False,
                heartbeat_at=now_iso(),
                # Points at where the result WILL be written; the file does
                # not exist yet while the run is in flight.
                output_path=str(handle.provider_result_path),
                error_kind=None,
                exit_code=None,
                message="running",
            )

        # Close the log files so all buffered output is flushed to disk
        # before we read the logs back.  Closing twice (repeat polls) is
        # harmless; other errors are deliberately best-effort.
        try:
            handle.stdout_file.close()
            handle.stderr_file.close()
        except Exception:
            pass

        stdout_text = handle.stdout_path.read_text(encoding="utf-8") if handle.stdout_path.exists() else ""
        stderr_text = handle.stderr_path.read_text(encoding="utf-8") if handle.stderr_path.exists() else ""
        success = self._is_success(return_code, stdout_text, stderr_text)
        error_kind = None if success else classify_error(return_code, stderr_text)
        warnings = [warning.value for warning in detect_warnings(stderr_text)]

        # Persist a machine-readable completion record next to the raw logs.
        payload = {
            "provider": self.id,
            "task_id": ref.task_id,
            "run_id": ref.run_id,
            "pid": ref.pid,
            "command": self._build_command_for_record(),
            "started_at": ref.started_at,
            "completed_at": now_iso(),
            "exit_code": return_code,
            "success": success,
            "error_kind": error_kind.value if error_kind else None,
            "warnings": warnings,
            "stdout_path": str(handle.stdout_path),
            "stderr_path": str(handle.stderr_path),
        }
        handle.provider_result_path.write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8")

        return TaskStatus(
            task_id=ref.task_id,
            provider=self.id,
            run_id=ref.run_id,
            attempt_state="SUCCEEDED" if success else "FAILED",
            completed=True,
            heartbeat_at=now_iso(),
            output_path=str(handle.provider_result_path),
            error_kind=error_kind,
            exit_code=return_code,
            message="completed",
        )

    def cancel(self, ref: TaskRunRef) -> None:
        """Terminate a running child: SIGTERM the process group, then
        escalate to SIGKILL if it is still alive 0.2s later.  No-op for
        unknown or already-finished runs."""
        handle = self._runs.get(ref.run_id)
        if handle is None:
            return
        if handle.process.poll() is not None:
            return
        try:
            # Signal the whole group (run() used start_new_session=True).
            os.killpg(os.getpgid(handle.process.pid), signal.SIGTERM)
        except ProcessLookupError:
            return
        time.sleep(0.2)
        if handle.process.poll() is None:
            try:
                os.killpg(os.getpgid(handle.process.pid), signal.SIGKILL)
            except ProcessLookupError:
                return

    def normalize(self, raw: object, ctx: NormalizeContext) -> List[NormalizedFinding]:
        """Subclasses must convert raw output into normalized findings."""
        raise NotImplementedError

    def _resolve_binary(self) -> Optional[str]:
        """Locate the provider binary via a login shell, or None if absent.

        NOTE(review): binary_name is interpolated into a bash -lc command
        line — safe only while it comes from adapter constructors; confirm
        it is never user-supplied (shutil.which would avoid the shell).
        """
        result = subprocess.run(
            ["bash", "-lc", f"command -v {self.binary_name}"],
            capture_output=True,
            text=True,
            check=False,
        )
        value = result.stdout.strip()
        return value if value else None

    def _probe_version(self, binary: str) -> Optional[str]:
        """Best-effort version string: last line of `<binary> --version`."""
        result = subprocess.run([binary, "--version"], capture_output=True, text=True, check=False)
        # Some CLIs print the version to stderr instead of stdout.
        lines = (result.stdout or result.stderr).splitlines()
        return lines[-1].strip() if lines else None

    def _probe_auth(self, binary: str) -> bool:
        """True when the subclass-defined auth check command exits 0."""
        cmd = self._auth_check_command(binary)
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
        return result.returncode == 0

    def _auth_check_command(self, binary: str) -> List[str]:
        """Subclasses return the command used to verify credentials."""
        raise NotImplementedError

    def _build_command(self, input_task: TaskInput) -> List[str]:
        """Subclasses return the command executed for a task."""
        raise NotImplementedError

    def _build_command_for_record(self) -> List[str]:
        """Redacted command stored in result records; empty by default."""
        return []

    def _is_success(self, return_code: int, stdout_text: str, stderr_text: str) -> bool:
        """Default success test: exit code 0 (output text is ignored here,
        but subclasses may override to inspect it)."""
        _ = stdout_text
        _ = stderr_text
        return return_code == 0
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Dict, Iterable
5
+
6
+ from .contracts import ProviderId
7
+
8
+
9
# Version tag for the on-disk artifact layout produced by this module.
ARTIFACT_LAYOUT_VERSION = "stage-a-v1"
# Files expected directly under each task's artifact root.
ROOT_FILES = ("summary.md", "decision.md", "findings.json", "run.json")
# Subdirectories expected under each task's artifact root.
ROOT_DIRS = ("providers", "raw")
12
+
13
+
14
def task_artifact_root(base_dir: str, task_id: str) -> Path:
    """Artifact directory for one task: <base_dir>/<task_id>."""
    return Path(base_dir).joinpath(task_id)
16
+
17
+
18
def provider_artifact_name(provider: ProviderId) -> str:
    """File name of a provider's normalized-result artifact."""
    return "{}.json".format(provider)
20
+
21
+
22
def expected_paths(base_dir: str, task_id: str, providers: Iterable[ProviderId]) -> Dict[str, Path]:
    """Map logical artifact names to concrete paths for one task.

    Keys: "root", each ROOT_FILES entry, "providers_dir", "raw_dir", and
    per-provider "providers/<p>.json" / "raw/<p>.stdout.log" /
    "raw/<p>.stderr.log" entries.
    """
    root = task_artifact_root(base_dir, task_id)
    paths: Dict[str, Path] = {"root": root}

    for filename in ROOT_FILES:
        paths[filename] = root / filename

    providers_dir = root / "providers"
    raw_dir = root / "raw"
    paths["providers_dir"] = providers_dir
    paths["raw_dir"] = raw_dir

    for provider in providers:
        stem = str(provider)
        paths[f"providers/{stem}.json"] = providers_dir / provider_artifact_name(provider)
        paths[f"raw/{stem}.stdout.log"] = raw_dir / f"{stem}.stdout.log"
        paths[f"raw/{stem}.stderr.log"] = raw_dir / f"{stem}.stderr.log"

    return paths
40
+