@oswaldzsh/devhive 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -0
- package/__init__.py +0 -0
- package/agents/__init__.py +0 -0
- package/agents/base.py +118 -0
- package/agents/execute.py +150 -0
- package/agents/verifier_dynamic.py +164 -0
- package/agents/verifier_semantic.py +84 -0
- package/agents/verifier_static.py +153 -0
- package/bin/dh +77 -0
- package/config.yaml +71 -0
- package/control_plane/__init__.py +0 -0
- package/control_plane/cli.py +596 -0
- package/control_plane/dashboard.py +57 -0
- package/control_plane/notifications.py +54 -0
- package/control_plane/tui.py +352 -0
- package/install.sh +67 -0
- package/orchestrator/__init__.py +0 -0
- package/orchestrator/agent_pool.py +107 -0
- package/orchestrator/convergence_gate.py +133 -0
- package/orchestrator/engine.py +353 -0
- package/orchestrator/event_bus.py +58 -0
- package/orchestrator/task_queue.py +59 -0
- package/package.json +50 -0
- package/protocol/__init__.py +0 -0
- package/protocol/schemas.py +222 -0
- package/setup.py +44 -0
- package/signature/__init__.py +0 -0
- package/signature/engine.py +211 -0
- package/signature/extractor.py +156 -0
- package/signature/learner.py +75 -0
- package/signature/src/matcher.c +263 -0
- package/signature/src/matcher.h +135 -0
- package/signatures/seed_signatures.json +174 -0
- package/storage/__init__.py +0 -0
- package/storage/checkpoint.py +153 -0
- package/storage/signature_db.py +62 -0
- package/tools/__init__.py +0 -0
- package/tools/api_client.py +101 -0
- package/tools/git.py +75 -0
- package/tools/sandbox.py +79 -0
- package/verification/__init__.py +0 -0
- package/verification/diagnostic.py +124 -0
- package/verification/patterns/api_breaking.yaml +25 -0
- package/verification/patterns/code_quality.yaml +41 -0
- package/verification/patterns/security.yaml +41 -0
- package/verification/pipeline.py +61 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Storage layer — SQLite-based checkpoint and signature database."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# DDL applied every time a CheckpointStore is constructed. Every statement is
# guarded by IF NOT EXISTS, so running it against an existing database is a
# no-op.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS checkpoints (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL,
    stage TEXT NOT NULL,
    agent_id TEXT,
    handoff_json TEXT,
    verdict_json TEXT,
    state_before TEXT,
    state_after TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    duration_ms INTEGER,
    outcome TEXT,
    escalation_id TEXT
);

CREATE INDEX IF NOT EXISTS idx_checkpoint_task ON checkpoints(task_id, created_at);

CREATE TABLE IF NOT EXISTS tasks (
    id TEXT PRIMARY KEY,
    spec_json TEXT NOT NULL,
    branch TEXT NOT NULL,
    base_commit TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    current_stage TEXT DEFAULT 'SPECIFY',
    status TEXT DEFAULT 'pending'
);

CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status, created_at);

CREATE TABLE IF NOT EXISTS escalation_log (
    id TEXT PRIMARY KEY,
    task_id TEXT NOT NULL,
    report_json TEXT NOT NULL,
    resolved_by TEXT,
    resolved_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""


class CheckpointStore:
    """Persistent task-state snapshots backed by a SQLite file.

    Each public method opens its own short-lived connection, runs one
    statement, and closes the connection again, so no connection is held
    between calls.
    """

    def __init__(self, db_path: str = "storage/devhive.db"):
        """Create the parent directory (if any) and apply the schema.

        Args:
            db_path: Location of the SQLite file. A bare filename (no
                directory component) is accepted and resolved against the
                current working directory.
        """
        parent = os.path.dirname(db_path)
        # dirname() is "" for a bare filename and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when there is one.
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.db_path = db_path
        self._init_db()

    def _init_db(self):
        """Apply SCHEMA_SQL to the database file."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.executescript(SCHEMA_SQL)
        finally:
            conn.close()

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new connection whose rows support access by column name.

        The caller owns the connection and must close it.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _write(self, sql: str, params: tuple = ()) -> None:
        """Run one mutating statement in its own transaction, then close."""
        conn = self._get_conn()
        try:
            # The connection context manager commits on success and rolls
            # back on error; it does NOT close, hence the finally below.
            with conn:
                conn.execute(sql, params)
        finally:
            conn.close()

    def _fetch(self, sql: str, params: tuple = ()) -> list[dict]:
        """Run one query and return every row as a plain dict, then close."""
        conn = self._get_conn()
        try:
            return [dict(row) for row in conn.execute(sql, params).fetchall()]
        finally:
            conn.close()

    def save_checkpoint(self, checkpoint_id: str, task_id: str, stage: str,
                        agent_id: Optional[str] = None,
                        handoff_json: Optional[str] = None,
                        verdict_json: Optional[str] = None,
                        state_before: Optional[str] = None,
                        state_after: Optional[str] = None,
                        duration_ms: Optional[int] = None,
                        outcome: Optional[str] = None,
                        escalation_id: Optional[str] = None):
        """Insert a checkpoint row, replacing any row with the same id.

        Because this is INSERT OR REPLACE, re-saving an existing id also
        resets its ``created_at`` to the current time.
        """
        self._write(
            """INSERT OR REPLACE INTO checkpoints
               (id, task_id, stage, agent_id, handoff_json, verdict_json,
                state_before, state_after, duration_ms, outcome, escalation_id)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (checkpoint_id, task_id, stage, agent_id, handoff_json,
             verdict_json, state_before, state_after, duration_ms,
             outcome, escalation_id))

    def get_checkpoint(self, checkpoint_id: str) -> Optional[dict]:
        """Return the checkpoint with this id, or None if there is none."""
        rows = self._fetch(
            "SELECT * FROM checkpoints WHERE id = ?", (checkpoint_id,))
        return rows[0] if rows else None

    def get_task_history(self, task_id: str) -> list[dict]:
        """Return all checkpoints of a task, oldest first."""
        # created_at has one-second resolution; rowid breaks ties so that
        # checkpoints written within the same second keep insertion order.
        return self._fetch(
            "SELECT * FROM checkpoints WHERE task_id = ? "
            "ORDER BY created_at, rowid",
            (task_id,))

    def save_task(self, task_id: str, spec_json: str, branch: str, base_commit: str):
        """Insert a task row, replacing any row with the same id.

        A replaced task falls back to the column defaults for
        ``current_stage``, ``status`` and ``created_at``.
        """
        self._write(
            """INSERT OR REPLACE INTO tasks (id, spec_json, branch, base_commit)
               VALUES (?, ?, ?, ?)""",
            (task_id, spec_json, branch, base_commit))

    def update_task_stage(self, task_id: str, stage: str):
        """Set ``current_stage`` for a task; a no-op if the id is unknown."""
        self._write(
            "UPDATE tasks SET current_stage = ? WHERE id = ?",
            (stage, task_id))

    def get_task(self, task_id: str) -> Optional[dict]:
        """Return the task with this id, or None if there is none."""
        rows = self._fetch("SELECT * FROM tasks WHERE id = ?", (task_id,))
        return rows[0] if rows else None

    def get_pending_tasks(self) -> list[dict]:
        """Return all tasks whose status is 'pending', oldest first."""
        return self._fetch(
            "SELECT * FROM tasks WHERE status = 'pending' "
            "ORDER BY created_at, rowid")

    def save_escalation(self, escalation_id: str, task_id: str, report_json: str):
        """Insert an escalation row, replacing any row with the same id."""
        self._write(
            """INSERT OR REPLACE INTO escalation_log (id, task_id, report_json)
               VALUES (?, ?, ?)""",
            (escalation_id, task_id, report_json))

    def resolve_escalation(self, escalation_id: str, resolved_by: str):
        """Record who resolved an escalation and when (UTC, ISO 8601)."""
        from datetime import timezone

        # Same naive-UTC ISO string the deprecated datetime.utcnow() produced,
        # so previously stored values stay comparable.
        resolved_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )
        self._write(
            """UPDATE escalation_log SET resolved_by = ?, resolved_at = ?
               WHERE id = ?""",
            (resolved_by, resolved_at, escalation_id))

    def get_open_escalations(self) -> list[dict]:
        """Return escalations that nobody has resolved yet, oldest first."""
        return self._fetch(
            "SELECT * FROM escalation_log WHERE resolved_by IS NULL "
            "ORDER BY created_at, rowid")
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Signature Database — stores and queries failure signatures."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SignatureDB:
    """JSON-file backed signature store (Python wrapper).

    The heavy matching is done by the C extension (signature/src/matcher.c).
    This module handles CRUD and learning.

    All signatures are held in memory, keyed by ``signature_id``; every
    mutation rewrites the whole JSON file.
    """

    def __init__(self, db_path: str = "signatures/signatures.db"):
        """Load existing signatures from ``db_path`` if the file exists."""
        self.db_path = db_path
        self._signatures: dict[str, dict] = {}
        self._load()

    def _load(self):
        """Replace the in-memory map with the contents of the file, if any."""
        if not os.path.exists(self.db_path):
            return
        with open(self.db_path, encoding="utf-8") as f:
            data = json.load(f)
        self._signatures = {
            sig["signature_id"]: sig for sig in data.get("signatures", [])
        }

    def _save(self):
        """Write all signatures to ``db_path``.

        The data is first written to a sibling ``.tmp`` file and then moved
        into place with os.replace, so an interrupted write cannot leave a
        truncated, unparseable store behind.
        """
        parent = os.path.dirname(self.db_path)
        # A bare filename has no directory part; os.makedirs("") would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        tmp_path = f"{self.db_path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump({"signatures": list(self._signatures.values())}, f, indent=2)
        os.replace(tmp_path, self.db_path)

    def add(self, signature: dict):
        """Store ``signature`` under its ``signature_id`` and persist.

        An existing signature with the same id is overwritten.

        Raises:
            KeyError: if ``signature`` has no ``signature_id`` key.
        """
        self._signatures[signature["signature_id"]] = signature
        self._save()

    def get(self, sig_id: str) -> Optional[dict]:
        """Return the signature with this id, or None if unknown."""
        return self._signatures.get(sig_id)

    def all(self) -> list[dict]:
        """Return every stored signature as a new list."""
        return list(self._signatures.values())

    def update_match(self, sig_id: str):
        """Increment match count and update last_matched timestamp.

        Unknown ids are ignored and nothing is written.
        """
        entry = self._signatures.get(sig_id)
        if entry is None:
            return
        from datetime import datetime, timezone

        # Naive-UTC ISO string, identical in format to what the deprecated
        # datetime.utcnow() produced.
        entry["last_matched"] = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )
        entry["match_count"] = entry.get("match_count", 0) + 1
        self._save()

    def seed_from_file(self, seed_path: str):
        """Load seed signatures from a JSON file.

        Signatures whose id is already present are left untouched. A missing
        seed file is ignored. The store is only rewritten when at least one
        signature was actually added.
        """
        if not os.path.exists(seed_path):
            return
        with open(seed_path, encoding="utf-8") as f:
            data = json.load(f)
        added = False
        for sig in data.get("signatures", []):
            if sig["signature_id"] not in self._signatures:
                self._signatures[sig["signature_id"]] = sig
                added = True
        if added:
            self._save()

    def count(self) -> int:
        """Return the number of stored signatures."""
        return len(self._signatures)
|
|
File without changes
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Anthropic-compatible API client for lejuapi."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import json
|
|
5
|
+
import httpx
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class APIClient:
    """Async client for Anthropic-compatible Messages API via lejuapi."""

    def __init__(self, base_url: Optional[str] = None,
                 auth_token: Optional[str] = None,
                 default_model: Optional[str] = None):
        """Resolve connection settings from arguments, then the environment.

        Args:
            base_url: API root. Falls back to ``ANTHROPIC_BASE_URL`` and then
                to the built-in default. Trailing slashes are removed.
            auth_token: API key. Falls back to ``ANTHROPIC_AUTH_TOKEN``, then
                ``LEJU_TOKEN``, then the empty string.
            default_model: Model used when a call does not name one. Falls
                back to ``DEVHIVE_MODEL`` and then to the built-in default.
        """
        # `or` (rather than getenv's default argument) so that a variable
        # that is set but empty still falls through to the next source.
        resolved_url = (base_url
                        or os.getenv("ANTHROPIC_BASE_URL")
                        or "https://aiapi.lejurobot.com")
        # Strip trailing slashes so the endpoint never becomes ".../​/v1/messages".
        self.base_url = resolved_url.rstrip("/")
        self.auth_token = (auth_token
                           or os.getenv("ANTHROPIC_AUTH_TOKEN")
                           or os.getenv("LEJU_TOKEN", ""))
        self.default_model = (default_model
                              or os.getenv("DEVHIVE_MODEL")
                              or "deepseek/deepseek-v4-pro")

    def _headers(self) -> dict:
        """Return the HTTP headers sent with every request."""
        return {
            "x-api-key": self.auth_token,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        }

    def _build_body(self, system: str, messages: list[dict],
                    tools: Optional[list[dict]], model: Optional[str],
                    max_tokens: int, temperature: Optional[float],
                    stream: bool) -> dict:
        """Assemble the JSON request body shared by both request methods."""
        body = {
            "model": model or self.default_model,
            "max_tokens": max_tokens,
            "system": system,
            "messages": messages,
        }
        if temperature is not None:
            body["temperature"] = temperature
        if stream:
            body["stream"] = True
        if tools:
            body["tools"] = tools
        return body

    async def create_message(
        self,
        system: str,
        messages: list[dict],
        tools: Optional[list[dict]] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
    ) -> dict:
        """Send a message to the model and return the API response.

        Returns:
            The decoded JSON response body.

        Raises:
            httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
        """
        body = self._build_body(system, messages, tools, model,
                                max_tokens, temperature, stream=False)

        async with httpx.AsyncClient(timeout=httpx.Timeout(600.0)) as client:
            resp = await client.post(
                f"{self.base_url}/v1/messages",
                headers=self._headers(),
                json=body,
            )
            resp.raise_for_status()
            return resp.json()

    async def create_message_stream(
        self,
        system: str,
        messages: list[dict],
        tools: Optional[list[dict]] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: Optional[float] = None,
    ):
        """Stream a message response, yielding one decoded event per SSE data line.

        Args:
            temperature: Sampling temperature. When None (the default) the
                field is omitted from the request, as before.

        Yields:
            Each ``data:`` payload of the event stream, parsed from JSON.

        Raises:
            httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
        """
        body = self._build_body(system, messages, tools, model,
                                max_tokens, temperature, stream=True)

        async with httpx.AsyncClient(timeout=httpx.Timeout(600.0)) as client:
            async with client.stream(
                "POST",
                f"{self.base_url}/v1/messages",
                headers=self._headers(),
                json=body,
            ) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    # The space after "data:" is optional in server-sent events.
                    if not line.startswith("data:"):
                        continue
                    payload = line[5:].strip()
                    # Some OpenAI-style gateways end the stream with a
                    # literal "[DONE]" marker, which is not JSON.
                    if not payload or payload == "[DONE]":
                        continue
                    yield json.loads(payload)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def extract_text_from_response(response: dict) -> str:
    """Extract the text content from an Anthropic API response.

    Returns the text of the first content block whose type is "text";
    later text blocks are not included. Returns "" when the response has no
    content, its content is null, or it contains no text block.
    """
    # `or []` also covers an explicit `"content": null`, which .get()'s
    # default argument would not.
    for block in response.get("content") or []:
        if block.get("type") == "text":
            return block.get("text") or ""
    return ""
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def extract_tool_use(response: dict) -> list[dict]:
    """Extract tool_use blocks from an Anthropic API response.

    Returns the content blocks whose type is "tool_use", in response order;
    an empty list when content is missing, null, or has no such block.
    """
    # `or []` also covers an explicit `"content": null`.
    blocks = response.get("content") or []
    return [b for b in blocks if b.get("type") == "tool_use"]
|
package/tools/git.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Git operations wrapper — safe, auditable, with trace."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
import os
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GitError(Exception):
    """Raised when a git command fails, times out, or cannot be started."""


class GitOps:
    """Thin wrapper around git CLI. Every invocation is logged for audit.

    Attributes:
        repo_path: Directory passed to ``git -C``.
        command_log: One ``{"timestamp", "command"}`` dict per git
            invocation, appended before the command is executed.
    """

    def __init__(self, repo_path: str = "."):
        self.repo_path = repo_path
        self.command_log: list[dict] = []

    def _run(self, args: list[str], capture: bool = True) -> Optional[str]:
        """Run ``git -C <repo_path> <args>`` with a 120 second timeout.

        Args:
            args: Arguments placed after ``git -C <repo_path>``.
            capture: When True, capture output and return stripped stdout.
                When False, git writes to the inherited streams and None is
                returned.

        Raises:
            GitError: if git exits with a non-zero status (regardless of
                ``capture``), times out, or the executable is not found.
        """
        from datetime import timezone

        cmd = ["git", "-C", self.repo_path, *args]
        printable = " ".join(cmd)
        self.command_log.append({
            # Naive-UTC ISO string, same format datetime.utcnow() produced.
            "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "command": printable,
        })
        try:
            result = subprocess.run(cmd, capture_output=capture, text=True, timeout=120)
        except subprocess.TimeoutExpired as exc:
            raise GitError(f"Git command timed out: {printable}") from exc
        except FileNotFoundError as exc:
            raise GitError("git executable not found") from exc

        if result.returncode != 0:
            # stderr is only available when output was captured; either way
            # a failed command must not be reported as success.
            detail = result.stderr.strip() if capture else ""
            raise GitError(
                detail or f"git exited with status {result.returncode}: {printable}"
            )
        return result.stdout.strip() if capture else None

    def current_branch(self) -> str:
        """Return the checked-out branch name ("" if git prints none)."""
        return self._run(["branch", "--show-current"]) or ""

    def current_commit(self) -> str:
        """Return the full hash that HEAD resolves to."""
        return self._run(["rev-parse", "HEAD"]) or ""

    def create_branch(self, name: str, base: str = "HEAD"):
        """Create branch ``name`` at ``base`` and check it out."""
        self._run(["checkout", "-b", name, base])

    def checkout(self, branch: str):
        """Check out ``branch``."""
        self._run(["checkout", branch])

    def status(self) -> str:
        """Return the output of ``git status --short``."""
        return self._run(["status", "--short"]) or ""

    def diff(self, target: str = "HEAD") -> str:
        """Return the output of ``git diff <target>``."""
        return self._run(["diff", target]) or ""

    def diff_stat(self, target: str = "HEAD") -> str:
        """Return the output of ``git diff --stat <target>``."""
        return self._run(["diff", "--stat", target]) or ""

    def changed_files(self, target: str = "HEAD") -> list[str]:
        """Return the paths reported by ``git diff --name-only <target>``."""
        output = self._run(["diff", "--name-only", target]) or ""
        return [path for path in output.split("\n") if path]

    def stage(self, *files: str):
        """Run ``git add`` on the given paths."""
        self._run(["add", *files])

    def commit(self, message: str):
        """Commit the staged changes with ``message``."""
        self._run(["commit", "-m", message])

    def get_file_at_commit(self, file_path: str, commit: str = "HEAD") -> str:
        """Return ``file_path`` as of ``commit``.

        Leading and trailing whitespace of the content is stripped, because
        the result passes through the same stripping as every other command.
        """
        return self._run(["show", f"{commit}:{file_path}"]) or ""

    def log(self, n: int = 10, oneline: bool = True) -> str:
        """Return the last ``n`` log entries, one line each when ``oneline``."""
        args = ["log", f"-{n}"]
        if oneline:
            args.append("--oneline")
        return self._run(args) or ""

    def merge_base(self, branch: str) -> str:
        """Return the merge base of HEAD and ``branch``."""
        return self._run(["merge-base", "HEAD", branch]) or ""
|
package/tools/sandbox.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Sandbox management — Docker-based isolation for agent execution."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
import os
|
|
5
|
+
import tempfile
|
|
6
|
+
import shutil
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SandboxError(Exception):
    """Raised when the sandbox image or container cannot be used."""


class Sandbox:
    """Docker-based sandbox for safe agent code execution.

    One instance tracks at most one running container. It can be used as a
    context manager; leaving the ``with`` block calls destroy().
    """

    def __init__(self, image: str = "devhive-sandbox:latest",
                 memory_limit: str = "2g", cpu_limit: int = 2,
                 timeout: int = 600):
        """Store the container settings; no docker command is run here.

        Args:
            image: Image the container is started from.
            memory_limit: Value passed to ``docker run --memory``.
            cpu_limit: Value passed to ``docker run --cpus``.
            timeout: Default timeout, in seconds, for each docker command.
        """
        self.image = image
        self.memory_limit = memory_limit
        self.cpu_limit = cpu_limit
        self.timeout = timeout
        self._container_id: Optional[str] = None

    def _run_docker(self, args: list[str],
                    timeout: Optional[int] = None) -> subprocess.CompletedProcess:
        """Run ``docker <args>`` capturing text output.

        A falsy ``timeout`` (None or 0) means "use the instance default".

        Raises:
            subprocess.TimeoutExpired: if the command exceeds the timeout.
        """
        effective_timeout = timeout or self.timeout
        return subprocess.run(["docker", *args], capture_output=True,
                              text=True, timeout=effective_timeout)

    def ensure_image(self):
        """Pull the sandbox image if not present.

        Raises:
            SandboxError: if the pull fails.
        """
        listing = self._run_docker(["images", "-q", self.image])
        if listing.stdout.strip():
            return
        pulled = self._run_docker(["pull", self.image], timeout=300)
        if pulled.returncode != 0:
            raise SandboxError(f"Failed to pull image: {pulled.stderr}")

    def create(self, repo_path: str, env_vars: Optional[dict[str, str]] = None) -> str:
        """Create a sandbox container from the repo and return its ID.

        The repository is bind-mounted at ``/workspace`` and the container
        idles on ``sleep infinity`` until destroy() stops it. If this
        instance already tracks a container, that one is stopped first so it
        is not left running with nothing referring to it.

        Raises:
            SandboxError: if the image cannot be pulled or the container
                cannot be started.
        """
        if self._container_id:
            self.destroy()

        self.ensure_image()
        env_args: list[str] = []
        for key, value in (env_vars or {}).items():
            env_args.extend(["-e", f"{key}={value}"])

        result = self._run_docker([
            "run", "-d", "--rm",
            "--memory", self.memory_limit,
            "--cpus", str(self.cpu_limit),
            "-v", f"{os.path.abspath(repo_path)}:/workspace",
            "-w", "/workspace",
            *env_args,
            self.image, "sleep", "infinity",
        ])

        if result.returncode != 0:
            raise SandboxError(f"Failed to create sandbox: {result.stderr}")
        self._container_id = result.stdout.strip()
        return self._container_id

    def exec(self, command: str, timeout: int = None) -> tuple[int, str, str]:
        """Execute a command inside the sandbox. Returns (returncode, stdout, stderr).

        The command string is run through ``bash -c`` in the container.

        Raises:
            SandboxError: if create() has not been called.
            subprocess.TimeoutExpired: if the command exceeds the timeout.
        """
        if not self._container_id:
            raise SandboxError("No sandbox container. Call create() first.")
        result = self._run_docker(
            ["exec", self._container_id, "bash", "-c", command],
            timeout=timeout,
        )
        return result.returncode, result.stdout, result.stderr

    def destroy(self):
        """Stop the sandbox container; it was started with --rm, so stopping removes it.

        Does nothing when no container is tracked.
        """
        if not self._container_id:
            return
        self._run_docker(["stop", self._container_id])
        self._container_id = None

    def __enter__(self):
        # Entering does not create a container; call create() explicitly.
        return self

    def __exit__(self, *args):
        self.destroy()
|
|
File without changes
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Diagnostic Aggregator — deterministic rule engine for verdict aggregation."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from protocol.schemas import (
|
|
7
|
+
Verdict, SemanticVerdict, ConvergenceDecision, ConcurrencyAction,
|
|
8
|
+
EscalationReport, VerdictOverall, ConflictType, Alignment,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class AggregatorResult:
    """Decision produced by one DiagnosticAggregator aggregation pass."""

    # What should happen next; the aggregator in this module uses PASS, FIX,
    # ESCALATE and CONFLICT.
    action: ConcurrencyAction
    # Human-readable explanation, prefixed with the layer ("L1: ..." / "L2: ...").
    reason: str
    # Signature of the first matched finding when the action is FIX at L1;
    # otherwise left as None.
    fix_strategy: Optional[str] = None
    # Not populated by the aggregation methods in this module.
    escalation: Optional[EscalationReport] = None
    # Set to ConflictType.INTERPRETATION for semantic DEVIATED/CONFLICT results.
    conflict_type: Optional[ConflictType] = None
    # True on the ESCALATE and CONFLICT results produced in this module.
    needs_human: bool = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DiagnosticAggregator:
    """Merges multiple Verifier outputs using deterministic rules.

    This is NOT an LLM-based agent. It uses a rule engine because:
    1. Aggregation must be predictable and auditable
    2. No black-box decisions in the critical path
    3. Fast — runs in microseconds, not seconds
    """

    def __init__(self, config: dict = None):
        """Store the optional configuration (currently not read by any rule)."""
        self.config = config or {}

    def aggregate_l1(self, static: Verdict, dynamic: Verdict,
                     task_id: str) -> AggregatorResult:
        """Aggregate Static + Dynamic verdicts (L1 convergence check).

        Rules, in order:
        1. Both verdicts PASS -> PASS.
        2. Any CRITICAL/HIGH finding with a matched signature -> FIX, using
           the first such finding's signature as the fix strategy.
        3. Any other CRITICAL/HIGH finding -> ESCALATE to a human.
        4. Otherwise -> PASS with warnings.

        Args:
            static: Verdict of the static verifier.
            dynamic: Verdict of the dynamic verifier.
            task_id: Accepted for interface symmetry; not used by the rules.
        """
        both_passed = (static.overall == VerdictOverall.PASS
                       and dynamic.overall == VerdictOverall.PASS)
        if both_passed:
            return AggregatorResult(
                action=ConcurrencyAction.PASS,
                reason="L1: Static and Dynamic verification both passed",
            )

        # At least one verdict is not PASS: decide between fix, escalate, pass.
        all_findings = static.findings + dynamic.findings
        critical_findings = [f for f in all_findings
                             if f.severity in ("CRITICAL", "HIGH")]
        matched_findings = [f for f in critical_findings
                            if f.matched_signature]

        if matched_findings:
            # A known signature exists for at least one serious finding.
            return AggregatorResult(
                action=ConcurrencyAction.FIX,
                reason=f"L1: {len(matched_findings)} findings matched known signatures",
                fix_strategy=matched_findings[0].matched_signature,
            )
        if critical_findings:
            # Serious findings with no known fix need a human.
            return AggregatorResult(
                action=ConcurrencyAction.ESCALATE,
                reason=f"L1: {len(critical_findings)} critical findings with no known fix",
                needs_human=True,
            )
        # NOTE(review): this branch is also reached when a verdict's overall
        # is FAIL but none of its findings is CRITICAL/HIGH (or it has no
        # findings at all) — confirm that passing is intended in that case.
        return AggregatorResult(
            action=ConcurrencyAction.PASS,
            reason="L1: No critical findings, passing with warnings",
        )

    def aggregate_l2(self, static: Verdict, dynamic: Verdict,
                     semantic: SemanticVerdict,
                     mutation: Optional[Verdict],
                     task_id: str) -> AggregatorResult:
        """Aggregate L1 + Semantic + Mutation (L2 convergence check).

        Rules, in order:
        1. A non-PASS L1 result is returned unchanged.
        2. Semantic DEVIATED -> ESCALATE; semantic CONFLICT -> CONFLICT.
        3. Mutation verdict FAIL -> FIX.
        4. Semantic ENHANCED -> PASS with a human-review recommendation.
        5. Otherwise -> PASS.

        Args:
            static: Verdict of the static verifier.
            dynamic: Verdict of the dynamic verifier.
            semantic: Verdict of the semantic verifier.
            mutation: Mutation-testing verdict, or None when it was not run.
            task_id: Forwarded to aggregate_l1.
        """
        l1 = self.aggregate_l1(static, dynamic, task_id)
        if l1.action != ConcurrencyAction.PASS:
            return l1

        # Blocking semantic outcomes.
        if semantic.alignment == Alignment.DEVIATED:
            return AggregatorResult(
                action=ConcurrencyAction.ESCALATE,
                reason=f"L2: Semantic alignment DEVIATED — {semantic.reasoning}",
                conflict_type=ConflictType.INTERPRETATION,
                needs_human=True,
            )
        if semantic.alignment == Alignment.CONFLICT:
            return AggregatorResult(
                action=ConcurrencyAction.CONFLICT,
                reason=f"L2: Semantic CONFLICT detected — {semantic.reasoning}",
                conflict_type=ConflictType.INTERPRETATION,
                needs_human=True,
            )

        # Mutation is checked before the non-blocking ENHANCED result, so an
        # ENHANCED alignment can no longer hide a failing mutation verdict.
        if mutation is not None and mutation.overall == VerdictOverall.FAIL:
            return AggregatorResult(
                action=ConcurrencyAction.FIX,
                reason="L2: Mutation testing found coverage gaps",
            )

        if semantic.alignment == Alignment.ENHANCED:
            # Flag for human review but don't block.
            return AggregatorResult(
                action=ConcurrencyAction.PASS,
                reason="L2: Semantically ENHANCED — changes exceed Spec but are reasonable. Human review recommended.",
            )

        return AggregatorResult(
            action=ConcurrencyAction.PASS,
            reason="L2: All verification layers passed",
        )

    def detect_conflict(self, verdicts: list[Verdict]) -> Optional[ConflictType]:
        """Detect if multiple verdicts contradict each other.

        Not implemented yet: always returns None. The planned categories are
        listed below.
        """
        # FACT conflict: Static says PASS, Dynamic says FAIL for same assertion
        # INTERPRETATION: Semantic says DEVIATED but Static/Dynamic say PASS
        # SPEC_AMBIGUITY: Multiple Semantic verdicts with different interpretations
        return None  # MVP: basic conflict detection
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# API breaking change detection rules
|
|
2
|
+
patterns:
|
|
3
|
+
- id: P001
|
|
4
|
+
name: public_api_break
|
|
5
|
+
desc: "Public function signature changed but callers not updated in this diff"
|
|
6
|
+
severity: HIGH
|
|
7
|
+
detector:
|
|
8
|
+
type: ast_diff
|
|
9
|
+
rule: "function signature changed AND call sites unchanged"
|
|
10
|
+
|
|
11
|
+
- id: P002
|
|
12
|
+
name: removed_public_symbol
|
|
13
|
+
desc: "Public class/function/module removed without deprecation period"
|
|
14
|
+
severity: HIGH
|
|
15
|
+
detector:
|
|
16
|
+
type: ast_diff
|
|
17
|
+
rule: "public symbol removed AND no deprecation warning added"
|
|
18
|
+
|
|
19
|
+
- id: P003
|
|
20
|
+
name: return_type_changed
|
|
21
|
+
desc: "Function return type changed — may break downstream consumers"
|
|
22
|
+
severity: HIGH
|
|
23
|
+
detector:
|
|
24
|
+
type: ast_diff
|
|
25
|
+
rule: "return type annotation changed AND callers not updated"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Code quality regression detection rules
|
|
2
|
+
patterns:
|
|
3
|
+
- id: Q001
|
|
4
|
+
name: error_swallowed
|
|
5
|
+
desc: "Exception caught but not handled or logged"
|
|
6
|
+
severity: MEDIUM
|
|
7
|
+
detector:
|
|
8
|
+
type: ast_pattern
|
|
9
|
+
rule: "except block with empty body or bare pass"
|
|
10
|
+
|
|
11
|
+
- id: Q002
|
|
12
|
+
name: overly_broad_except
|
|
13
|
+
desc: "Bare 'except:' or 'except Exception:' without specific types"
|
|
14
|
+
severity: LOW
|
|
15
|
+
detector:
|
|
16
|
+
type: ast_pattern
|
|
17
|
+
rule: "bare except or except Exception without specific exception types"
|
|
18
|
+
|
|
19
|
+
- id: Q003
|
|
20
|
+
name: dependency_drift
|
|
21
|
+
desc: "New external dependency introduced without explicit version pinning"
|
|
22
|
+
severity: MEDIUM
|
|
23
|
+
detector:
|
|
24
|
+
type: file_diff
|
|
25
|
+
rule: "new entry in requirements.txt/Cargo.toml/package.json without version"
|
|
26
|
+
|
|
27
|
+
- id: Q004
|
|
28
|
+
name: commented_out_code
|
|
29
|
+
desc: "Blocks of commented-out code added — should be removed"
|
|
30
|
+
severity: LOW
|
|
31
|
+
detector:
|
|
32
|
+
type: pattern_match
|
|
33
|
+
rule: "5+ consecutive lines of commented-out code in diff"
|
|
34
|
+
|
|
35
|
+
- id: Q005
|
|
36
|
+
name: todo_without_ticket
|
|
37
|
+
desc: "TODO/FIXME comment without issue tracker reference"
|
|
38
|
+
severity: LOW
|
|
39
|
+
detector:
|
|
40
|
+
type: pattern_match
|
|
41
|
+
rule: "TODO|FIXME|HACK without issue/PR reference"
|