@oswaldzsh/devhive 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -0
- package/__init__.py +0 -0
- package/agents/__init__.py +0 -0
- package/agents/base.py +118 -0
- package/agents/execute.py +150 -0
- package/agents/verifier_dynamic.py +164 -0
- package/agents/verifier_semantic.py +84 -0
- package/agents/verifier_static.py +153 -0
- package/bin/dh +77 -0
- package/config.yaml +71 -0
- package/control_plane/__init__.py +0 -0
- package/control_plane/cli.py +596 -0
- package/control_plane/dashboard.py +57 -0
- package/control_plane/notifications.py +54 -0
- package/control_plane/tui.py +352 -0
- package/install.sh +67 -0
- package/orchestrator/__init__.py +0 -0
- package/orchestrator/agent_pool.py +107 -0
- package/orchestrator/convergence_gate.py +133 -0
- package/orchestrator/engine.py +353 -0
- package/orchestrator/event_bus.py +58 -0
- package/orchestrator/task_queue.py +59 -0
- package/package.json +50 -0
- package/protocol/__init__.py +0 -0
- package/protocol/schemas.py +222 -0
- package/setup.py +44 -0
- package/signature/__init__.py +0 -0
- package/signature/engine.py +211 -0
- package/signature/extractor.py +156 -0
- package/signature/learner.py +75 -0
- package/signature/src/matcher.c +263 -0
- package/signature/src/matcher.h +135 -0
- package/signatures/seed_signatures.json +174 -0
- package/storage/__init__.py +0 -0
- package/storage/checkpoint.py +153 -0
- package/storage/signature_db.py +62 -0
- package/tools/__init__.py +0 -0
- package/tools/api_client.py +101 -0
- package/tools/git.py +75 -0
- package/tools/sandbox.py +79 -0
- package/verification/__init__.py +0 -0
- package/verification/diagnostic.py +124 -0
- package/verification/patterns/api_breaking.yaml +25 -0
- package/verification/patterns/code_quality.yaml +41 -0
- package/verification/patterns/security.yaml +41 -0
- package/verification/pipeline.py +61 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""DevHive Protocol Schemas
|
|
2
|
+
|
|
3
|
+
All inter-agent communication is fully typed via Pydantic models.
|
|
4
|
+
No natural-language handoffs — every message is machine-verifiable.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _utcnow() -> datetime:
|
|
14
|
+
return datetime.now(timezone.utc).replace(tzinfo=None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ── Enums ────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
class ChangeType(str, Enum):
    # Kind of modification recorded on a FileChange. Because of the ``str``
    # mixin every member compares equal to its lowercase value string.
    LOGIC_FIX = "logic_fix"
    NEW_FEATURE = "new_feature"
    REFACTOR = "refactor"
    CONFIG = "config"
    DEPENDENCY = "dependency"
    DOCS = "docs"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Severity(str, Enum):
    # Severity levels used by RiskAssessment and Finding, declared from most
    # to least severe. Values are the upper-case member names.
    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Priority(str, Enum):
    # Priority levels used by TaskSpec and VerificationFocus. Unlike Severity
    # there is no INFO member.
    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class VerdictOverall(str, Enum):
    # Overall outcome carried by Verdict and SemanticVerdict.
    PASS = "PASS"
    WARN = "WARN"
    FAIL = "FAIL"
    CONFLICT = "CONFLICT"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class VerifierType(str, Enum):
    # Identifies which verifier produced a Verdict; values are lowercase.
    STATIC = "static"
    DYNAMIC = "dynamic"
    SEMANTIC = "semantic"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ConflictType(str, Enum):
    # Categories of conflict. No model in this module references this enum;
    # presumably it is consumed elsewhere in the package (verify).
    FACT = "fact"
    INTERPRETATION = "interpretation"
    SPEC_AMBIGUITY = "spec_ambiguity"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ConcurrencyAction(str, Enum):
    # Action selected by a ConvergenceDecision (its ``action`` field).
    # NOTE(review): the name says "Concurrency" although the only visible use
    # is convergence — confirm before renaming, callers import this name.
    PASS = "PASS"
    FIX = "FIX"
    ESCALATE = "ESCALATE"
    CONFLICT = "CONFLICT"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Alignment(str, Enum):
    # Alignment result reported by SemanticVerdict (its ``alignment`` field).
    ALIGNED = "ALIGNED"
    ENHANCED = "ENHANCED"
    DEVIATED = "DEVIATED"
    CONFLICT = "CONFLICT"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class SuggestedAction(str, Enum):
    # Follow-up action attached to a Finding; Finding defaults to ESCALATE.
    ROLLBACK = "ROLLBACK"
    FIX = "FIX"
    RETEST = "RETEST"
    ESCALATE = "ESCALATE"
    IGNORE = "IGNORE"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ── Task / Spec ──────────────────────────────────────────────
|
|
85
|
+
|
|
86
|
+
class TaskSpec(BaseModel):
    """Human-provided target-state description."""
    # Required free-text fields.
    title: str
    description: str
    # Optional string lists; each instance gets its own empty list by default.
    acceptance_criteria: list[str] = Field(default_factory=list)
    scope_constraints: list[str] = Field(default_factory=list)
    sensitive_modules: list[str] = Field(default_factory=list)
    # Falls back to MEDIUM when the author does not set a priority.
    priority: Priority = Priority.MEDIUM
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class Task(BaseModel):
    # A unit of work: identifier, its TaskSpec and the git coordinates it
    # applies to.
    id: str
    spec: TaskSpec
    branch: str
    base_commit: str
    # Naive UTC creation time, filled in by _utcnow() when omitted.
    created_at: datetime = Field(default_factory=_utcnow)
    # Stage label; new tasks start in "SPECIFY". Other stage names are not
    # defined in this module.
    current_stage: str = "SPECIFY"
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ── Handoff ──────────────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
class RiskAssessment(BaseModel):
    # One self-assessed risk: free-text description plus its Severity.
    risk: str
    severity: Severity
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class FileChange(BaseModel):
    # Description of one changed file inside an ExecutionHandoff.
    file: str
    # Untyped dict; expected shape is {"start_line": int, "end_line": int}
    # (not enforced by the model).
    diff_range: dict
    change_type: ChangeType
    summary: str
    # Risks the producing agent reported for this change; empty by default.
    risk_self_assessment: list[RiskAssessment] = Field(default_factory=list)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class VerificationFocus(BaseModel):
    # A verification hint: what to check, how to check it, and its Priority.
    # All three fields are required.
    what: str
    how_to_verify: str
    priority: Priority
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class EnvChanges(BaseModel):
    # Environment impact of a change; every field has a default, so
    # EnvChanges() is a valid "nothing changed" value.
    new_dependencies: list[str] = Field(default_factory=list)
    config_changes: list[str] = Field(default_factory=list)
    migration_needed: bool = False
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class ExecutionTrace(BaseModel):
    # Record of what the executing agent ran; defaults describe an empty trace
    # whose self-check has not passed.
    commands_run: list[str] = Field(default_factory=list)
    self_check_passed: bool = False
    # Optional hash string of the self-check output; None when not recorded.
    self_check_output_hash: Optional[str] = None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class ExecutionHandoff(BaseModel):
    """Emitted by Execute Agent after producing code changes."""
    # Schema version of this message.
    handoff_version: str = "1.0"
    source: str
    task_id: str
    # Naive UTC time, filled in by _utcnow() when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    intent: str
    # Required lists: callers must supply them, even if empty.
    changes: list[FileChange]
    verification_focus: list[VerificationFocus]
    # Defaults to a fresh, all-default EnvChanges instance.
    env_changes: EnvChanges = Field(default_factory=EnvChanges)
    execution_trace: ExecutionTrace
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# ── Verdict ──────────────────────────────────────────────────
|
|
152
|
+
|
|
153
|
+
class FindingEvidence(BaseModel):
    # Evidence attached to a Finding.
    # ``type`` is a free string; intended values: diff | log | metric | ast | trace
    # (not enforced by the model).
    type: str
    data: str
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _new_finding_id() -> str:
    """Build a default ``Finding.id`` of the form ``F-<epoch seconds>-<8 hex chars>``.

    The epoch is taken from a timezone-aware UTC ``datetime``: calling
    ``.timestamp()`` on a naive value makes Python interpret it as local time,
    which skews the result by the machine's UTC offset. The random suffix keeps
    ids distinct when several findings are created within one clock tick.
    """
    import uuid  # local import: avoids adding a file-level dependency

    epoch_seconds = datetime.now(timezone.utc).timestamp()
    return f"F-{epoch_seconds}-{uuid.uuid4().hex[:8]}"


class Finding(BaseModel):
    # A single issue reported by a verifier.
    # Auto-generated when omitted; see _new_finding_id().
    id: str = Field(default_factory=_new_finding_id)
    severity: Severity
    category: str
    title: str
    detail: str
    evidence: FindingEvidence
    # Optional signature identifier string; None when no signature matched.
    matched_signature: Optional[str] = None
    # Defaults to ESCALATE when the verifier does not pick an action.
    suggested_action: SuggestedAction = SuggestedAction.ESCALATE
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class Verdict(BaseModel):
    """Output from any Verifier Agent."""
    # Schema version of this message.
    verdict_version: str = "1.0"
    verifier_type: VerifierType
    task_id: str
    # Naive UTC time, filled in by _utcnow() when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    overall: VerdictOverall
    # Individual findings; empty list by default.
    findings: list[Finding] = Field(default_factory=list)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ── Semantic Verdict ─────────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
class SemanticVerdict(BaseModel):
    """Specialized verdict for semantic verification."""
    # Schema version of this message.
    verdict_version: str = "1.0"
    # Plain string here (Verdict uses the VerifierType enum for this field).
    verifier_type: str = "semantic"
    task_id: str
    # Naive UTC time, filled in by _utcnow() when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    alignment: Alignment
    reasoning: str
    # Free-text concerns; empty list by default.
    concerns: list[str] = Field(default_factory=list)
    # Defaults to PASS unless the producer sets another outcome.
    overall: VerdictOverall = VerdictOverall.PASS
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ── Convergence ──────────────────────────────────────────────
|
|
194
|
+
|
|
195
|
+
class ConvergenceDecision(BaseModel):
    # Decision record: the chosen action and the reason for it.
    action: ConcurrencyAction
    reason: str
    # Optional details; both default to None.
    fix_strategy: Optional[str] = None
    # Forward reference: EscalationReport is declared later in this module.
    escalation: Optional['EscalationReport'] = None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ── Escalation ────────────────────────────────────────────────
|
|
203
|
+
|
|
204
|
+
class EscalationReport(BaseModel):
    # Report attached to a ConvergenceDecision through its ``escalation`` field.
    escalation_id: str
    task_id: str
    triggered_by: str
    # Untyped snapshot dict; its keys are not defined in this module.
    current_state: dict
    # Optional context; lists default to empty.
    history: list[dict] = Field(default_factory=list)
    what_agent_tried: list[str] = Field(default_factory=list)
    blocking_finding: Optional[dict] = None
    suggested_human_action: str = ""
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ── Events ───────────────────────────────────────────────────
|
|
216
|
+
|
|
217
|
+
class DevHiveEvent(BaseModel):
    """Base event type for the event bus."""
    event_type: str
    task_id: str
    # Naive UTC time, filled in by _utcnow() when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    # Arbitrary event data; empty dict by default.
    payload: dict = Field(default_factory=dict)
|
package/setup.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Setup script for the C signature matching extension."""
|
|
2
|
+
|
|
3
|
+
from setuptools import setup, Extension
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
# C extension built from signature/src/matcher.c.
# NOTE(review): the compile/link flags below are GCC/Clang spellings and
# "-march=native" targets the build machine's CPU — confirm that building on
# the installing host is the only supported path.
matcher_ext = Extension(
    name="signature._matcher",
    sources=[os.path.join("signature", "src", "matcher.c")],
    include_dirs=[os.path.join("signature", "src")],
    extra_compile_args=["-O3", "-march=native", "-ffast-math"],
    extra_link_args=["-lm"],
)

setup(
    name="devhive",
    version="0.1.0",
    description="DevHive — Multi-Agent Software Development System",
    # The directory containing this script is the root of the "devhive"
    # package, so its sub-directories become the sub-packages listed here.
    package_dir={"devhive": "."},
    packages=[
        "devhive",
        "devhive.orchestrator",
        "devhive.agents",
        "devhive.protocol",
        "devhive.verification",
        "devhive.signature",
        "devhive.storage",
        "devhive.tools",
        "devhive.control_plane",
    ],
    ext_modules=[matcher_ext],
    install_requires=[
        "httpx>=0.25.0",
        "pydantic>=2.0.0",
        "pyyaml>=6.0",
    ],
    # Installs a "devhive" command that calls devhive.control_plane.cli:main.
    entry_points={
        "console_scripts": [
            "devhive=devhive.control_plane.cli:main",
        ],
    },
    python_requires=">=3.12",
)
|
|
File without changes
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Python wrapper for the C signature matching engine.
|
|
2
|
+
|
|
3
|
+
Falls back to a pure-Python implementation if the C extension is not built.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import ctypes
|
|
7
|
+
import os
|
|
8
|
+
from ctypes import c_int, c_double, c_char, c_uint64, POINTER, Structure
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ── C Types ────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
class CFeatureVector(Structure):
    # ctypes layout of the query passed to the native matcher's db_match().
    # NOTE(review): presumably mirrors a struct in signature/src/matcher.h —
    # field order and sizes must stay in sync with it; confirm against the header.
    _fields_ = [
        ("error_type", c_char * 64),          # fixed 64-byte char buffer
        ("error_message_hash", c_uint64),
        ("location_pattern", c_char * 256),   # fixed 256-byte char buffer
        ("stack_depth", c_int),
        ("stack_hashes", c_uint64 * 10),      # room for 10 frame hashes
        ("change_type", c_int),
        ("distance_to_error", c_int),
        ("is_new_in_diff", c_int),            # written as 0 or 1 by the engine
    ]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CMatchResult(Structure):
    # ctypes layout of one result slot filled in by the native db_match().
    # NOTE(review): presumably mirrors a struct in signature/src/matcher.h —
    # confirm against the header before changing any size.
    _fields_ = [
        ("sig_id", c_char * 32),
        ("similarity", c_double),
        ("reliability", c_double),
        ("match_count", c_int),
        ("resolution_strategy", c_char * 32),
        ("fix_template", c_char * 512),
    ]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class CMatcherConfig(Structure):
    # ctypes layout of the matcher configuration: six feature weights, the
    # minimum confidence threshold and the number of results to return.
    # NOTE(review): presumably mirrors a struct in signature/src/matcher.h —
    # confirm against the header.
    _fields_ = [
        ("w_error_type", c_double),
        ("w_error_message", c_double),
        ("w_location", c_double),
        ("w_stack_trace", c_double),
        ("w_change_context", c_double),
        ("w_temporal", c_double),
        ("min_confidence", c_double),
        ("top_k", c_int),
    ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SignatureEngine:
    """High-performance signature matching engine.

    Uses the native ``libmatcher.so`` library through ``ctypes`` when it can be
    loaded, otherwise falls back to a pure-Python keyword matcher that reads
    signatures from the JSON file at ``db_path``.

    The engine can be used as a context manager; ``close()`` releases the
    native database handle.

    NOTE(review): nothing in this class loads signatures into the native
    database created by ``db_create`` — confirm it is populated elsewhere,
    otherwise the native path matches against an empty database.
    """

    def __init__(self, db_path: Optional[str] = None, config: Optional[dict] = None):
        """Create an engine.

        Args:
            db_path: Path of the JSON signature file used by the Python fallback.
            config: Optional overrides for the weights, ``min_confidence`` and
                ``top_k``; missing keys use the built-in defaults.
        """
        self.db_path = db_path
        self._lib = None
        self._db = None
        self._config = CMatcherConfig()
        self._load_library()
        self._init_config(config)

    def __enter__(self) -> "SignatureEngine":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _load_library(self) -> None:
        """Try to load the native library; leave the engine in fallback mode on failure."""
        here = os.path.dirname(__file__)
        candidates = (
            os.path.join(here, "src", "libmatcher.so"),
            os.path.join(here, "src", "build", "libmatcher.so"),
        )
        lib_path = next((p for p in candidates if os.path.exists(p)), None)
        if lib_path is None:
            return  # Will use Python fallback

        try:
            lib = ctypes.CDLL(lib_path)

            lib.db_create.argtypes = [c_int]
            lib.db_create.restype = ctypes.c_void_p

            lib.db_destroy.argtypes = [ctypes.c_void_p]
            lib.db_destroy.restype = None

            lib.config_default.argtypes = []
            lib.config_default.restype = CMatcherConfig

            lib.db_match.argtypes = [
                ctypes.c_void_p, POINTER(CMatcherConfig),
                POINTER(CFeatureVector), POINTER(CMatchResult), c_int]
            lib.db_match.restype = c_int

            db = lib.db_create(1024)
        except (OSError, AttributeError):
            # OSError: the shared object could not be loaded.
            # AttributeError: an expected symbol is missing from it.
            self._lib = None
            self._db = None
            return

        if not db:
            # db_create returned NULL: stay on the Python fallback.
            return
        self._lib = lib
        self._db = db

    def _init_config(self, config: Optional[dict] = None) -> None:
        """Populate ``self._config`` from ``config``, using defaults for missing keys."""
        cfg = config or {}
        self._config = self._lib.config_default() if self._lib else CMatcherConfig()
        self._config.w_error_type = cfg.get("w_error_type", 0.30)
        self._config.w_error_message = cfg.get("w_error_message", 0.15)
        self._config.w_location = cfg.get("w_location", 0.25)
        self._config.w_stack_trace = cfg.get("w_stack_trace", 0.15)
        self._config.w_change_context = cfg.get("w_change_context", 0.10)
        self._config.w_temporal = cfg.get("w_temporal", 0.05)
        self._config.min_confidence = cfg.get("min_confidence", 0.65)
        self._config.top_k = cfg.get("top_k", 3)

    def match(self, query: dict, k: int = 3,
              min_confidence: Optional[float] = None) -> list[dict]:
        """Match a query against the signature database.

        Args:
            query: Dict with keys: error_type, error_message, location_pattern,
                stack_hashes, change_type, distance_to_error, is_new_in_diff
            k: Number of top results to return; values <= 0 yield ``[]``.
            min_confidence: Per-call override of the minimum confidence
                threshold. The configured value is restored afterwards.
        Returns:
            List of matched signatures with similarity scores
        """
        if k <= 0:
            return []

        # Apply per-call overrides, then restore them so one call cannot
        # silently change the threshold used by later calls.
        saved = (self._config.min_confidence, self._config.top_k)
        if min_confidence is not None:
            self._config.min_confidence = min_confidence
        self._config.top_k = k
        try:
            if self._lib and self._db:
                return self._match_c(self._build_feature_vector(query), k)
            return self._match_python(query, k)
        finally:
            self._config.min_confidence, self._config.top_k = saved

    def _build_feature_vector(self, query: dict) -> "CFeatureVector":
        """Convert a query dict into the ctypes structure passed to ``db_match``."""
        import hashlib

        fv = CFeatureVector()
        fv.error_type = (query.get("error_type") or "").encode()[:63]
        # Builtin hash() of a str is salted per process, so it cannot be
        # compared with anything stored by another run. Use the first 64 bits
        # of SHA-256 instead (same width as the 16 hex digits produced by
        # extract_feature_vector).
        message = str(query.get("error_message") or "")
        fv.error_message_hash = int(hashlib.sha256(message.encode()).hexdigest()[:16], 16)
        fv.location_pattern = (query.get("location_pattern") or "").encode()[:255]
        # NOTE(review): stack hashes, change type and distance from the query
        # are not forwarded; the integer encodings expected by the native side
        # are not visible here.
        fv.stack_depth = 0
        fv.change_type = 0
        fv.distance_to_error = 0
        fv.is_new_in_diff = 1 if query.get("is_new_in_diff", True) else 0
        return fv

    def _match_c(self, fv: "CFeatureVector", k: int) -> list[dict]:
        """Run the native matcher and convert its result slots to dicts."""
        if k <= 0:
            return []
        results = (CMatchResult * k)()
        n = self._lib.db_match(self._db, ctypes.byref(self._config),
                               ctypes.byref(fv), results, k)
        # Never read past the buffer we allocated; a negative count yields [].
        count = max(0, min(n, k))
        return [
            {
                "signature_id": row.sig_id.decode(errors="replace"),
                "similarity": row.similarity,
                "reliability": row.reliability,
                "match_count": row.match_count,
                "resolution_strategy": row.resolution_strategy.decode(errors="replace"),
                "fix_template": row.fix_template.decode(errors="replace"),
            }
            for row in results[:count]
        ]

    def _match_python(self, query: dict, k: int) -> list[dict]:
        """Pure Python fallback matcher (error type + location keywords only).

        The fallback evaluates just two features, so the raw score is divided
        by the combined weight of those two features. Without this the best
        possible score (0.55 with the default weights) could never reach the
        default ``min_confidence`` of 0.65.
        """
        error_type = (query.get("error_type") or "").lower()
        location = (query.get("location_pattern") or "").lower()

        w_type = self._config.w_error_type
        w_location = self._config.w_location
        evaluated_weight = w_type + w_location
        threshold = self._config.min_confidence

        results = []
        for sig in self._load_signatures():
            fv = sig.get("feature_vector") or {}
            score = 0.0

            # Error type match (substring, case-insensitive)
            if error_type and error_type in (fv.get("error_type") or "").lower():
                score += w_type

            # Location match: full credit when equal, half for a shared prefix
            sig_loc = (fv.get("error_location_pattern") or "").lower()
            if location and sig_loc:
                if location == sig_loc:
                    score += w_location
                elif os.path.commonprefix([location, sig_loc]):
                    score += w_location * 0.5

            if evaluated_weight > 0:
                score /= evaluated_weight

            if score >= threshold:
                results.append({
                    "signature_id": sig.get("signature_id", ""),
                    "similarity": score,
                    "reliability": sig.get("diagnosis", {}).get("reliability", 0.5),
                    "match_count": sig.get("match_count", 0),
                    "resolution_strategy": sig.get("resolution", {}).get("strategy", ""),
                    "fix_template": sig.get("resolution", {}).get("fix_template", ""),
                })

        results.sort(key=lambda r: r["similarity"], reverse=True)
        return results[:k]

    def _load_signatures(self) -> list[dict]:
        """Load signatures from the JSON database.

        Accepts either ``{"signatures": [...]}`` or a bare top-level list.
        Returns ``[]`` when no path is configured or the file does not exist.
        Malformed JSON still raises, so a corrupt database is not hidden.
        """
        import json

        if not self.db_path or not os.path.exists(self.db_path):
            return []
        with open(self.db_path, encoding="utf-8") as f:
            data = json.load(f)
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            return data.get("signatures") or []
        return []

    def close(self) -> None:
        """Destroy the native database handle, if any. Safe to call repeatedly."""
        if self._lib and self._db:
            self._lib.db_destroy(self._db)
            self._db = None
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Feature vector extractor — converts raw error data to structured vectors."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_feature_vector(
    error_output: str,
    changed_files: list[str] = None,
    change_types: list[str] = None,
) -> dict:
    """Turn raw error output into a structured feature vector.

    Args:
        error_output: Raw stderr/stdout from a failed test run
        changed_files: Files modified in this change (``None`` means none)
        change_types: Types of changes (logic_fix, refactor, etc.)

    Returns:
        Dict with a ``feature_vector`` entry ready for signature matching and
        ``raw_normalized``, the first 500 characters of the normalized output.
    """
    files = changed_files or []
    kinds = change_types or []

    # The normalized text is used both for the message hash and the preview.
    message = _normalize_error_message(error_output)
    location = _extract_location(error_output, files)
    frame_hashes = _extract_stack_signature(error_output)

    change_context = {
        "files_touched": files,
        "change_types": kinds,
        "distance_to_error": _compute_distance(location, files),
    }
    feature_vector = {
        "error_type": _parse_error_type(error_output),
        "error_message_hash": hashlib.sha256(message.encode()).hexdigest()[:16],
        "error_location_pattern": location,
        # Only the first five frame hashes are kept.
        "stack_trace_signature": frame_hashes[:5],
        "change_context": change_context,
        "temporal": {
            "is_new_in_this_diff": True,
        },
    }
    return {
        "feature_vector": feature_vector,
        "raw_normalized": message[:500],
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _parse_error_type(error_output: str) -> str:
|
|
62
|
+
"""Extract the error/exception type from output."""
|
|
63
|
+
# Common Python patterns
|
|
64
|
+
patterns = [
|
|
65
|
+
(r'(\w+Error)', None), # ValueError, KeyError, etc.
|
|
66
|
+
(r'(\w+Exception)', None), # RuntimeException, etc.
|
|
67
|
+
(r'(\w+Warning)', None), # DeprecationWarning, etc.
|
|
68
|
+
(r'AssertionError', None),
|
|
69
|
+
(r'Traceback.*\n\s*(\w+)', None), # First line after Traceback
|
|
70
|
+
(r'FAILED.*-.*Error:\s*(\w+)', None), # pytest output
|
|
71
|
+
(r'segmentation fault', 'Segfault'),
|
|
72
|
+
(r'timeout', 'TimeoutError'),
|
|
73
|
+
(r'out of memory', 'OutOfMemoryError'),
|
|
74
|
+
(r'connection refused', 'ConnectionRefused'),
|
|
75
|
+
(r'connection timeout', 'ConnectionTimeout'),
|
|
76
|
+
(r'permission denied', 'PermissionError'),
|
|
77
|
+
(r'file not found', 'FileNotFoundError'),
|
|
78
|
+
(r'module not found', 'ModuleNotFoundError'),
|
|
79
|
+
(r'import error', 'ImportError'),
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
for pattern, fallback in patterns:
|
|
83
|
+
match = re.search(pattern, error_output, re.IGNORECASE)
|
|
84
|
+
if match:
|
|
85
|
+
return match.group(1) if fallback is None else fallback
|
|
86
|
+
|
|
87
|
+
return "UnknownError"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _normalize_error_message(error_output: str) -> str:
|
|
91
|
+
"""Normalize error message for consistent hashing."""
|
|
92
|
+
# Remove file paths (variable between environments)
|
|
93
|
+
text = re.sub(r'/[^\s]+\.py:\d+', '<FILE>:<LINE>', error_output)
|
|
94
|
+
# Remove hex addresses
|
|
95
|
+
text = re.sub(r'0x[0-9a-fA-F]+', '<ADDR>', text)
|
|
96
|
+
# Remove timestamps
|
|
97
|
+
text = re.sub(r'\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[^\s]*', '<TIMESTAMP>', text)
|
|
98
|
+
# Remove numeric IDs (UUIDs, etc.)
|
|
99
|
+
text = re.sub(r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
|
|
100
|
+
'<UUID>', text)
|
|
101
|
+
# Collapse whitespace
|
|
102
|
+
text = re.sub(r'\s+', ' ', text).strip().lower()
|
|
103
|
+
return text
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _extract_location(error_output: str, changed_files: list[str]) -> str:
|
|
107
|
+
"""Extract the file location most likely associated with the error."""
|
|
108
|
+
# Look for file:line patterns
|
|
109
|
+
file_pattern = r'File "([^"]+)", line (\d+)'
|
|
110
|
+
matches = re.findall(file_pattern, error_output)
|
|
111
|
+
|
|
112
|
+
for file_path, line in matches:
|
|
113
|
+
for changed in changed_files:
|
|
114
|
+
if changed in file_path or file_path.endswith(changed.split("/")[-1]):
|
|
115
|
+
return f"{file_path}:{line}"
|
|
116
|
+
|
|
117
|
+
if matches:
|
|
118
|
+
return f"{matches[0][0]}:{matches[0][1]}"
|
|
119
|
+
if changed_files:
|
|
120
|
+
return changed_files[0]
|
|
121
|
+
|
|
122
|
+
return "unknown"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _extract_stack_signature(error_output: str) -> list[str]:
|
|
126
|
+
"""Extract stack trace as a list of location hashes."""
|
|
127
|
+
file_pattern = r'File "([^"]+)", line (\d+), in (\w+)'
|
|
128
|
+
matches = re.findall(file_pattern, error_output)
|
|
129
|
+
|
|
130
|
+
hashes = []
|
|
131
|
+
for file_path, line, func in matches:
|
|
132
|
+
# Create a short hash of each stack frame
|
|
133
|
+
frame = f"{file_path.split('/')[-1]}:{line}:{func}"
|
|
134
|
+
h = hashlib.sha256(frame.encode()).hexdigest()[:8]
|
|
135
|
+
hashes.append(h)
|
|
136
|
+
|
|
137
|
+
return hashes if hashes else [hashlib.sha256(error_output[:200].encode()).hexdigest()[:8]]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _compute_distance(location: str, changed_files: list[str]) -> str:
|
|
141
|
+
"""Determine if the error is in a changed file, same module, or elsewhere."""
|
|
142
|
+
if not location or location == "unknown":
|
|
143
|
+
return "DIFFERENT_MODULE"
|
|
144
|
+
|
|
145
|
+
for f in changed_files:
|
|
146
|
+
if f in location or location.endswith(f.split("/")[-1]):
|
|
147
|
+
return "SAME_FILE"
|
|
148
|
+
|
|
149
|
+
# Check same directory/module
|
|
150
|
+
location_dir = "/".join(location.split("/")[:-1])
|
|
151
|
+
for f in changed_files:
|
|
152
|
+
f_dir = "/".join(f.split("/")[:-1])
|
|
153
|
+
if location_dir and f_dir and location_dir == f_dir:
|
|
154
|
+
return "SAME_MODULE"
|
|
155
|
+
|
|
156
|
+
return "DIFFERENT_MODULE"
|