@oswaldzsh/devhive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +91 -0
  2. package/__init__.py +0 -0
  3. package/agents/__init__.py +0 -0
  4. package/agents/base.py +118 -0
  5. package/agents/execute.py +150 -0
  6. package/agents/verifier_dynamic.py +164 -0
  7. package/agents/verifier_semantic.py +84 -0
  8. package/agents/verifier_static.py +153 -0
  9. package/bin/dh +77 -0
  10. package/config.yaml +71 -0
  11. package/control_plane/__init__.py +0 -0
  12. package/control_plane/cli.py +596 -0
  13. package/control_plane/dashboard.py +57 -0
  14. package/control_plane/notifications.py +54 -0
  15. package/control_plane/tui.py +352 -0
  16. package/install.sh +67 -0
  17. package/orchestrator/__init__.py +0 -0
  18. package/orchestrator/agent_pool.py +107 -0
  19. package/orchestrator/convergence_gate.py +133 -0
  20. package/orchestrator/engine.py +353 -0
  21. package/orchestrator/event_bus.py +58 -0
  22. package/orchestrator/task_queue.py +59 -0
  23. package/package.json +50 -0
  24. package/protocol/__init__.py +0 -0
  25. package/protocol/schemas.py +222 -0
  26. package/setup.py +44 -0
  27. package/signature/__init__.py +0 -0
  28. package/signature/engine.py +211 -0
  29. package/signature/extractor.py +156 -0
  30. package/signature/learner.py +75 -0
  31. package/signature/src/matcher.c +263 -0
  32. package/signature/src/matcher.h +135 -0
  33. package/signatures/seed_signatures.json +174 -0
  34. package/storage/__init__.py +0 -0
  35. package/storage/checkpoint.py +153 -0
  36. package/storage/signature_db.py +62 -0
  37. package/tools/__init__.py +0 -0
  38. package/tools/api_client.py +101 -0
  39. package/tools/git.py +75 -0
  40. package/tools/sandbox.py +79 -0
  41. package/verification/__init__.py +0 -0
  42. package/verification/diagnostic.py +124 -0
  43. package/verification/patterns/api_breaking.yaml +25 -0
  44. package/verification/patterns/code_quality.yaml +41 -0
  45. package/verification/patterns/security.yaml +41 -0
  46. package/verification/pipeline.py +61 -0
@@ -0,0 +1,222 @@
1
+ """DevHive Protocol Schemas
2
+
3
+ All inter-agent communication is fully typed via Pydantic models.
4
+ No natural-language handoffs — every message is machine-verifiable.
5
+ """
6
+
7
+ from pydantic import BaseModel, Field
8
+ from enum import Enum
9
+ from datetime import datetime, timezone
10
+ from typing import Optional
11
+
12
+
13
def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime`` (tzinfo removed)."""
    aware_now = datetime.now(tz=timezone.utc)
    # Strip the tzinfo so the value is a naive datetime expressed in UTC.
    return aware_now.replace(tzinfo=None)
15
+
16
+
17
+ # ── Enums ────────────────────────────────────────────────────
18
+
19
class ChangeType(str, Enum):
    """Kind of change recorded for a modified file (string-valued)."""

    LOGIC_FIX = "logic_fix"
    NEW_FEATURE = "new_feature"
    REFACTOR = "refactor"
    CONFIG = "config"
    DEPENDENCY = "dependency"
    DOCS = "docs"
26
+
27
+
28
class Severity(str, Enum):
    """Severity levels, declared from CRITICAL down to INFO."""

    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INFO = "INFO"
34
+
35
+
36
class Priority(str, Enum):
    """Priority levels, declared from CRITICAL down to LOW."""

    CRITICAL = "CRITICAL"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
41
+
42
+
43
class VerdictOverall(str, Enum):
    """Overall outcome values a verdict can carry."""

    PASS = "PASS"
    WARN = "WARN"
    FAIL = "FAIL"
    CONFLICT = "CONFLICT"
48
+
49
+
50
class VerifierType(str, Enum):
    """Identifies which verifier produced a verdict."""

    STATIC = "static"
    DYNAMIC = "dynamic"
    SEMANTIC = "semantic"
54
+
55
+
56
class ConflictType(str, Enum):
    """Categories of conflict (string-valued)."""

    FACT = "fact"
    INTERPRETATION = "interpretation"
    SPEC_AMBIGUITY = "spec_ambiguity"
60
+
61
+
62
class ConcurrencyAction(str, Enum):
    """Action values used by ``ConvergenceDecision.action``."""

    PASS = "PASS"
    FIX = "FIX"
    ESCALATE = "ESCALATE"
    CONFLICT = "CONFLICT"
67
+
68
+
69
class Alignment(str, Enum):
    """Alignment values reported by a semantic verdict."""

    ALIGNED = "ALIGNED"
    ENHANCED = "ENHANCED"
    DEVIATED = "DEVIATED"
    CONFLICT = "CONFLICT"
74
+
75
+
76
class SuggestedAction(str, Enum):
    """Follow-up actions that can be attached to a finding."""

    ROLLBACK = "ROLLBACK"
    FIX = "FIX"
    RETEST = "RETEST"
    ESCALATE = "ESCALATE"
    IGNORE = "IGNORE"
82
+
83
+
84
+ # ── Task / Spec ──────────────────────────────────────────────
85
+
86
class TaskSpec(BaseModel):
    """Human-provided target-state description."""
    # Required free-text fields.
    title: str
    description: str
    # Optional lists; each instance gets its own fresh empty list.
    acceptance_criteria: list[str] = Field(default_factory=list)
    scope_constraints: list[str] = Field(default_factory=list)
    sensitive_modules: list[str] = Field(default_factory=list)
    # MEDIUM unless the caller says otherwise.
    priority: Priority = Priority.MEDIUM
94
+
95
+
96
class Task(BaseModel):
    # A task: an id, its spec, the branch/base commit it refers to, and
    # the stage it is currently in.
    id: str
    spec: TaskSpec
    branch: str
    base_commit: str
    # Filled with the naive-UTC "now" at construction time when omitted.
    created_at: datetime = Field(default_factory=_utcnow)
    # New tasks start in the "SPECIFY" stage.
    current_stage: str = "SPECIFY"
103
+
104
+
105
+ # ── Handoff ──────────────────────────────────────────────────
106
+
107
class RiskAssessment(BaseModel):
    # One risk statement paired with its severity.
    risk: str
    severity: Severity
110
+
111
+
112
class FileChange(BaseModel):
    # Description of a change made to a single file.
    file: str
    diff_range: dict  # {"start_line": int, "end_line": int}
    change_type: ChangeType
    summary: str
    # Optional; defaults to a fresh empty list per instance.
    risk_self_assessment: list[RiskAssessment] = Field(default_factory=list)
118
+
119
+
120
class VerificationFocus(BaseModel):
    # What to verify, how to verify it, and how urgent that check is.
    what: str
    how_to_verify: str
    priority: Priority
124
+
125
+
126
class EnvChanges(BaseModel):
    # Environment-level changes; every field has an "empty" default, so
    # EnvChanges() is a valid "nothing changed" value.
    new_dependencies: list[str] = Field(default_factory=list)
    config_changes: list[str] = Field(default_factory=list)
    migration_needed: bool = False
130
+
131
+
132
class ExecutionTrace(BaseModel):
    # Record of the commands run and the self-check result.
    commands_run: list[str] = Field(default_factory=list)
    # False until explicitly set.
    self_check_passed: bool = False
    # Optional hash of the self-check output; None when not provided.
    self_check_output_hash: Optional[str] = None
136
+
137
+
138
class ExecutionHandoff(BaseModel):
    """Emitted by Execute Agent after producing code changes."""
    # Schema version tag of this message.
    handoff_version: str = "1.0"
    source: str
    task_id: str
    # Naive-UTC creation time, filled automatically when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    intent: str
    # Required lists: the file changes and the verification hints.
    changes: list[FileChange]
    verification_focus: list[VerificationFocus]
    # Defaults to an EnvChanges() with all-empty fields.
    env_changes: EnvChanges = Field(default_factory=EnvChanges)
    execution_trace: ExecutionTrace
149
+
150
+
151
+ # ── Verdict ──────────────────────────────────────────────────
152
+
153
class FindingEvidence(BaseModel):
    # Evidence attached to a finding: a type tag plus the evidence text.
    type: str  # diff | log | metric | ast | trace
    data: str
156
+
157
+
158
class Finding(BaseModel):
    # A single issue reported by a verifier.
    #
    # The default id is "F-" followed by the float timestamp of the current
    # naive-UTC time.
    # NOTE(review): two findings created within the same clock tick would
    # receive the same default id — confirm whether uniqueness is required.
    id: str = Field(default_factory=lambda: f"F-{_utcnow().timestamp()}")
    severity: Severity
    category: str
    title: str
    detail: str
    evidence: FindingEvidence
    # Identifier of a matched signature, when there is one.
    matched_signature: Optional[str] = None
    # ESCALATE unless the producer chooses another action.
    suggested_action: SuggestedAction = SuggestedAction.ESCALATE
167
+
168
+
169
class Verdict(BaseModel):
    """Output from any Verifier Agent."""
    # Schema version tag of this message.
    verdict_version: str = "1.0"
    verifier_type: VerifierType
    task_id: str
    # Naive-UTC creation time, filled automatically when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    overall: VerdictOverall
    # Empty list when the verifier reports nothing.
    findings: list[Finding] = Field(default_factory=list)
177
+
178
+
179
+ # ── Semantic Verdict ─────────────────────────────────────────
180
+
181
class SemanticVerdict(BaseModel):
    """Specialized verdict for semantic verification."""
    # Schema version tag of this message.
    verdict_version: str = "1.0"
    # Plain string here (not the VerifierType enum); defaults to "semantic".
    verifier_type: str = "semantic"
    task_id: str
    # Naive-UTC creation time, filled automatically when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    alignment: Alignment
    reasoning: str
    concerns: list[str] = Field(default_factory=list)
    # PASS unless explicitly set.
    overall: VerdictOverall = VerdictOverall.PASS
191
+
192
+
193
+ # ── Convergence ──────────────────────────────────────────────
194
+
195
class ConvergenceDecision(BaseModel):
    # A decision: the chosen action and the reason for it.
    action: ConcurrencyAction
    reason: str
    # Optional extras; both default to None.
    fix_strategy: Optional[str] = None
    # String forward reference: EscalationReport is declared after this class.
    escalation: Optional['EscalationReport'] = None
200
+
201
+
202
+ # ── Escalation ────────────────────────────────────────────────
203
+
204
class EscalationReport(BaseModel):
    # Escalation record. The dict-typed fields are free-form; their schema
    # is not constrained by this model.
    escalation_id: str
    task_id: str
    triggered_by: str
    current_state: dict
    history: list[dict] = Field(default_factory=list)
    what_agent_tried: list[str] = Field(default_factory=list)
    # None when no blocking finding is attached.
    blocking_finding: Optional[dict] = None
    # Empty string when no suggestion is given.
    suggested_human_action: str = ""
213
+
214
+
215
+ # ── Events ───────────────────────────────────────────────────
216
+
217
class DevHiveEvent(BaseModel):
    """Base event type for the event bus."""
    event_type: str
    task_id: str
    # Naive-UTC creation time, filled automatically when omitted.
    timestamp: datetime = Field(default_factory=_utcnow)
    # Free-form event data; a fresh empty dict per event by default.
    payload: dict = Field(default_factory=dict)
package/setup.py ADDED
@@ -0,0 +1,44 @@
1
+ """Setup script for the C signature matching extension."""
2
+
3
+ from setuptools import setup, Extension
4
+ import os
5
+
6
# Directory holding the C source and header of the matcher extension.
_MATCHER_SRC_DIR = os.path.join("signature", "src")

# Sub-packages shipped under the top-level "devhive" package.
_SUBPACKAGES = (
    "orchestrator",
    "agents",
    "protocol",
    "verification",
    "signature",
    "storage",
    "tools",
    "control_plane",
)

# C extension built from signature/src/matcher.c.
matcher_ext = Extension(
    "signature._matcher",
    sources=[os.path.join(_MATCHER_SRC_DIR, "matcher.c")],
    include_dirs=[_MATCHER_SRC_DIR],
    extra_compile_args=["-O3", "-march=native", "-ffast-math"],
    extra_link_args=["-lm"],
)

setup(
    name="devhive",
    version="0.1.0",
    description="DevHive — Multi-Agent Software Development System",
    packages=["devhive"] + [f"devhive.{name}" for name in _SUBPACKAGES],
    # The "devhive" package is rooted at the directory containing setup.py.
    package_dir={"devhive": "."},
    ext_modules=[matcher_ext],
    install_requires=[
        "httpx>=0.25.0",
        "pydantic>=2.0.0",
        "pyyaml>=6.0",
    ],
    entry_points={
        "console_scripts": [
            "devhive=devhive.control_plane.cli:main",
        ],
    },
    python_requires=">=3.12",
)
File without changes
@@ -0,0 +1,211 @@
1
+ """Python wrapper for the C signature matching engine.
2
+
3
+ Falls back to a pure-Python implementation if the C extension is not built.
4
+ """
5
+
6
+ import ctypes
7
+ import os
8
+ from ctypes import c_int, c_double, c_char, c_uint64, POINTER, Structure
9
+ from typing import Optional
10
+
11
+
12
+ # ── C Types ────────────────────────────────────────────────
13
+
14
class CFeatureVector(Structure):
    """ctypes layout of the query feature vector passed to ``db_match``.

    Field order and types define the binary layout and must stay in sync
    with the C declaration.
    """

    _fields_ = [
        ("error_type", c_char * 64),
        ("error_message_hash", c_uint64),
        ("location_pattern", c_char * 256),
        ("stack_depth", c_int),
        ("stack_hashes", c_uint64 * 10),
        ("change_type", c_int),
        ("distance_to_error", c_int),
        ("is_new_in_diff", c_int),
    ]
25
+
26
+
27
class CMatchResult(Structure):
    """ctypes layout of one result record written by ``db_match``.

    Field order and types define the binary layout and must stay in sync
    with the C declaration.
    """

    _fields_ = [
        ("sig_id", c_char * 32),
        ("similarity", c_double),
        ("reliability", c_double),
        ("match_count", c_int),
        ("resolution_strategy", c_char * 32),
        ("fix_template", c_char * 512),
    ]
36
+
37
+
38
class CMatcherConfig(Structure):
    """ctypes layout of the matcher configuration (weights and thresholds).

    Field order and types define the binary layout and must stay in sync
    with the C declaration.
    """

    _fields_ = [
        ("w_error_type", c_double),
        ("w_error_message", c_double),
        ("w_location", c_double),
        ("w_stack_trace", c_double),
        ("w_change_context", c_double),
        ("w_temporal", c_double),
        ("min_confidence", c_double),
        ("top_k", c_int),
    ]
49
+
50
+
51
+ class SignatureEngine:
52
+ """High-performance signature matching engine.
53
+
54
+ Uses C extension if available, otherwise falls back to Python.
55
+ """
56
+
57
+ def __init__(self, db_path: str = None, config: dict = None):
58
+ self.db_path = db_path
59
+ self._lib = None
60
+ self._db = None
61
+ self._config = CMatcherConfig()
62
+ self._load_library()
63
+ self._init_config(config)
64
+
65
+ def _load_library(self):
66
+ """Try to load the C extension library."""
67
+ lib_path = os.path.join(os.path.dirname(__file__), "src", "libmatcher.so")
68
+ if not os.path.exists(lib_path):
69
+ # Try build directory
70
+ lib_path = os.path.join(os.path.dirname(__file__),
71
+ "src", "build", "libmatcher.so")
72
+ if not os.path.exists(lib_path):
73
+ return # Will use Python fallback
74
+
75
+ try:
76
+ self._lib = ctypes.CDLL(lib_path)
77
+ # Set up function signatures
78
+ self._lib.db_create.argtypes = [c_int]
79
+ self._lib.db_create.restype = ctypes.c_void_p
80
+
81
+ self._lib.db_destroy.argtypes = [ctypes.c_void_p]
82
+ self._lib.db_destroy.restype = None
83
+
84
+ self._lib.config_default.restype = CMatcherConfig
85
+
86
+ self._lib.db_match.argtypes = [
87
+ ctypes.c_void_p, POINTER(CMatcherConfig),
88
+ POINTER(CFeatureVector), POINTER(CMatchResult), c_int]
89
+ self._lib.db_match.restype = c_int
90
+
91
+ self._db = self._lib.db_create(1024)
92
+ except Exception:
93
+ self._lib = None
94
+
95
+ def _init_config(self, config: dict = None):
96
+ cfg = config or {}
97
+ if self._lib:
98
+ self._config = self._lib.config_default()
99
+ else:
100
+ self._config = CMatcherConfig()
101
+ self._config.w_error_type = cfg.get("w_error_type", 0.30)
102
+ self._config.w_error_message = cfg.get("w_error_message", 0.15)
103
+ self._config.w_location = cfg.get("w_location", 0.25)
104
+ self._config.w_stack_trace = cfg.get("w_stack_trace", 0.15)
105
+ self._config.w_change_context = cfg.get("w_change_context", 0.10)
106
+ self._config.w_temporal = cfg.get("w_temporal", 0.05)
107
+ self._config.min_confidence = cfg.get("min_confidence", 0.65)
108
+ self._config.top_k = cfg.get("top_k", 3)
109
+
110
+ def match(self, query: dict, k: int = 3,
111
+ min_confidence: float = None) -> list[dict]:
112
+ """Match a query against the signature database.
113
+
114
+ Args:
115
+ query: Dict with keys: error_type, error_message, location_pattern,
116
+ stack_hashes, change_type, distance_to_error, is_new_in_diff
117
+ k: Number of top results to return
118
+ min_confidence: Override minimum confidence threshold
119
+ Returns:
120
+ List of matched signatures with similarity scores
121
+ """
122
+ if min_confidence is not None:
123
+ self._config.min_confidence = min_confidence
124
+ self._config.top_k = k
125
+
126
+ fv = self._build_feature_vector(query)
127
+
128
+ if self._lib and self._db:
129
+ return self._match_c(fv, k)
130
+ else:
131
+ return self._match_python(query, k)
132
+
133
+ def _build_feature_vector(self, query: dict) -> CFeatureVector:
134
+ fv = CFeatureVector()
135
+ fv.error_type = (query.get("error_type", "") or "").encode()[:63]
136
+ fv.error_message_hash = hash(query.get("error_message", "")) & 0xFFFFFFFFFFFFFFFF
137
+ fv.location_pattern = (query.get("location_pattern", "") or "").encode()[:255]
138
+ fv.stack_depth = 0
139
+ fv.change_type = 0
140
+ fv.distance_to_error = 0
141
+ fv.is_new_in_diff = 1 if query.get("is_new_in_diff", True) else 0
142
+ return fv
143
+
144
+ def _match_c(self, fv: CFeatureVector, k: int) -> list[dict]:
145
+ results = (CMatchResult * k)()
146
+ n = self._lib.db_match(self._db, ctypes.byref(self._config),
147
+ ctypes.byref(fv), results, k)
148
+ return [
149
+ {
150
+ "signature_id": results[i].sig_id.decode(),
151
+ "similarity": results[i].similarity,
152
+ "reliability": results[i].reliability,
153
+ "match_count": results[i].match_count,
154
+ "resolution_strategy": results[i].resolution_strategy.decode(),
155
+ "fix_template": results[i].fix_template.decode(),
156
+ }
157
+ for i in range(n)
158
+ ]
159
+
160
+ def _match_python(self, query: dict, k: int) -> list[dict]:
161
+ """Pure Python fallback matcher."""
162
+ # Simple keyword-based matching as fallback
163
+ results = []
164
+ error_type = query.get("error_type", "").lower()
165
+ location = query.get("location_pattern", "").lower()
166
+
167
+ # Load signatures from file if available
168
+ signatures = self._load_signatures()
169
+
170
+ for sig in signatures:
171
+ score = 0.0
172
+ fv = sig.get("feature_vector", {})
173
+
174
+ # Error type match
175
+ if error_type and error_type in fv.get("error_type", "").lower():
176
+ score += self._config.w_error_type
177
+
178
+ # Location match
179
+ sig_loc = fv.get("error_location_pattern", "").lower()
180
+ if location and sig_loc:
181
+ if location == sig_loc:
182
+ score += self._config.w_location
183
+ elif os.path.commonprefix([location, sig_loc]):
184
+ score += self._config.w_location * 0.5
185
+
186
+ if score >= self._config.min_confidence:
187
+ results.append({
188
+ "signature_id": sig.get("signature_id", ""),
189
+ "similarity": score,
190
+ "reliability": sig.get("diagnosis", {}).get("reliability", 0.5),
191
+ "match_count": sig.get("match_count", 0),
192
+ "resolution_strategy": sig.get("resolution", {}).get("strategy", ""),
193
+ "fix_template": sig.get("resolution", {}).get("fix_template", ""),
194
+ })
195
+
196
+ results.sort(key=lambda r: r["similarity"], reverse=True)
197
+ return results[:k]
198
+
199
+ def _load_signatures(self) -> list[dict]:
200
+ """Load signatures from the JSON database."""
201
+ import json
202
+ if self.db_path and os.path.exists(self.db_path):
203
+ with open(self.db_path) as f:
204
+ data = json.load(f)
205
+ return data.get("signatures", [])
206
+ return []
207
+
208
+ def close(self):
209
+ if self._lib and self._db:
210
+ self._lib.db_destroy(self._db)
211
+ self._db = None
@@ -0,0 +1,156 @@
1
+ """Feature vector extractor — converts raw error data to structured vectors."""
2
+
3
+ import hashlib
4
+ import re
5
+ from typing import Optional
6
+
7
+
8
def extract_feature_vector(
    error_output: str,
    changed_files: list[str] = None,
    change_types: list[str] = None,
) -> dict:
    """Build a signature-matching feature vector from raw error output.

    Args:
        error_output: Raw stderr/stdout from a failed test run
        changed_files: Files modified in this change (``None`` → empty list)
        change_types: Types of changes, e.g. logic_fix, refactor
            (``None`` → empty list)

    Returns:
        Dict with two keys: ``feature_vector`` (the structured features) and
        ``raw_normalized`` (first 500 characters of the normalized output).
    """
    files_touched = changed_files or []
    kinds = change_types or []

    # The normalized text feeds both the message hash and the raw excerpt.
    normalized_text = _normalize_error_message(error_output)
    message_digest = hashlib.sha256(normalized_text.encode()).hexdigest()[:16]

    # The location is also the input for the distance classification.
    location = _extract_location(error_output, files_touched)

    feature_vector = {
        "error_type": _parse_error_type(error_output),
        "error_message_hash": message_digest,
        "error_location_pattern": location,
        # Only the first five frame hashes are kept.
        "stack_trace_signature": _extract_stack_signature(error_output)[:5],
        "change_context": {
            "files_touched": files_touched,
            "change_types": kinds,
            "distance_to_error": _compute_distance(location, files_touched),
        },
        "temporal": {
            "is_new_in_this_diff": True,
        },
    }
    return {
        "feature_vector": feature_vector,
        "raw_normalized": normalized_text[:500],
    }
59
+
60
+
61
def _parse_error_type(error_output: str) -> str:
    """Extract the error/exception type from output.

    Patterns are tried in list order (case-insensitively) and the first
    pattern that matches anywhere in the text wins, regardless of where in
    the text it matches. Entries with a fallback of ``None`` return the
    captured group; the others return their fixed label.

    Args:
        error_output: Raw output of a failed run.

    Returns:
        The detected error type name, or ``"UnknownError"`` if nothing matches.
    """
    patterns = [
        (r'(\w+Error)', None),  # ValueError, KeyError, etc.
        (r'(\w+Exception)', None),  # RuntimeException, etc.
        (r'(\w+Warning)', None),  # DeprecationWarning, etc.
        # Needs a capture group like its neighbours: group(1) is returned.
        (r'(AssertionError)', None),
        (r'Traceback.*\n\s*(\w+)', None),  # First line after Traceback
        (r'FAILED.*-.*Error:\s*(\w+)', None),  # pytest output
        (r'segmentation fault', 'Segfault'),
        # The specific connection phrases must come before the generic
        # 'timeout', otherwise "connection timeout" is reported as TimeoutError.
        (r'connection timeout', 'ConnectionTimeout'),
        (r'connection refused', 'ConnectionRefused'),
        (r'timeout', 'TimeoutError'),
        (r'out of memory', 'OutOfMemoryError'),
        (r'permission denied', 'PermissionError'),
        (r'file not found', 'FileNotFoundError'),
        (r'module not found', 'ModuleNotFoundError'),
        (r'import error', 'ImportError'),
    ]

    for pattern, fallback in patterns:
        match = re.search(pattern, error_output, re.IGNORECASE)
        if match:
            return match.group(1) if fallback is None else fallback

    return "UnknownError"
88
+
89
+
90
def _normalize_error_message(error_output: str) -> str:
    """Normalize error message for consistent hashing.

    Replaces environment-specific fragments with placeholders, collapses
    whitespace and lowercases the result (so the placeholders themselves end
    up lowercase, e.g. ``<uuid>``).

    Args:
        error_output: Raw error text.

    Returns:
        The normalized, lowercased, single-spaced text.
    """
    # Remove file paths (variable between environments)
    text = re.sub(r'/[^\s]+\.py:\d+', '<FILE>:<LINE>', error_output)
    # Remove hex addresses
    text = re.sub(r'0x[0-9a-fA-F]+', '<ADDR>', text)
    # Remove timestamps
    text = re.sub(r'\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[^\s]*', '<TIMESTAMP>', text)
    # Remove UUIDs. Lowercasing happens only at the end, so the match has to
    # be case-insensitive or uppercase UUIDs would survive normalization.
    text = re.sub(r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
                  '<UUID>', text, flags=re.IGNORECASE)
    # Collapse whitespace
    text = re.sub(r'\s+', ' ', text).strip().lower()
    return text
104
+
105
+
106
def _extract_location(error_output: str, changed_files: list[str]) -> str:
    """Pick the ``path:line`` location most likely tied to the error.

    Preference order: the first traceback frame that refers to a changed
    file, then the first traceback frame of any file, then the first changed
    file, and finally the literal ``"unknown"``.
    """
    frames = re.findall(r'File "([^"]+)", line (\d+)', error_output)

    # A frame refers to a changed file when the changed path occurs inside
    # the frame path, or the frame path ends with the changed file's basename.
    for frame_path, line_no in frames:
        touches_change = any(
            candidate in frame_path
            or frame_path.endswith(candidate.split("/")[-1])
            for candidate in changed_files
        )
        if touches_change:
            return f"{frame_path}:{line_no}"

    if frames:
        first_path, first_line = frames[0]
        return f"{first_path}:{first_line}"

    return changed_files[0] if changed_files else "unknown"
123
+
124
+
125
def _extract_stack_signature(error_output: str) -> list[str]:
    """Return one short hash per traceback frame found in the output.

    Each frame is reduced to ``<basename>:<line>:<function>`` and hashed to
    the first 8 hex characters of its SHA-256. When no frame is found, a
    single hash of the first 200 characters of the output is returned.
    """
    frames = re.findall(r'File "([^"]+)", line (\d+), in (\w+)', error_output)

    if not frames:
        # No parseable frames: fall back to hashing the head of the output.
        return [hashlib.sha256(error_output[:200].encode()).hexdigest()[:8]]

    return [
        hashlib.sha256(
            f"{path.split('/')[-1]}:{line_no}:{func_name}".encode()
        ).hexdigest()[:8]
        for path, line_no, func_name in frames
    ]
138
+
139
+
140
def _compute_distance(location: str, changed_files: list[str]) -> str:
    """Determine if the error is in a changed file, same module, or elsewhere.

    Args:
        location: Error location, either a bare path or ``path:line``.
        changed_files: Paths of the files modified by the change.

    Returns:
        ``"SAME_FILE"``, ``"SAME_MODULE"`` (same directory as a changed file)
        or ``"DIFFERENT_MODULE"``.
    """
    if not location or location == "unknown":
        return "DIFFERENT_MODULE"

    # Locations may carry a trailing ":<line>". Drop it, otherwise the
    # basename suffix check below can never succeed.
    path_part, sep, suffix = location.rpartition(":")
    if sep and suffix.isdigit():
        location = path_part

    for f in changed_files:
        if f in location or location.endswith(f.split("/")[-1]):
            return "SAME_FILE"

    # Check same directory/module
    location_dir = "/".join(location.split("/")[:-1])
    for f in changed_files:
        f_dir = "/".join(f.split("/")[:-1])
        if location_dir and f_dir and location_dir == f_dir:
            return "SAME_MODULE"

    return "DIFFERENT_MODULE"