emdash-core 0.1.37__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/agent/agents.py +9 -0
- emdash_core/agent/background.py +481 -0
- emdash_core/agent/inprocess_subagent.py +70 -1
- emdash_core/agent/mcp/config.py +78 -2
- emdash_core/agent/prompts/main_agent.py +53 -1
- emdash_core/agent/prompts/plan_mode.py +65 -44
- emdash_core/agent/prompts/subagents.py +73 -1
- emdash_core/agent/prompts/workflow.py +179 -28
- emdash_core/agent/providers/models.py +1 -1
- emdash_core/agent/providers/openai_provider.py +10 -0
- emdash_core/agent/research/researcher.py +154 -45
- emdash_core/agent/runner/agent_runner.py +145 -19
- emdash_core/agent/runner/sdk_runner.py +29 -2
- emdash_core/agent/skills.py +81 -1
- emdash_core/agent/toolkit.py +87 -11
- emdash_core/agent/tools/__init__.py +2 -0
- emdash_core/agent/tools/coding.py +344 -52
- emdash_core/agent/tools/lsp.py +361 -0
- emdash_core/agent/tools/skill.py +21 -1
- emdash_core/agent/tools/task.py +16 -19
- emdash_core/agent/tools/task_output.py +262 -32
- emdash_core/agent/verifier/__init__.py +11 -0
- emdash_core/agent/verifier/manager.py +295 -0
- emdash_core/agent/verifier/models.py +97 -0
- emdash_core/{swarm/worktree_manager.py → agent/worktree.py} +19 -1
- emdash_core/api/agent.py +297 -2
- emdash_core/api/research.py +3 -3
- emdash_core/api/router.py +0 -4
- emdash_core/context/longevity.py +197 -0
- emdash_core/context/providers/explored_areas.py +83 -39
- emdash_core/context/reranker.py +35 -144
- emdash_core/context/simple_reranker.py +500 -0
- emdash_core/context/tool_relevance.py +84 -0
- emdash_core/core/config.py +8 -0
- emdash_core/graph/__init__.py +8 -1
- emdash_core/graph/connection.py +24 -3
- emdash_core/graph/writer.py +7 -1
- emdash_core/models/agent.py +10 -0
- emdash_core/server.py +1 -6
- emdash_core/sse/stream.py +16 -1
- emdash_core/utils/__init__.py +0 -2
- emdash_core/utils/git.py +103 -0
- emdash_core/utils/image.py +147 -160
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/METADATA +6 -6
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/RECORD +47 -52
- emdash_core/api/swarm.py +0 -223
- emdash_core/db/__init__.py +0 -67
- emdash_core/db/auth.py +0 -134
- emdash_core/db/models.py +0 -91
- emdash_core/db/provider.py +0 -222
- emdash_core/db/providers/__init__.py +0 -5
- emdash_core/db/providers/supabase.py +0 -452
- emdash_core/swarm/__init__.py +0 -17
- emdash_core/swarm/merge_agent.py +0 -383
- emdash_core/swarm/session_manager.py +0 -274
- emdash_core/swarm/swarm_runner.py +0 -226
- emdash_core/swarm/task_definition.py +0 -137
- emdash_core/swarm/worker_spawner.py +0 -319
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/WHEEL +0 -0
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/entry_points.txt +0 -0
emdash_core/agent/verifier/manager.py (new file)
@@ -0,0 +1,295 @@
+"""VerifierManager - runs verifiers and generates reports."""
+
+import json
+import re
+import subprocess
+import time
+from pathlib import Path
+from typing import Any
+
+from rich.console import Console
+
+from ..providers import get_provider
+from .models import VerifierConfig, VerifierResult, VerificationReport
+
+
+console = Console()
+
+
+class VerifierManager:
+    """Manages and runs verification checks."""
+
+    def __init__(self, repo_root: Path):
+        """Initialize verifier manager.
+
+        Args:
+            repo_root: Root directory of the repository
+        """
+        self.repo_root = repo_root
+        self.config_file = repo_root / ".emdash" / "verifiers.json"
+        self.verifiers = self._load_config()
+
+    def _load_config(self) -> list[VerifierConfig]:
+        """Load verifiers from config file."""
+        if not self.config_file.exists():
+            return []
+
+        try:
+            data = json.loads(self.config_file.read_text())
+            verifiers = []
+            for v in data.get("verifiers", []):
+                config = VerifierConfig.from_dict(v)
+                if config.enabled:
+                    verifiers.append(config)
+            return verifiers
+        except (json.JSONDecodeError, KeyError) as e:
+            console.print(f"[yellow]Warning: Failed to load verifiers.json: {e}[/yellow]")
+            return []
+
+    def get_config(self) -> dict:
+        """Get full config including max_attempts.
+
+        Config options:
+            max_attempts: Maximum number of attempts (default: 3)
+                Use 0 for infinite attempts (no limit)
+        """
+        if not self.config_file.exists():
+            return {"verifiers": [], "max_attempts": 3}
+
+        try:
+            config = json.loads(self.config_file.read_text())
+            # Support legacy max_retries as fallback
+            if "max_attempts" not in config and "max_retries" in config:
+                config["max_attempts"] = config["max_retries"]
+            return config
+        except json.JSONDecodeError:
+            return {"verifiers": [], "max_attempts": 3}
+
+    def save_config(self, config: dict) -> None:
+        """Save config to file."""
+        self.config_file.parent.mkdir(parents=True, exist_ok=True)
+        self.config_file.write_text(json.dumps(config, indent=2))
+        self.verifiers = self._load_config()
+
+    def run_all(self, context: dict | None = None) -> VerificationReport:
+        """Run all enabled verifiers.
+
+        Args:
+            context: Optional context dict with git_diff, goal, files_changed
+
+        Returns:
+            VerificationReport with all results
+        """
+        if context is None:
+            context = {}
+
+        results = []
+        for verifier in self.verifiers:
+            result = self._run_verifier(verifier, context)
+            results.append(result)
+
+        all_passed = all(r.passed for r in results) if results else True
+        summary = self._build_summary(results)
+
+        return VerificationReport(
+            results=results,
+            all_passed=all_passed,
+            summary=summary,
+        )
+
+    def _run_verifier(self, config: VerifierConfig, context: dict) -> VerifierResult:
+        """Run a single verifier."""
+        if config.type == "command":
+            return self._run_command_verifier(config)
+        else:
+            return self._run_llm_verifier(config, context)
+
+    def _run_command_verifier(self, config: VerifierConfig) -> VerifierResult:
+        """Run a command-based verifier."""
+        if not config.command:
+            return VerifierResult(
+                name=config.name,
+                passed=False,
+                output="No command specified",
+                duration=0,
+                issues=["No command specified"],
+            )
+
+        start = time.time()
+        try:
+            result = subprocess.run(
+                config.command,
+                shell=True,
+                capture_output=True,
+                text=True,
+                timeout=config.timeout,
+                cwd=self.repo_root,
+            )
+            passed = result.returncode == 0 if config.pass_on_exit_0 else True
+            output = (result.stdout + result.stderr).strip()
+
+            # Extract issues from output if failed
+            issues = []
+            if not passed:
+                issues = self._extract_issues_from_output(output)
+
+            return VerifierResult(
+                name=config.name,
+                passed=passed,
+                output=output[:5000],  # truncate long output
+                duration=time.time() - start,
+                issues=issues,
+            )
+
+        except subprocess.TimeoutExpired:
+            return VerifierResult(
+                name=config.name,
+                passed=False,
+                output=f"Command timed out after {config.timeout}s",
+                duration=config.timeout,
+                issues=["Command timed out"],
+            )
+        except Exception as e:
+            return VerifierResult(
+                name=config.name,
+                passed=False,
+                output=str(e),
+                duration=time.time() - start,
+                issues=[str(e)],
+            )
+
+    def _run_llm_verifier(self, config: VerifierConfig, context: dict) -> VerifierResult:
+        """Run an LLM-based verifier using gpt-oss-120b."""
+        if not config.prompt:
+            return VerifierResult(
+                name=config.name,
+                passed=False,
+                output="No prompt specified",
+                duration=0,
+                issues=["No prompt specified"],
+            )
+
+        start = time.time()
+        try:
+            provider = get_provider("gpt-oss-120b")
+
+            # Build prompt with context
+            full_prompt = self._build_llm_prompt(config.prompt, context)
+
+            response = provider.chat([{"role": "user", "content": full_prompt}])
+            content = response.content or ""
+
+            # Parse LLM response
+            result_data = self._parse_llm_response(content)
+
+            return VerifierResult(
+                name=config.name,
+                passed=result_data.get("pass", False),
+                output=result_data.get("summary", content[:500]),
+                duration=time.time() - start,
+                issues=result_data.get("issues", []),
+            )
+
+        except Exception as e:
+            return VerifierResult(
+                name=config.name,
+                passed=False,
+                output=f"LLM error: {e}",
+                duration=time.time() - start,
+                issues=[str(e)],
+            )
+
+    def _build_llm_prompt(self, user_prompt: str, context: dict) -> str:
+        """Build full prompt for LLM verifier."""
+        parts = [user_prompt, "", "## Context"]
+
+        if context.get("goal"):
+            parts.append(f"- Goal: {context['goal']}")
+
+        if context.get("files_changed"):
+            files = context["files_changed"]
+            if isinstance(files, list):
+                parts.append(f"- Files changed: {', '.join(files[:10])}")
+
+        if context.get("git_diff"):
+            diff = context["git_diff"]
+            # Truncate large diffs
+            if len(diff) > 10000:
+                diff = diff[:10000] + "\n... [truncated]"
+            parts.append(f"\n## Git Diff\n```diff\n{diff}\n```")
+
+        parts.append("""
+## Response Format
+Return JSON only:
+{"pass": true/false, "issues": ["issue1", ...], "summary": "brief summary"}
+""")
+
+        return "\n".join(parts)
+
+    def _parse_llm_response(self, content: str) -> dict[str, Any]:
+        """Parse LLM response to extract pass/fail and issues."""
+        try:
+            # Try to find JSON in the response
+            json_match = re.search(r"\{[\s\S]*\}", content)
+            if json_match:
+                data = json.loads(json_match.group())
+                return {
+                    "pass": data.get("pass", False),
+                    "issues": data.get("issues", []),
+                    "summary": data.get("summary", ""),
+                }
+        except json.JSONDecodeError:
+            pass
+
+        # Fallback: look for keywords
+        content_lower = content.lower()
+        passed = any(word in content_lower for word in ["pass", "approved", "looks good", "lgtm"])
+        failed = any(word in content_lower for word in ["fail", "issue", "problem", "bug", "error"])
+
+        return {
+            "pass": passed and not failed,
+            "issues": [content[:200]] if failed else [],
+            "summary": content[:200],
+        }
+
+    def _extract_issues_from_output(self, output: str) -> list[str]:
+        """Extract issues from command output."""
+        issues = []
+
+        # Common patterns for test failures, lint errors, etc.
+        patterns = [
+            r"FAIL[ED]?:?\s*(.+)",
+            r"ERROR:?\s*(.+)",
+            r"error:?\s*(.+)",
+            r"AssertionError:?\s*(.+)",
+            r"TypeError:?\s*(.+)",
+            r"✗\s*(.+)",
+        ]
+
+        for pattern in patterns:
+            matches = re.findall(pattern, output, re.IGNORECASE | re.MULTILINE)
+            for match in matches[:5]:  # limit to 5 per pattern
+                issue = match.strip()[:200]
+                if issue and issue not in issues:
+                    issues.append(issue)
+
+        # If no patterns matched, use first few lines
+        if not issues:
+            lines = output.strip().split("\n")
+            issues = [line.strip()[:200] for line in lines[:3] if line.strip()]
+
+        return issues[:10]  # limit total issues
+
+    def _build_summary(self, results: list[VerifierResult]) -> str:
+        """Build summary string from results."""
+        if not results:
+            return "No verifiers configured"
+
+        passed = sum(1 for r in results if r.passed)
+        total = len(results)
+
+        if passed == total:
+            return f"All {total} verifier(s) passed"
+        else:
+            failed_names = [r.name for r in results if not r.passed]
+            return f"{passed}/{total} passed. Failed: {', '.join(failed_names)}"
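Note on usage: _load_config and get_config together imply the shape of .emdash/verifiers.json (a "verifiers" array plus a top-level "max_attempts"). The following is a minimal sketch, not part of the diff, assuming the import path emdash_core.agent.verifier.manager taken from the file list above; the "pytest -q" command and the goal string are placeholders:

    from pathlib import Path

    from emdash_core.agent.verifier.manager import VerifierManager

    # Config dict in the shape VerifierManager reads and writes; keys mirror
    # VerifierConfig.from_dict. All values here are illustrative.
    config = {
        "max_attempts": 3,  # 0 means unlimited attempts
        "verifiers": [
            {"type": "command", "name": "tests", "command": "pytest -q", "timeout": 300},
            {"type": "llm", "name": "review", "prompt": "Does the diff accomplish the goal?"},
        ],
    }

    manager = VerifierManager(repo_root=Path("."))
    manager.save_config(config)  # writes .emdash/verifiers.json and reloads enabled verifiers

    report = manager.run_all(context={"goal": "Fix login redirect"})
    print(report.summary)
    for failure in report.get_failures():
        print(failure.name, failure.issues)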
emdash_core/agent/verifier/models.py (new file)
@@ -0,0 +1,97 @@
+"""Data models for the verification system."""
+
+from dataclasses import dataclass, field
+from typing import Literal
+
+
+@dataclass
+class VerifierConfig:
+    """Configuration for a single verifier."""
+
+    type: Literal["command", "llm"]
+    name: str
+    command: str | None = None  # for command type
+    prompt: str | None = None  # for llm type
+    timeout: int = 120  # seconds
+    pass_on_exit_0: bool = True  # for command type
+    enabled: bool = True  # can disable without removing
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "VerifierConfig":
+        """Create from dictionary."""
+        return cls(
+            type=data.get("type", "command"),
+            name=data.get("name", "unnamed"),
+            command=data.get("command"),
+            prompt=data.get("prompt"),
+            timeout=data.get("timeout", 120),
+            pass_on_exit_0=data.get("pass_on_exit_0", True),
+            enabled=data.get("enabled", True),
+        )
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        result = {
+            "type": self.type,
+            "name": self.name,
+            "enabled": self.enabled,
+        }
+        if self.type == "command":
+            result["command"] = self.command
+            result["timeout"] = self.timeout
+            result["pass_on_exit_0"] = self.pass_on_exit_0
+        else:
+            result["prompt"] = self.prompt
+        return result
+
+
+@dataclass
+class VerifierResult:
+    """Result from running a single verifier."""
+
+    name: str
+    passed: bool
+    output: str
+    duration: float
+    issues: list[str] = field(default_factory=list)
+
+    @property
+    def status_icon(self) -> str:
+        """Get status icon for display."""
+        return "[green]✓[/green]" if self.passed else "[red]✗[/red]"
+
+
+@dataclass
+class VerificationReport:
+    """Complete report from running all verifiers."""
+
+    results: list[VerifierResult]
+    all_passed: bool
+    summary: str
+
+    @property
+    def passed_count(self) -> int:
+        """Count of passed verifiers."""
+        return sum(1 for r in self.results if r.passed)
+
+    @property
+    def failed_count(self) -> int:
+        """Count of failed verifiers."""
+        return sum(1 for r in self.results if not r.passed)
+
+    @property
+    def total_duration(self) -> float:
+        """Total duration of all verifiers."""
+        return sum(r.duration for r in self.results)
+
+    def get_failures(self) -> list[VerifierResult]:
+        """Get list of failed results."""
+        return [r for r in self.results if not r.passed]
+
+    def get_all_issues(self) -> list[str]:
+        """Get all issues from all failed verifiers."""
+        issues = []
+        for r in self.results:
+            if not r.passed:
+                issues.extend(r.issues)
+        return issues
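A detail worth noting in VerifierConfig: to_dict serializes only the fields relevant to the verifier's type, so command-only fields are dropped when an llm entry round-trips through from_dict and to_dict. A small sketch, assuming the import path emdash_core.agent.verifier.models from the file list above (values are illustrative):

    from emdash_core.agent.verifier.models import VerifierConfig

    # An llm verifier defined with a command-type field mixed in.
    raw = {"type": "llm", "name": "style-check", "prompt": "Flag style issues.", "timeout": 60}
    cfg = VerifierConfig.from_dict(raw)

    # timeout and pass_on_exit_0 are omitted on output because they only
    # apply to command verifiers; prompt is kept.
    print(cfg.to_dict())
    # {'type': 'llm', 'name': 'style-check', 'enabled': True, 'prompt': 'Flag style issues.'}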
emdash_core/{swarm/worktree_manager.py → agent/worktree.py} (renamed)
@@ -1,4 +1,22 @@
-"""Git worktree management for
+"""Git worktree management for isolated agent changes.
+
+This module provides worktree management for running agents in isolated
+git worktrees, allowing changes to be reviewed before merging to the
+main branch.
+
+Usage:
+    # Enable worktree mode via environment variable
+    export EMDASH_USE_WORKTREE=true
+
+    # Or via API options
+    options.use_worktree = True
+
+Flow:
+    1. User sends message to agent
+    2. If use_worktree=True, create worktree: .emdash-worktrees/{session-id}/
+    3. Agent makes changes in the worktree
+    4. When done, user can apply or discard changes via API
+"""
 
 import re
 import shutil