epi-recorder 2.1.3__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epi_analyzer/__init__.py +9 -0
- epi_analyzer/detector.py +337 -0
- epi_cli/__init__.py +4 -0
- epi_cli/__main__.py +4 -0
- epi_cli/chat.py +21 -3
- epi_cli/debug.py +107 -0
- epi_cli/keys.py +4 -0
- epi_cli/ls.py +5 -1
- epi_cli/main.py +8 -0
- epi_cli/record.py +4 -0
- epi_cli/run.py +12 -4
- epi_cli/verify.py +4 -0
- epi_cli/view.py +4 -0
- epi_core/__init__.py +5 -1
- epi_core/container.py +68 -55
- epi_core/redactor.py +4 -0
- epi_core/schemas.py +6 -2
- epi_core/serialize.py +4 -0
- epi_core/storage.py +186 -0
- epi_core/trust.py +4 -0
- epi_recorder/__init__.py +5 -1
- epi_recorder/api.py +28 -2
- epi_recorder/async_api.py +151 -0
- epi_recorder/bootstrap.py +4 -0
- epi_recorder/environment.py +4 -0
- epi_recorder/patcher.py +33 -13
- epi_recorder/test_import.py +2 -0
- epi_recorder/test_script.py +2 -0
- epi_recorder-2.2.0.dist-info/METADATA +162 -0
- epi_recorder-2.2.0.dist-info/RECORD +38 -0
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.2.0.dist-info}/WHEEL +1 -1
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.2.0.dist-info}/licenses/LICENSE +4 -29
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.2.0.dist-info}/top_level.txt +1 -0
- epi_viewer_static/app.js +38 -7
- epi_viewer_static/crypto.js +3 -0
- epi_viewer_static/index.html +4 -2
- epi_viewer_static/viewer_lite.css +3 -1
- epi_postinstall.py +0 -197
- epi_recorder-2.1.3.dist-info/METADATA +0 -577
- epi_recorder-2.1.3.dist-info/RECORD +0 -34
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.2.0.dist-info}/entry_points.txt +0 -0
epi_analyzer/__init__.py
ADDED
epi_analyzer/detector.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EPI Agent Mistake Detector
|
|
3
|
+
|
|
4
|
+
AI-powered analysis of agent execution to identify bugs:
|
|
5
|
+
- Infinite loops (same tool called repeatedly with errors)
|
|
6
|
+
- Hallucinations (confident LLM output followed by tool failures)
|
|
7
|
+
- Inefficiency (excessive token usage, repeated work)
|
|
8
|
+
- Repetitive patterns (agent redoing same queries)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import sqlite3
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Dict, Any, Optional
|
|
15
|
+
from difflib import SequenceMatcher
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MistakeDetector:
    """
    Heuristic detector for common agent failure modes in an EPI recording.

    Loads the recorded steps and scans them for infinite loops,
    hallucinations (a confident LLM answer followed by errors),
    inefficient token usage and repeated queries.  Findings accumulate in
    ``self.mistakes`` as plain dictionaries with at least the keys
    ``type``, ``severity``, ``step``, ``explanation`` and ``fix``.
    """

    # Tunable thresholds.  The defaults reproduce the original hard-coded
    # values; subclasses or callers may override them.
    LOOP_WINDOW = 5                    # number of trailing LLM steps inspected for a loop
    LOOP_SIMILARITY_THRESHOLD = 0.8    # average similarity above which a loop is reported
    REPEAT_SIMILARITY_THRESHOLD = 0.7  # pairwise similarity above which a repeat is reported
    HALLUCINATION_LOOKAHEAD = 3        # steps after an LLM response scanned for errors
    COST_PER_1K_TOKENS = 0.045         # rough GPT-4 average of input/output pricing (USD)

    _SEVERITY_MARKERS = {
        'CRITICAL': '[!!!]',
        'HIGH': '[!!]',
        'MEDIUM': '[!]',
        'LOW': '[-]',
    }

    def __init__(self, epi_file: str):
        """
        Initialize detector with an EPI recording file.

        Args:
            epi_file: Path to .epi file (can be .epi.db or steps.jsonl)

        Raises:
            FileNotFoundError: If no loadable recording is found at the path.
        """
        self.epi_path = Path(epi_file)
        self.steps = self._load_steps()
        self.mistakes: List[Dict] = []

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def _load_steps(self) -> List[Dict]:
        """
        Load steps from an EPI recording (ZIP archive, directory, JSONL or SQLite).

        Sources are tried in this order: packed ``.epi`` archive, recording
        directory, bare ``.jsonl`` file, then SQLite database candidates.

        Raises:
            FileNotFoundError: If none of the sources yields steps.
        """
        # Packed .epi archive: best effort, fall through to the other
        # strategies if the archive is unreadable or holds no steps.
        if self.epi_path.is_file() and self.epi_path.suffix == '.epi':
            try:
                archived = self._load_from_archive()
            except Exception:
                archived = None
            if archived is not None:
                return archived

        # Recording directory
        if self.epi_path.is_dir():
            jsonl_path = self.epi_path / "steps.jsonl"
            if jsonl_path.exists():
                return self._load_from_jsonl(jsonl_path)

            # Sorted so the choice is deterministic when several exist.
            temp_dbs = sorted(self.epi_path.glob("*_temp.db"))
            if temp_dbs:
                return self._load_from_sqlite(temp_dbs[0])

        # Bare JSONL file
        if self.epi_path.suffix == '.jsonl':
            return self._load_from_jsonl(self.epi_path)

        # SQLite database candidates
        db_paths = [self.epi_path]
        try:
            db_paths.append(self.epi_path.with_suffix('.epi.db'))
        except ValueError:
            # with_suffix() rejects paths with an empty name (e.g. "."),
            # which must not mask the FileNotFoundError raised below.
            pass
        db_paths.append(self.epi_path / 'recording.db')

        for db_path in db_paths:
            if db_path.exists():
                try:
                    return self._load_from_sqlite(db_path)
                except Exception:
                    continue

        raise FileNotFoundError(f"No valid .epi file found at {self.epi_path}")

    def _load_from_archive(self) -> Optional[List[Dict]]:
        """
        Load steps from a ZIP-packed ``.epi`` archive.

        Only the members that can hold steps (``steps.jsonl`` and top-level
        ``*.db`` files) are extracted, and the scratch directory is always
        removed afterwards.

        Returns:
            The loaded steps, or ``None`` when the file is not a ZIP archive
            or contains no loadable steps.
        """
        import shutil
        import tempfile
        import zipfile

        if not zipfile.is_zipfile(self.epi_path):
            return None

        temp_dir = Path(tempfile.mkdtemp())
        try:
            with zipfile.ZipFile(self.epi_path, 'r') as zf:
                wanted = [
                    name for name in zf.namelist()
                    if name == "steps.jsonl"
                    or ("/" not in name and name.endswith(".db"))
                ]
                zf.extractall(temp_dir, members=wanted)

            steps_file = temp_dir / "steps.jsonl"
            if steps_file.exists():
                return self._load_from_jsonl(steps_file)

            for db_file in sorted(temp_dir.glob("*.db")):
                try:
                    return self._load_from_sqlite(db_file)
                except Exception:
                    continue
            return None
        finally:
            # The original left this directory behind on every load.
            shutil.rmtree(temp_dir, ignore_errors=True)

    def _load_from_jsonl(self, path: Path) -> List[Dict]:
        """
        Load steps from a JSONL file (one JSON object per non-blank line).

        The step ``id`` is the zero-based line number, so blank lines leave
        gaps in the id sequence.
        """
        steps = []
        with open(path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                if not line.strip():
                    continue
                step = json.loads(line)
                steps.append({
                    'id': i,
                    'index': step.get('index', i),
                    'type': step.get('kind', 'unknown'),
                    'content': step.get('content', {}),
                    'timestamp': step.get('timestamp', '')
                })
        return steps

    def _load_from_sqlite(self, db_path: Path) -> List[Dict]:
        """
        Load steps from the ``steps`` table of a SQLite database.

        Columns are read positionally as (id, index, type, content[, timestamp]);
        a string ``content`` column is decoded as JSON.
        """
        conn = sqlite3.connect(str(db_path))
        try:
            cursor = conn.execute('SELECT * FROM steps ORDER BY id')

            steps = []
            for row in cursor.fetchall():
                content = json.loads(row[3]) if isinstance(row[3], str) else row[3]
                steps.append({
                    'id': row[0],
                    'index': row[1],
                    'type': row[2],
                    'content': content,
                    'timestamp': row[4] if len(row) > 4 else None
                })
            return steps
        finally:
            # Close even when the query or JSON decoding fails.
            conn.close()

    # ------------------------------------------------------------------
    # Normalisation helpers (recorded data may contain None / odd shapes)
    # ------------------------------------------------------------------

    @staticmethod
    def _step_type(step: Dict) -> str:
        """Return the lower-cased step type, or '' when it is not a string."""
        kind = step.get('type')
        return kind.lower() if isinstance(kind, str) else ''

    @staticmethod
    def _step_content(step: Dict) -> Dict:
        """Return the step content if it is a dict, otherwise an empty dict."""
        content = step.get('content')
        return content if isinstance(content, dict) else {}

    @staticmethod
    def _as_text(value: Any) -> str:
        """
        Coerce a message ``content`` value to plain text.

        Strings pass through unchanged, ``None`` becomes '', and a list of
        parts is joined from its string items and the ``text`` field of its
        dict items.  Anything else is converted with ``str()``.
        """
        if isinstance(value, str):
            return value
        if value is None:
            return ''
        if isinstance(value, list):
            parts = []
            for part in value:
                if isinstance(part, str):
                    parts.append(part)
                elif isinstance(part, dict) and isinstance(part.get('text'), str):
                    parts.append(part['text'])
            return ' '.join(parts)
        return str(value)

    @classmethod
    def _user_texts(cls, content: Dict) -> List[str]:
        """Return the text of every ``role == 'user'`` message in ``content``."""
        messages = content.get('messages')
        if not isinstance(messages, list):
            return []
        return [
            cls._as_text(msg.get('content', ''))
            for msg in messages
            if isinstance(msg, dict) and msg.get('role') == 'user'
        ]

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------

    def analyze(self) -> List[Dict]:
        """
        Run all detection patterns and return the list of findings.

        Findings are reset on every call, so calling ``analyze()`` repeatedly
        does not duplicate entries.
        """
        self.mistakes = []
        self._detect_infinite_loops()
        self._detect_hallucinations()
        self._detect_inefficiency()
        self._detect_repetitive_patterns()
        return self.mistakes

    def _detect_infinite_loops(self):
        """
        Detect an agent stuck sending near-identical requests.

        Compares the last user message (first 100 characters) of the final
        ``LOOP_WINDOW`` LLM steps and reports a CRITICAL finding when
        consecutive prompts are on average more similar than
        ``LOOP_SIMILARITY_THRESHOLD``.
        """
        window = self.LOOP_WINDOW
        llm_steps = [s for s in self.steps if 'llm' in self._step_type(s)]
        if len(llm_steps) < window:
            return

        recent = llm_steps[-window:]

        # Steps without user messages (e.g. responses) contribute no pattern.
        patterns = []
        for step in recent:
            user_texts = self._user_texts(self._step_content(step))
            if user_texts:
                patterns.append(user_texts[-1][:100])

        if len(patterns) < 3:
            return

        similarities = [
            self._calculate_similarity(first, second)
            for first, second in zip(patterns, patterns[1:])
        ]
        avg_similarity = sum(similarities) / len(similarities)

        if avg_similarity > self.LOOP_SIMILARITY_THRESHOLD:
            self.mistakes.append({
                'type': 'INFINITE_LOOP',
                'severity': 'CRITICAL',
                'step': recent[-1]['id'],
                'explanation': f'Agent appears stuck in a loop - repeated similar requests {window} times',
                'fix': 'Add max_iterations limit or better error handling',
                'cost_impact': 'High - stuck in loop burning API credits',
                'pattern_similarity': f'{avg_similarity:.0%}'
            })

    def _detect_hallucinations(self):
        """
        Detect LLM responses that finished normally but were followed by errors.

        For each ``llm.response`` step from the ``openai`` or ``google``
        provider whose first choice has ``finish_reason == 'stop'``, the next
        ``HALLUCINATION_LOOKAHEAD`` steps are scanned for a type containing
        ``error``; a match yields a HIGH finding.
        """
        for i, step in enumerate(self.steps[:-1]):
            if 'llm.response' not in self._step_type(step):
                continue

            content = self._step_content(step)

            # Slicing clamps at the end of the list, so no explicit min() is needed.
            next_steps = self.steps[i + 1:i + 1 + self.HALLUCINATION_LOOKAHEAD]
            errors = [s for s in next_steps if 'error' in self._step_type(s)]
            if not errors or content.get('provider') not in ['openai', 'google']:
                continue

            choices = content.get('choices')
            if not isinstance(choices, list) or not choices:
                continue
            first_choice = choices[0] if isinstance(choices[0], dict) else {}

            if first_choice.get('finish_reason', 'stop') != 'stop':
                continue

            # The message, or its content, may be None in recorded data.
            message = first_choice.get('message')
            if not isinstance(message, dict):
                message = {}
            response_text = self._as_text(message.get('content', ''))[:150]

            self.mistakes.append({
                'type': 'HALLUCINATION',
                'severity': 'HIGH',
                'step': step['id'],
                'explanation': 'LLM generated confident output but subsequent operations failed',
                'details': f"LLM said: {response_text}...",
                'error_step': errors[0]['id'],
                'fix': 'Add output validation or use function calling with strict schemas'
            })

    def _detect_inefficiency(self):
        """
        Detect expensive LLM usage relative to the size of the workflow.

        Flags high token counts on short runs, an estimated cost above $0.50
        (at ``COST_PER_1K_TOKENS``) and GPT-4 usage on very short runs, and
        records all raised flags in a single MEDIUM finding.
        """
        llm_responses = [s for s in self.steps if 'llm.response' in self._step_type(s)]
        if not llm_responses:
            return

        total_tokens = 0
        gpt4_calls = 0
        for step in llm_responses:
            content = self._step_content(step)

            usage = content.get('usage')
            if isinstance(usage, dict):
                tokens = usage.get('total_tokens')
                # Ignore None / non-numeric counts instead of crashing on them.
                if isinstance(tokens, (int, float)) and not isinstance(tokens, bool):
                    total_tokens += tokens

            model = content.get('model')
            if isinstance(model, str) and 'gpt-4' in model.lower():
                gpt4_calls += 1

        step_count = len(self.steps)
        flags = []

        if total_tokens > 10000 and step_count < 5:
            flags.append(f"High token usage ({total_tokens:,} tokens) for simple workflow")

        # Rough estimate. GPT-4: ~$0.03/1K input, ~$0.06/1K output - avg $0.045/1K
        estimated_cost = (total_tokens / 1000) * self.COST_PER_1K_TOKENS
        if estimated_cost > 0.50:
            flags.append(f"Expensive execution (~${estimated_cost:.2f})")

        if gpt4_calls > 0 and step_count < 3:
            flags.append(f"Using GPT-4 for simple task ({gpt4_calls} calls)")

        if flags:
            self.mistakes.append({
                'type': 'INEFFICIENT',
                'severity': 'MEDIUM',
                'step': llm_responses[-1]['id'],
                'explanation': '; '.join(flags),
                'metrics': {
                    'total_tokens': total_tokens,
                    'estimated_cost': round(estimated_cost, 2),
                    'step_count': step_count,
                    'llm_calls': len(llm_responses)
                },
                'fix': 'Consider using GPT-3.5-turbo or caching responses'
            })

    def _detect_repetitive_patterns(self):
        """
        Detect an agent re-sending a query it already asked.

        Compares the first user message (first 100 characters) of every
        ``llm.request`` step pairwise and reports only the first pair more
        similar than ``REPEAT_SIMILARITY_THRESHOLD`` as a LOW finding.
        Recordings with fewer than 10 steps or 3 requests are skipped.
        """
        if len(self.steps) < 10:
            return

        llm_requests = [s for s in self.steps if 'llm.request' in self._step_type(s)]
        if len(llm_requests) < 3:
            return

        # (step id, first user message) per request that has a user message
        queries = []
        for step in llm_requests:
            user_texts = self._user_texts(self._step_content(step))
            if user_texts:
                queries.append((step['id'], user_texts[0][:100]))

        for i, (first_id, first_text) in enumerate(queries):
            for second_id, second_text in queries[i + 1:]:
                similarity = self._calculate_similarity(first_text, second_text)
                if similarity > self.REPEAT_SIMILARITY_THRESHOLD:
                    self.mistakes.append({
                        'type': 'REPETITIVE_PATTERN',
                        'severity': 'LOW',
                        'step': second_id,
                        'explanation': f'Similar query repeated (steps {first_id} and {second_id})',
                        'pattern': f'"{first_text[:50]}..."',
                        'fix': 'Implement memory/caching to avoid redundant LLM calls'
                    })
                    return  # Only report first instance

    def _calculate_similarity(self, a: str, b: str) -> float:
        """Return the SequenceMatcher ratio of two strings (0.0 to 1.0)."""
        return SequenceMatcher(None, a, b).ratio()

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def get_summary(self) -> str:
        """
        Return a human-readable summary of the detected mistakes.

        Reports on whatever is currently in ``self.mistakes``; call
        ``analyze()`` first to populate it.
        """
        if not self.mistakes:
            return "[OK] No obvious mistakes detected"

        # Count by severity in a single pass
        counts = {'CRITICAL': 0, 'HIGH': 0, 'MEDIUM': 0, 'LOW': 0}
        for m in self.mistakes:
            severity = m.get('severity')
            if severity in counts:
                counts[severity] += 1

        lines = [
            f"[!] Found {len(self.mistakes)} issue(s):",
            f" {counts['CRITICAL']} Critical, {counts['HIGH']} High, "
            f"{counts['MEDIUM']} Medium, {counts['LOW']} Low severity",
            ""
        ]

        # Show details for each mistake
        for i, m in enumerate(self.mistakes, 1):
            severity_marker = self._SEVERITY_MARKERS.get(m.get('severity', 'LOW'), '[?]')

            lines.append(f"{i}. {severity_marker} [{m.get('severity')}] {m.get('type')} at Step {m.get('step')}")
            lines.append(f" -> {m.get('explanation')}")
            if 'fix' in m:
                lines.append(f" -> Fix: {m['fix']}")
            lines.append("")

        return '\n'.join(lines)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
|
epi_cli/__init__.py
CHANGED
epi_cli/__main__.py
CHANGED
epi_cli/chat.py
CHANGED
|
@@ -45,6 +45,7 @@ def load_steps_from_epi(epi_path: Path) -> list:
|
|
|
45
45
|
|
|
46
46
|
def chat(
|
|
47
47
|
epi_file: Path = typer.Argument(..., help="Path to .epi file to chat with"),
|
|
48
|
+
query: str = typer.Option(None, "--query", "-q", help="Single question (non-interactive mode)"),
|
|
48
49
|
model: str = typer.Option("gemini-2.0-flash", "--model", "-m", help="Gemini model to use")
|
|
49
50
|
):
|
|
50
51
|
"""
|
|
@@ -52,8 +53,9 @@ def chat(
|
|
|
52
53
|
|
|
53
54
|
Ask natural language questions about what happened in your recording.
|
|
54
55
|
|
|
55
|
-
|
|
56
|
-
epi chat my_recording.epi
|
|
56
|
+
Examples:
|
|
57
|
+
epi chat my_recording.epi # Interactive mode
|
|
58
|
+
epi chat my_recording.epi -q "What happened?" # Single question
|
|
57
59
|
"""
|
|
58
60
|
# Resolve path
|
|
59
61
|
if not epi_file.exists():
|
|
@@ -150,7 +152,19 @@ When answering questions:
|
|
|
150
152
|
))
|
|
151
153
|
console.print()
|
|
152
154
|
|
|
153
|
-
#
|
|
155
|
+
# Non-interactive mode: answer single question and exit
|
|
156
|
+
if query:
|
|
157
|
+
try:
|
|
158
|
+
full_prompt = f"{context}\n\nUser question: {query}"
|
|
159
|
+
response = chat_session.send_message(full_prompt)
|
|
160
|
+
console.print("[bold green]AI:[/bold green]")
|
|
161
|
+
console.print(Markdown(response.text))
|
|
162
|
+
return
|
|
163
|
+
except Exception as e:
|
|
164
|
+
console.print(f"[red]Error:[/red] {e}")
|
|
165
|
+
raise typer.Exit(1)
|
|
166
|
+
|
|
167
|
+
# Interactive chat loop
|
|
154
168
|
while True:
|
|
155
169
|
try:
|
|
156
170
|
question = Prompt.ask("[bold cyan]You[/bold cyan]")
|
|
@@ -191,3 +205,7 @@ When answering questions:
|
|
|
191
205
|
console.print(f"[red]Error:[/red] {e}")
|
|
192
206
|
console.print("[dim]Try asking a different question.[/dim]")
|
|
193
207
|
console.print()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
|
epi_cli/debug.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EPI Debug Command - AI-powered agent mistake detection.
|
|
3
|
+
|
|
4
|
+
Analyzes .epi recordings to find:
|
|
5
|
+
- Infinite loops
|
|
6
|
+
- Hallucinations
|
|
7
|
+
- Inefficiencies
|
|
8
|
+
- Repetitive patterns
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import typer
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.panel import Panel
|
|
16
|
+
|
|
17
|
+
from epi_analyzer.detector import MistakeDetector
|
|
18
|
+
|
|
19
|
+
console = Console()
|
|
20
|
+
app = typer.Typer(name="debug", help="Debug AI agent recordings for mistakes")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.callback(invoke_without_command=True)
def debug(
    ctx: typer.Context,
    epi_file: Path = typer.Argument(..., help="Path to .epi recording file or directory"),
    output_json: bool = typer.Option(False, "--json", help="Output as JSON"),
    export: Path = typer.Option(None, "--export", help="Export report to file"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed analysis"),
):
    """
    Analyze agent execution for mistakes and inefficiencies.

    This command uses AI-powered analysis to detect:
    - Infinite loops (same tool called repeatedly)
    - Hallucinations (LLM confident but wrong)
    - Inefficiencies (excessive token usage)
    - Repetitive patterns (redundant work)

    Examples:
        epi debug agent_session.epi
        epi debug recording_dir/ --json
        epi debug agent.epi --export report.txt
    """
    # Exit codes: 0 = no critical findings, 1 = critical findings,
    # 2 = recording not found, 3 = any other analysis error.
    console.print(f"Analyzing [cyan]{epi_file}[/cyan]...")

    try:
        # Run analysis
        detector = MistakeDetector(str(epi_file))
        mistakes = detector.analyze()

        # Prepare output
        if output_json:
            output = json.dumps(mistakes, indent=2)
        else:
            output = detector.get_summary()

            if verbose and mistakes:
                # Add every remaining field of each mistake to the report
                details = ["\nDetailed Analysis:"]
                for i, m in enumerate(mistakes, 1):
                    details.append(f"\n{i}. {m.get('type')} (Step {m.get('step')})")
                    for key, value in m.items():
                        if key not in ['type', 'step']:
                            details.append(f" {key}: {value}")
                output += "\n".join(details)

        # Display or export
        if export:
            export.write_text(output, encoding='utf-8')
            console.print(f"\nReport saved to [green]{export}[/green]")
        else:
            # The report embeds recorded LLM text and "[...]" markers; print it
            # verbatim so Rich does not parse bracketed fragments as style tags.
            console.print(f"\n{output}", markup=False)

        # Show actionable summary if mistakes found
        critical_count = sum(1 for m in mistakes if m.get('severity') == 'CRITICAL')
        if critical_count > 0 and not output_json:
            console.print(
                Panel(
                    f"[bold red]WARNING: {critical_count} CRITICAL issue(s) detected![/bold red]\n\n"
                    "These issues can cause your agent to fail or waste resources.\n"
                    "Review the suggestions above to fix them.",
                    title="Action Required",
                    border_style="red"
                )
            )

        # Exit code: 1 if critical mistakes found
        if critical_count > 0:
            raise typer.Exit(code=1)

        console.print("\nAnalysis complete")

    except typer.Exit:
        # typer.Exit derives from RuntimeError, so without this clause the
        # generic handler below would turn the intentional exit code 1 into
        # an "ERROR analyzing file" message with exit code 3.
        raise
    except FileNotFoundError as e:
        console.print(f"[red]ERROR: File not found:[/red] {e}")
        raise typer.Exit(code=2)
    except Exception as e:
        console.print(f"[red]ERROR analyzing file:[/red] {e}")
        if verbose:
            import traceback
            console.print(traceback.format_exc())
        raise typer.Exit(code=3)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
epi_cli/keys.py
CHANGED
epi_cli/ls.py
CHANGED
epi_cli/main.py
CHANGED
|
@@ -126,6 +126,10 @@ app.command(name="ls", help="List local recordings (./epi-recordings/)")(ls_comm
|
|
|
126
126
|
from epi_cli.chat import chat as chat_command
|
|
127
127
|
app.command(name="chat", help="Chat with your evidence file using AI")(chat_command)
|
|
128
128
|
|
|
129
|
+
# NEW: debug command (v2.2.0 - AI-powered mistake detection)
|
|
130
|
+
from epi_cli.debug import app as debug_app
|
|
131
|
+
app.add_typer(debug_app, name="debug", help="Debug AI agent recordings for mistakes")
|
|
132
|
+
|
|
129
133
|
# Phase 1: keys command (for manual key management)
|
|
130
134
|
@app.command()
|
|
131
135
|
def keys(
|
|
@@ -320,3 +324,7 @@ def cli_main():
|
|
|
320
324
|
|
|
321
325
|
if __name__ == "__main__":
|
|
322
326
|
cli_main()
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
|
epi_cli/record.py
CHANGED
epi_cli/run.py
CHANGED
|
@@ -323,7 +323,7 @@ def run(
|
|
|
323
323
|
km = KeyManager()
|
|
324
324
|
priv = km.load_private_key("default")
|
|
325
325
|
|
|
326
|
-
#
|
|
326
|
+
# Extract, sign, and repack with new viewer
|
|
327
327
|
import json as _json
|
|
328
328
|
with zipfile.ZipFile(out, "r") as zf:
|
|
329
329
|
raw = zf.read("manifest.json").decode("utf-8")
|
|
@@ -336,14 +336,18 @@ def run(
|
|
|
336
336
|
sm = _sign(m, priv, "default")
|
|
337
337
|
signed_json = sm.model_dump_json(indent=2)
|
|
338
338
|
|
|
339
|
-
#
|
|
339
|
+
# Regenerate viewer.html with signed manifest and steps
|
|
340
|
+
viewer_html = EPIContainer._create_embedded_viewer(temp_workspace, sm)
|
|
341
|
+
|
|
342
|
+
# Replace manifest AND viewer in ZIP
|
|
340
343
|
temp_zip = out.with_suffix(".epi.tmp")
|
|
341
344
|
with zipfile.ZipFile(out, "r") as zf_in:
|
|
342
345
|
with zipfile.ZipFile(temp_zip, "w", zipfile.ZIP_DEFLATED) as zf_out:
|
|
343
346
|
for item in zf_in.namelist():
|
|
344
|
-
if item
|
|
347
|
+
if item not in ("manifest.json", "viewer.html"):
|
|
345
348
|
zf_out.writestr(item, zf_in.read(item))
|
|
346
349
|
zf_out.writestr("manifest.json", signed_json)
|
|
350
|
+
zf_out.writestr("viewer.html", viewer_html)
|
|
347
351
|
|
|
348
352
|
temp_zip.replace(out)
|
|
349
353
|
signed = True
|
|
@@ -394,4 +398,8 @@ def run(
|
|
|
394
398
|
raise typer.Exit(rc)
|
|
395
399
|
if not verified and not no_verify:
|
|
396
400
|
raise typer.Exit(1)
|
|
397
|
-
raise typer.Exit(0)
|
|
401
|
+
raise typer.Exit(0)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
|
epi_cli/verify.py
CHANGED
epi_cli/view.py
CHANGED
epi_core/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
EPI Core - Core data structures, serialization, and container management.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
__version__ = "2.
|
|
5
|
+
__version__ = "2.2.0"
|
|
6
6
|
|
|
7
7
|
from epi_core.schemas import ManifestModel, StepModel
|
|
8
8
|
from epi_core.serialize import get_canonical_hash
|
|
@@ -12,3 +12,7 @@ __all__ = [
|
|
|
12
12
|
"StepModel",
|
|
13
13
|
"get_canonical_hash",
|
|
14
14
|
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|