epi-recorder 2.1.2__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epi_analyzer/__init__.py +9 -0
- epi_analyzer/detector.py +337 -0
- epi_cli/__init__.py +4 -0
- epi_cli/__main__.py +4 -0
- epi_cli/chat.py +211 -0
- epi_cli/debug.py +107 -0
- epi_cli/keys.py +4 -0
- epi_cli/ls.py +5 -1
- epi_cli/main.py +15 -1
- epi_cli/record.py +4 -0
- epi_cli/run.py +12 -4
- epi_cli/verify.py +4 -0
- epi_cli/view.py +4 -0
- epi_core/__init__.py +5 -1
- epi_core/container.py +68 -55
- epi_core/redactor.py +4 -0
- epi_core/schemas.py +6 -2
- epi_core/serialize.py +4 -0
- epi_core/storage.py +186 -0
- epi_core/trust.py +4 -0
- epi_recorder/__init__.py +5 -1
- epi_recorder/api.py +28 -2
- epi_recorder/async_api.py +151 -0
- epi_recorder/bootstrap.py +4 -0
- epi_recorder/environment.py +4 -0
- epi_recorder/patcher.py +143 -14
- epi_recorder/test_import.py +2 -0
- epi_recorder/test_script.py +2 -0
- epi_recorder-2.2.0.dist-info/METADATA +162 -0
- epi_recorder-2.2.0.dist-info/RECORD +38 -0
- {epi_recorder-2.1.2.dist-info → epi_recorder-2.2.0.dist-info}/WHEEL +1 -1
- {epi_recorder-2.1.2.dist-info → epi_recorder-2.2.0.dist-info}/licenses/LICENSE +4 -29
- {epi_recorder-2.1.2.dist-info → epi_recorder-2.2.0.dist-info}/top_level.txt +1 -0
- epi_viewer_static/app.js +38 -7
- epi_viewer_static/crypto.js +3 -0
- epi_viewer_static/index.html +4 -2
- epi_viewer_static/viewer_lite.css +3 -1
- epi_postinstall.py +0 -197
- epi_recorder-2.1.2.dist-info/METADATA +0 -574
- epi_recorder-2.1.2.dist-info/RECORD +0 -33
- {epi_recorder-2.1.2.dist-info → epi_recorder-2.2.0.dist-info}/entry_points.txt +0 -0
epi_analyzer/__init__.py
ADDED
epi_analyzer/detector.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EPI Agent Mistake Detector
|
|
3
|
+
|
|
4
|
+
AI-powered analysis of agent execution to identify bugs:
|
|
5
|
+
- Infinite loops (same tool called repeatedly with errors)
|
|
6
|
+
- Hallucinations (confident LLM output followed by tool failures)
|
|
7
|
+
- Inefficiency (excessive token usage, repeated work)
|
|
8
|
+
- Repetitive patterns (agent redoing same queries)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import sqlite3
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Dict, Any, Optional
|
|
15
|
+
from difflib import SequenceMatcher
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MistakeDetector:
    """
    Heuristic agent bug detection over EPI recordings.

    Loads the step timeline out of an EPI recording and runs a set of
    pattern detectors over it: infinite loops, likely hallucinations,
    inefficiency, and repetitive queries.  Findings accumulate in
    ``self.mistakes`` as plain dicts carrying at least
    ``type``/``severity``/``step``/``explanation`` keys, plus
    detector-specific extras such as ``fix`` or ``metrics``.
    """

    def __init__(self, epi_file: str):
        """
        Initialize detector with an EPI recording file.

        Args:
            epi_file: Path to the recording. May be a ``.epi`` ZIP archive,
                an unpacked recording directory, a ``steps.jsonl`` file, or
                a SQLite database.

        Raises:
            FileNotFoundError: if no loadable step source is found.
        """
        self.epi_path = Path(epi_file)
        self.steps = self._load_steps()
        # Populated by analyze(); empty until then.
        self.mistakes: List[Dict] = []

    def _load_steps(self) -> List[Dict]:
        """Load steps from EPI file (ZIP, SQLite, or JSONL).

        Tries sources in order: ``.epi`` ZIP archive, recording directory,
        bare ``.jsonl`` file, SQLite database candidates.

        Raises:
            FileNotFoundError: if every candidate source fails.
        """
        import tempfile
        import zipfile

        # If it's a ZIP file (.epi), unpack it first.
        # NOTE(review): the extracted temp dir is never cleaned up --
        # acceptable for a one-shot CLI analysis, but worth revisiting.
        if self.epi_path.is_file() and self.epi_path.suffix == '.epi':
            try:
                # Check if it's a valid ZIP
                if zipfile.is_zipfile(self.epi_path):
                    temp_dir = Path(tempfile.mkdtemp())
                    with zipfile.ZipFile(self.epi_path, 'r') as zf:
                        zf.extractall(temp_dir)

                    # Look for steps.jsonl in extracted content
                    steps_file = temp_dir / "steps.jsonl"
                    if steps_file.exists():
                        return self._load_from_jsonl(steps_file)

                    # Also check for SQLite db
                    for db_file in temp_dir.glob("*.db"):
                        try:
                            return self._load_from_sqlite(db_file)
                        except Exception:
                            continue
            except Exception:
                pass  # Fall through to other methods

        # Try loading from steps.jsonl in directory
        if self.epi_path.is_dir():
            jsonl_path = self.epi_path / "steps.jsonl"
            if jsonl_path.exists():
                return self._load_from_jsonl(jsonl_path)

            # Check for temp databases
            temp_dbs = list(self.epi_path.glob("*_temp.db"))
            if temp_dbs:
                return self._load_from_sqlite(temp_dbs[0])

        # Try as JSONL file directly
        if self.epi_path.suffix == '.jsonl':
            return self._load_from_jsonl(self.epi_path)

        # Try as SQLite database
        db_paths = [
            self.epi_path,
            self.epi_path.with_suffix('.epi.db'),
            self.epi_path / 'recording.db'
        ]

        for db_path in db_paths:
            if db_path.exists():
                try:
                    return self._load_from_sqlite(db_path)
                except Exception:
                    continue

        raise FileNotFoundError(f"No valid .epi file found at {self.epi_path}")

    def _load_from_jsonl(self, path: Path) -> List[Dict]:
        """Load steps from a JSONL file, one JSON object per non-blank line."""
        steps = []
        with open(path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                if line.strip():
                    step = json.loads(line)
                    # Normalize to the internal step shape ('kind' -> 'type').
                    steps.append({
                        'id': i,
                        'index': step.get('index', i),
                        'type': step.get('kind', 'unknown'),
                        'content': step.get('content', {}),
                        'timestamp': step.get('timestamp', '')
                    })
        return steps

    def _load_from_sqlite(self, db_path: Path) -> List[Dict]:
        """Load steps from a SQLite database.

        Assumes a ``steps`` table whose columns are, in order:
        (id, index, type, content-JSON, timestamp) -- TODO confirm against
        the recorder's storage schema.
        """
        conn = sqlite3.connect(str(db_path))
        cursor = conn.execute('SELECT * FROM steps ORDER BY id')

        steps = []
        for row in cursor.fetchall():
            # Content may be stored either as a JSON string or pre-decoded.
            content = json.loads(row[3]) if isinstance(row[3], str) else row[3]
            steps.append({
                'id': row[0],
                'index': row[1],
                'type': row[2],
                'content': content,
                'timestamp': row[4] if len(row) > 4 else None
            })

        conn.close()
        return steps

    def analyze(self) -> List[Dict]:
        """Run all detection patterns and return the accumulated findings."""
        self._detect_infinite_loops()
        self._detect_hallucinations()
        self._detect_inefficiency()
        self._detect_repetitive_patterns()
        return self.mistakes

    def _detect_infinite_loops(self):
        """Detect agent stuck calling same tool repeatedly.

        Compares the last user message of the most recent LLM steps; a high
        average pairwise similarity is treated as a loop.
        """
        # Look for LLM request/response patterns
        llm_steps = [s for s in self.steps if 'llm' in s['type'].lower()]

        # Too few calls to distinguish a loop from normal retries.
        if len(llm_steps) < 5:
            return

        # Check last N calls for repetition
        window = 5
        recent = llm_steps[-window:]

        # Extract patterns (model, messages similarity)
        patterns = []
        for step in recent:
            content = step.get('content', {})
            # Check if this is a request with messages
            messages = content.get('messages', [])
            if messages:
                # Get last user message (first 100 chars is enough for matching)
                user_msgs = [m for m in messages if m.get('role') == 'user']
                if user_msgs:
                    patterns.append(user_msgs[-1].get('content', '')[:100])

        # If we see very similar patterns repeated, it's likely a loop
        if len(patterns) >= 3:
            similarities = [
                self._calculate_similarity(patterns[i], patterns[i+1])
                for i in range(len(patterns)-1)
            ]
            avg_similarity = sum(similarities) / len(similarities)

            if avg_similarity > 0.8:  # 80% similar
                self.mistakes.append({
                    'type': 'INFINITE_LOOP',
                    'severity': 'CRITICAL',
                    'step': recent[-1]['id'],
                    'explanation': f'Agent appears stuck in a loop - repeated similar requests {window} times',
                    'fix': 'Add max_iterations limit or better error handling',
                    'cost_impact': 'High - stuck in loop burning API credits',
                    'pattern_similarity': f'{avg_similarity:.0%}'
                })

    def _detect_hallucinations(self):
        """Detect high-confidence LLM calls followed by errors.

        An LLM response that finished with 'stop' but is followed within the
        next three steps by an error step is flagged.
        """
        for i, step in enumerate(self.steps[:-1]):
            if 'llm.response' not in step['type'].lower():
                continue

            content = step.get('content', {})

            # Check if next few steps show errors
            next_steps = self.steps[i+1:min(i+4, len(self.steps))]
            errors = [s for s in next_steps if 'error' in s['type'].lower()]

            # Only providers whose payloads carry an OpenAI-style 'choices'
            # list are inspected here.
            if errors and content.get('provider') in ['openai', 'google']:
                # LLM gave response but then errors occurred
                choices = content.get('choices', [])
                if choices:
                    finish_reason = choices[0].get('finish_reason', 'stop')
                    if finish_reason == 'stop':  # Completed confidently
                        response_text = choices[0].get('message', {}).get('content', '')[:150]

                        self.mistakes.append({
                            'type': 'HALLUCINATION',
                            'severity': 'HIGH',
                            'step': step['id'],
                            'explanation': 'LLM generated confident output but subsequent operations failed',
                            'details': f"LLM said: {response_text}...",
                            'error_step': errors[0]['id'],
                            'fix': 'Add output validation or use function calling with strict schemas'
                        })

    def _detect_inefficiency(self):
        """Detect expensive operations for simple tasks.

        Flags high token usage, high estimated cost, and GPT-4 usage on
        very short workflows.  At most one INEFFICIENT finding is emitted.
        """
        llm_responses = [s for s in self.steps if 'llm.response' in s['type'].lower()]

        if not llm_responses:
            return

        # Calculate token usage
        total_tokens = 0
        for step in llm_responses:
            content = step.get('content', {})
            usage = content.get('usage', {})
            if usage:
                total_tokens += usage.get('total_tokens', 0)

        step_count = len(self.steps)

        # Red flags
        flags = []

        if total_tokens > 10000 and step_count < 5:
            flags.append(f"High token usage ({total_tokens:,} tokens) for simple workflow")

        # Estimate cost (rough)
        # GPT-4: ~$0.03/1K input, ~$0.06/1K output - use avg $0.045/1K
        estimated_cost = (total_tokens / 1000) * 0.045
        if estimated_cost > 0.50:
            flags.append(f"Expensive execution (~${estimated_cost:.2f})")

        # Check for model inefficiency (using GPT-4 when GPT-3.5 would work)
        gpt4_calls = sum(1 for s in llm_responses
                         if 'gpt-4' in s.get('content', {}).get('model', '').lower())
        if gpt4_calls > 0 and step_count < 3:
            flags.append(f"Using GPT-4 for simple task ({gpt4_calls} calls)")

        if flags:
            self.mistakes.append({
                'type': 'INEFFICIENT',
                'severity': 'MEDIUM',
                'step': llm_responses[-1]['id'],
                'explanation': '; '.join(flags),
                'metrics': {
                    'total_tokens': total_tokens,
                    'estimated_cost': round(estimated_cost, 2),
                    'step_count': step_count,
                    'llm_calls': len(llm_responses)
                },
                'fix': 'Consider using GPT-3.5-turbo or caching responses'
            })

    def _detect_repetitive_patterns(self):
        """Detect agent redoing same work (near-duplicate user queries)."""
        if len(self.steps) < 10:
            return

        # Look for repeated LLM requests
        llm_requests = [s for s in self.steps if 'llm.request' in s['type'].lower()]

        if len(llm_requests) < 3:
            return

        # Extract the first user message of each request (truncated for matching)
        queries = []
        for step in llm_requests:
            content = step.get('content', {})
            messages = content.get('messages', [])
            for msg in messages:
                if msg.get('role') == 'user':
                    queries.append((step['id'], msg.get('content', '')[:100]))
                    break

        # Find similar queries
        for i in range(len(queries)):
            for j in range(i+1, len(queries)):
                similarity = self._calculate_similarity(queries[i][1], queries[j][1])
                if similarity > 0.7:  # 70% similar
                    self.mistakes.append({
                        'type': 'REPETITIVE_PATTERN',
                        'severity': 'LOW',
                        'step': queries[j][0],
                        'explanation': f'Similar query repeated (steps {queries[i][0]} and {queries[j][0]})',
                        'pattern': f'"{queries[i][1][:50]}..."',
                        'fix': 'Implement memory/caching to avoid redundant LLM calls'
                    })
                    return  # Only report first instance

    def _calculate_similarity(self, a: str, b: str) -> float:
        """Simple string similarity (0.0-1.0) using SequenceMatcher."""
        return SequenceMatcher(None, a, b).ratio()

    def get_summary(self) -> str:
        """Return a human-readable summary of detected mistakes."""
        if not self.mistakes:
            return "[OK] No obvious mistakes detected"

        # Count by severity
        critical = sum(1 for m in self.mistakes if m.get('severity') == 'CRITICAL')
        high = sum(1 for m in self.mistakes if m.get('severity') == 'HIGH')
        medium = sum(1 for m in self.mistakes if m.get('severity') == 'MEDIUM')
        low = sum(1 for m in self.mistakes if m.get('severity') == 'LOW')

        lines = [
            f"[!] Found {len(self.mistakes)} issue(s):",
            f" {critical} Critical, {high} High, {medium} Medium, {low} Low severity",
            ""
        ]

        # Show details for each mistake
        for i, m in enumerate(self.mistakes, 1):
            severity_marker = {
                'CRITICAL': '[!!!]',
                'HIGH': '[!!]',
                'MEDIUM': '[!]',
                'LOW': '[-]'
            }.get(m.get('severity', 'LOW'), '[?]')

            lines.append(f"{i}. {severity_marker} [{m.get('severity')}] {m.get('type')} at Step {m.get('step')}")
            lines.append(f" -> {m.get('explanation')}")
            if 'fix' in m:
                lines.append(f" -> Fix: {m['fix']}")
            lines.append("")

        # BUG FIX: the original ended with two identical return statements;
        # the second was unreachable dead code and has been removed.
        return '\n'.join(lines)
epi_cli/__init__.py
CHANGED
epi_cli/__main__.py
CHANGED
epi_cli/chat.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EPI CLI Chat - Interactive evidence querying with AI.
|
|
3
|
+
|
|
4
|
+
Allows users to ask natural language questions about their .epi evidence files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import warnings
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
import typer
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.markdown import Markdown
|
|
16
|
+
from rich.panel import Panel
|
|
17
|
+
from rich.prompt import Prompt
|
|
18
|
+
import google.api_core.exceptions
|
|
19
|
+
|
|
20
|
+
from epi_core.container import EPIContainer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
console = Console()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def load_steps_from_epi(epi_path: Path) -> list:
    """Unpack *epi_path* into a scratch directory and parse its steps.jsonl.

    Returns a list of step dicts (one per non-blank JSONL line), or an
    empty list when the archive contains no steps.jsonl.
    """
    import tempfile

    scratch = Path(tempfile.mkdtemp())
    extracted = EPIContainer.unpack(epi_path, scratch)

    steps_file = extracted / "steps.jsonl"
    if not steps_file.exists():
        return []

    with open(steps_file, 'r', encoding='utf-8') as fh:
        return [json.loads(raw) for raw in fh if raw.strip()]
|
|
46
|
+
def chat(
    epi_file: Path = typer.Argument(..., help="Path to .epi file to chat with"),
    query: str = typer.Option(None, "--query", "-q", help="Single question (non-interactive mode)"),
    model: str = typer.Option("gemini-2.0-flash", "--model", "-m", help="Gemini model to use")
):
    """
    Chat with your evidence file using AI.

    Ask natural language questions about what happened in your recording.
    Requires a Google AI API key (GOOGLE_API_KEY or GEMINI_API_KEY) and the
    google-generativeai package.

    Examples:
        epi chat my_recording.epi                      # Interactive mode
        epi chat my_recording.epi -q "What happened?"  # Single question
    """
    # Resolve path: if the argument doesn't exist as given, fall back to
    # ./epi-recordings/<stem>.epi before giving up.
    if not epi_file.exists():
        # Try epi-recordings directory
        recordings_dir = Path("./epi-recordings")
        potential_path = recordings_dir / f"{epi_file.stem}.epi"
        if potential_path.exists():
            epi_file = potential_path
        else:
            console.print(f"[red]Error:[/red] File not found: {epi_file}")
            raise typer.Exit(1)

    # Check for API key (either env var name is accepted)
    api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
    if not api_key:
        console.print(Panel(
            "[yellow]No API key found![/yellow]\n\n"
            "Set your Google AI API key:\n"
            " [cyan]set GOOGLE_API_KEY=your-key-here[/cyan] (Windows)\n"
            " [cyan]export GOOGLE_API_KEY=your-key-here[/cyan] (Mac/Linux)\n\n"
            "Get a free key at: [link]https://makersuite.google.com/app/apikey[/link]",
            title="[!] API Key Required",
            border_style="yellow"
        ))
        raise typer.Exit(1)

    # Load the .epi file (manifest metadata + step timeline)
    console.print(f"\n[dim]Loading evidence from:[/dim] {epi_file}")

    try:
        manifest = EPIContainer.read_manifest(epi_file)
        steps = load_steps_from_epi(epi_file)
    except Exception as e:
        console.print(f"[red]Error loading .epi file:[/red] {e}")
        raise typer.Exit(1)

    # Initialize Gemini lazily so missing google-generativeai is reported
    # with install instructions instead of an import traceback.
    try:
        # NOTE(review): 'warnings' is already imported at module level;
        # this local import is redundant but harmless.
        import warnings
        with warnings.catch_warnings():
            # Suppress deprecation chatter emitted on import.
            warnings.simplefilter("ignore")
            import google.generativeai as genai

        genai.configure(api_key=api_key)
        ai_model = genai.GenerativeModel(model)
    except ImportError:
        console.print(Panel(
            "[red]Google Generative AI package not installed![/red]\n\n"
            "Install it with:\n"
            " [cyan]pip install google-generativeai[/cyan]",
            title="[X] Missing Dependency",
            border_style="red"
        ))
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[red]Error initializing Gemini:[/red] {e}")
        raise typer.Exit(1)

    # Build context: recording metadata plus a truncated dump of the first
    # 50 steps (capped at 8000 chars) to stay within prompt limits.
    context = f"""You are an expert assistant analyzing an EPI evidence recording file.

The recording contains cryptographically signed, tamper-proof evidence of an AI workflow execution.

Recording metadata:
- Created: {manifest.created_at}
- Goal: {manifest.goal or 'Not specified'}
- Command: {manifest.cli_command or 'Not specified'}
- Workflow ID: {manifest.workflow_id}
- Total steps: {len(steps)}

Here are the recorded steps (this is the timeline of events):
{json.dumps(steps[:50], indent=2, default=str)[:8000]}

When answering questions:
1. Be specific and cite step indices when relevant
2. Distinguish between LLM requests, responses, and other events
3. If asked about security, note that API keys are automatically redacted
4. Keep answers concise but informative
"""

    # Start chat session
    chat_session = ai_model.start_chat(history=[])

    # Display header.
    # NOTE(review): this interactive-session banner (including the
    # "Type exit or quit" hint) is printed even in --query mode -- confirm
    # that is intended.
    console.print()
    console.print(Panel(
        f"[bold cyan]EPI Evidence Chat[/bold cyan]\n\n"
        f"[dim]File:[/dim] {epi_file.name}\n"
        f"[dim]Steps:[/dim] {len(steps)}\n"
        f"[dim]Model:[/dim] {model}\n\n"
        f"Ask questions about this evidence recording.\n"
        f"Type [yellow]exit[/yellow] or [yellow]quit[/yellow] to end the session.",
        border_style="cyan"
    ))
    console.print()

    # Non-interactive mode: answer single question and exit
    if query:
        try:
            # The full context is resent with the question; history is not used.
            full_prompt = f"{context}\n\nUser question: {query}"
            response = chat_session.send_message(full_prompt)
            console.print("[bold green]AI:[/bold green]")
            console.print(Markdown(response.text))
            return
        except Exception as e:
            console.print(f"[red]Error:[/red] {e}")
            raise typer.Exit(1)

    # Interactive chat loop: read a question, send context + question,
    # render the answer; Ctrl-C/Ctrl-D or exit/quit/q ends the session.
    while True:
        try:
            question = Prompt.ask("[bold cyan]You[/bold cyan]")
        except (KeyboardInterrupt, EOFError):
            console.print("\n[dim]Goodbye![/dim]")
            break

        if question.lower() in ('exit', 'quit', 'q'):
            console.print("[dim]Goodbye![/dim]")
            break

        if not question.strip():
            continue

        # Send to Gemini with context
        try:
            # NOTE(review): the full context is re-sent on every turn,
            # which multiplies token usage per question.
            full_prompt = f"{context}\n\nUser question: {question}"
            response = chat_session.send_message(full_prompt)

            console.print()
            console.print("[bold green]AI:[/bold green]")
            console.print(Markdown(response.text))
            console.print()

        except google.api_core.exceptions.ResourceExhausted:
            # Rate limit: advise and keep the session alive.
            console.print(Panel(
                "[yellow]API Quota Exceeded[/yellow]\n\n"
                "You have hit the rate limit for the Gemini API (free tier).\n"
                "Please wait a minute before trying again.",
                title="[!] Rate Limit",
                border_style="yellow"
            ))
        except google.api_core.exceptions.NotFound:
            console.print(f"[red]Error:[/red] The model '{model}' was not found. Try using a different model with --model.")
        except google.api_core.exceptions.InvalidArgument as e:
            console.print(f"[red]Error:[/red] Invalid argument: {e}")
        except Exception as e:
            # Any other API failure: report and let the user try again.
            console.print(f"[red]Error:[/red] {e}")
            console.print("[dim]Try asking a different question.[/dim]")
            console.print()
epi_cli/debug.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EPI Debug Command - AI-powered agent mistake detection.
|
|
3
|
+
|
|
4
|
+
Analyzes .epi recordings to find:
|
|
5
|
+
- Infinite loops
|
|
6
|
+
- Hallucinations
|
|
7
|
+
- Inefficiencies
|
|
8
|
+
- Repetitive patterns
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import typer
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.panel import Panel
|
|
16
|
+
|
|
17
|
+
from epi_analyzer.detector import MistakeDetector
|
|
18
|
+
|
|
19
|
+
console = Console()
|
|
20
|
+
app = typer.Typer(name="debug", help="Debug AI agent recordings for mistakes")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.callback(invoke_without_command=True)
def debug(
    ctx: typer.Context,
    epi_file: Path = typer.Argument(..., help="Path to .epi recording file or directory"),
    output_json: bool = typer.Option(False, "--json", help="Output as JSON"),
    export: Path = typer.Option(None, "--export", help="Export report to file"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed analysis"),
):
    """
    Analyze agent execution for mistakes and inefficiencies.

    This command uses AI-powered analysis to detect:
    - Infinite loops (same tool called repeatedly)
    - Hallucinations (LLM confident but wrong)
    - Inefficiencies (excessive token usage)
    - Repetitive patterns (redundant work)

    Exit codes: 0 clean, 1 critical issues found, 2 file not found,
    3 analysis error.

    Examples:
        epi debug agent_session.epi
        epi debug recording_dir/ --json
        epi debug agent.epi --export report.txt
    """
    console.print(f"Analyzing [cyan]{epi_file}[/cyan]...")

    try:
        # Run analysis
        detector = MistakeDetector(str(epi_file))
        mistakes = detector.analyze()

        # Prepare output: raw findings as JSON, or the readable summary.
        if output_json:
            output = json.dumps(mistakes, indent=2)
        else:
            output = detector.get_summary()

        if verbose and mistakes:
            # Add every detector-provided field for each mistake.
            details = ["\nDetailed Analysis:"]
            for i, m in enumerate(mistakes, 1):
                details.append(f"\n{i}. {m.get('type')} (Step {m.get('step')})")
                for key, value in m.items():
                    if key not in ['type', 'step']:
                        details.append(f" {key}: {value}")
            output += "\n".join(details)

        # Display or export
        if export:
            export.write_text(output, encoding='utf-8')
            console.print(f"\nReport saved to [green]{export}[/green]")
        else:
            console.print(f"\n{output}")

        # Show actionable summary if mistakes found
        if mistakes and not output_json:
            critical_count = sum(1 for m in mistakes if m.get('severity') == 'CRITICAL')
            if critical_count > 0:
                console.print(
                    Panel(
                        f"[bold red]WARNING: {critical_count} CRITICAL issue(s) detected![/bold red]\n\n"
                        "These issues can cause your agent to fail or waste resources.\n"
                        "Review the suggestions above to fix them.",
                        title="Action Required",
                        border_style="red"
                    )
                )

        # Exit code: 1 if critical mistakes found
        if any(m.get('severity') == 'CRITICAL' for m in mistakes):
            raise typer.Exit(code=1)

        console.print("\nAnalysis complete")

    except typer.Exit:
        # BUG FIX: typer.Exit subclasses Exception (via click's Exit ->
        # RuntimeError), so without this clause the deliberate
        # Exit(code=1) above was caught by the generic handler below,
        # misreported as an analysis error, and exited with code 3.
        raise
    except FileNotFoundError as e:
        console.print(f"[red]ERROR: File not found:[/red] {e}")
        raise typer.Exit(code=2)
    except Exception as e:
        console.print(f"[red]ERROR analyzing file:[/red] {e}")
        if verbose:
            import traceback
            console.print(traceback.format_exc())
        raise typer.Exit(code=3)