empathy-framework 4.7.0__py3-none-any.whl → 4.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- empathy_framework-4.8.0.dist-info/METADATA +753 -0
- {empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/RECORD +83 -37
- {empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/WHEEL +1 -1
- {empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/entry_points.txt +2 -1
- empathy_os/__init__.py +2 -0
- empathy_os/cache/hash_only.py +6 -3
- empathy_os/cache/hybrid.py +6 -3
- empathy_os/cli/__init__.py +128 -238
- empathy_os/cli/__main__.py +5 -33
- empathy_os/cli/commands/__init__.py +1 -8
- empathy_os/cli/commands/help.py +331 -0
- empathy_os/cli/commands/info.py +140 -0
- empathy_os/cli/commands/inspect.py +437 -0
- empathy_os/cli/commands/metrics.py +92 -0
- empathy_os/cli/commands/orchestrate.py +184 -0
- empathy_os/cli/commands/patterns.py +207 -0
- empathy_os/cli/commands/provider.py +93 -81
- empathy_os/cli/commands/setup.py +96 -0
- empathy_os/cli/commands/status.py +235 -0
- empathy_os/cli/commands/sync.py +166 -0
- empathy_os/cli/commands/tier.py +121 -0
- empathy_os/cli/commands/workflow.py +574 -0
- empathy_os/cli/parsers/__init__.py +62 -0
- empathy_os/cli/parsers/help.py +41 -0
- empathy_os/cli/parsers/info.py +26 -0
- empathy_os/cli/parsers/inspect.py +66 -0
- empathy_os/cli/parsers/metrics.py +42 -0
- empathy_os/cli/parsers/orchestrate.py +61 -0
- empathy_os/cli/parsers/patterns.py +54 -0
- empathy_os/cli/parsers/provider.py +40 -0
- empathy_os/cli/parsers/setup.py +42 -0
- empathy_os/cli/parsers/status.py +47 -0
- empathy_os/cli/parsers/sync.py +31 -0
- empathy_os/cli/parsers/tier.py +33 -0
- empathy_os/cli/parsers/workflow.py +77 -0
- empathy_os/cli/utils/__init__.py +1 -0
- empathy_os/cli/utils/data.py +242 -0
- empathy_os/cli/utils/helpers.py +68 -0
- empathy_os/{cli.py → cli_legacy.py} +27 -27
- empathy_os/cli_minimal.py +662 -0
- empathy_os/cli_router.py +384 -0
- empathy_os/cli_unified.py +38 -2
- empathy_os/memory/__init__.py +19 -5
- empathy_os/memory/short_term.py +14 -404
- empathy_os/memory/types.py +437 -0
- empathy_os/memory/unified.py +61 -48
- empathy_os/models/fallback.py +1 -1
- empathy_os/models/provider_config.py +59 -344
- empathy_os/models/registry.py +31 -180
- empathy_os/monitoring/alerts.py +14 -20
- empathy_os/monitoring/alerts_cli.py +24 -7
- empathy_os/project_index/__init__.py +2 -0
- empathy_os/project_index/index.py +210 -5
- empathy_os/project_index/scanner.py +45 -14
- empathy_os/project_index/scanner_parallel.py +291 -0
- empathy_os/socratic/ab_testing.py +1 -1
- empathy_os/vscode_bridge 2.py +173 -0
- empathy_os/workflows/__init__.py +31 -2
- empathy_os/workflows/base.py +349 -325
- empathy_os/workflows/bug_predict.py +8 -0
- empathy_os/workflows/builder.py +273 -0
- empathy_os/workflows/caching.py +253 -0
- empathy_os/workflows/code_review_pipeline.py +1 -0
- empathy_os/workflows/history.py +510 -0
- empathy_os/workflows/output.py +410 -0
- empathy_os/workflows/perf_audit.py +125 -19
- empathy_os/workflows/progress.py +324 -22
- empathy_os/workflows/progressive/README 2.md +454 -0
- empathy_os/workflows/progressive/__init__ 2.py +92 -0
- empathy_os/workflows/progressive/cli 2.py +242 -0
- empathy_os/workflows/progressive/core 2.py +488 -0
- empathy_os/workflows/progressive/orchestrator 2.py +701 -0
- empathy_os/workflows/progressive/reports 2.py +528 -0
- empathy_os/workflows/progressive/telemetry 2.py +280 -0
- empathy_os/workflows/progressive/test_gen 2.py +514 -0
- empathy_os/workflows/progressive/workflow 2.py +628 -0
- empathy_os/workflows/routing.py +168 -0
- empathy_os/workflows/secure_release.py +1 -0
- empathy_os/workflows/security_audit.py +190 -0
- empathy_os/workflows/security_audit_phase3.py +328 -0
- empathy_os/workflows/telemetry_mixin.py +269 -0
- empathy_framework-4.7.0.dist-info/METADATA +0 -1598
- empathy_os/dashboard/__init__.py +0 -15
- empathy_os/dashboard/server.py +0 -941
- {empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/licenses/LICENSE +0 -0
- {empathy_framework-4.7.0.dist-info → empathy_framework-4.8.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""Phase 3 Scanner Improvements - AST-based Command Injection Detection
|
|
2
|
+
|
|
3
|
+
This module provides AST-based analysis for detecting actual eval/exec usage
|
|
4
|
+
vs mentions in comments, docstrings, and documentation.
|
|
5
|
+
|
|
6
|
+
Created: 2026-01-26
|
|
7
|
+
Related: docs/SECURITY_PHASE2_COMPLETE.md
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import ast
|
|
11
|
+
import logging
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EvalExecDetector(ast.NodeVisitor):
    """AST visitor that detects actual eval() and exec() calls.

    This visitor walks the AST to find real function calls to eval() and exec(),
    distinguishing them from:
    - String literals mentioning eval/exec
    - Comments mentioning eval/exec
    - Docstrings documenting security policies
    - Detection code checking for eval/exec patterns
    """

    def __init__(self, file_path: str):
        """Initialize detector.

        Args:
            file_path: Path to file being analyzed (for context)
        """
        self.file_path = file_path
        self.findings: list[dict[str, Any]] = []
        self._in_docstring = False
        self._current_function: str | None = None

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Visit function definition to track context.

        Bug fix: the previous implementation reset ``_current_function``
        to ``None`` after visiting, so a nested ``def`` clobbered the
        enclosing function's context for any call that followed it.
        We now save and restore the previous context instead.
        """
        previous = self._current_function
        self._current_function = node.name
        self.generic_visit(node)
        self._current_function = previous

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Track context for ``async def`` bodies as well (previously untracked)."""
        previous = self._current_function
        self._current_function = node.name
        self.generic_visit(node)
        self._current_function = previous

    def visit_Call(self, node: ast.Call) -> None:
        """Visit function call nodes to detect eval/exec."""
        # Check if this is a call to eval() or exec()
        func_name = None

        if isinstance(node.func, ast.Name):
            func_name = node.func.id
        elif isinstance(node.func, ast.Attribute):
            # Handle attribute access like obj.exec()
            func_name = node.func.attr

        if func_name in ("eval", "exec"):
            # Found a real eval/exec call!
            self.findings.append({
                "type": "command_injection",
                "function": func_name,
                "line": node.lineno,
                "col": node.col_offset,
                "context": self._current_function,
            })

        self.generic_visit(node)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def analyze_file_for_eval_exec(file_path: str | Path) -> list[dict[str, Any]]:
    """Analyze a Python file for actual eval/exec usage using AST.

    Args:
        file_path: Path to Python file to analyze

    Returns:
        List of findings (actual eval/exec calls). Missing or unparsable
        files yield an empty list rather than raising.

    Example:
        >>> findings = analyze_file_for_eval_exec("myfile.py")
        >>> for finding in findings:
        ...     print(f"{finding['function']} at line {finding['line']}")
    """
    file_path = Path(file_path)

    # A nonexistent file trivially has no findings.
    if not file_path.exists():
        return []

    try:
        # Read and parse in one step; decoding errors are ignored so a
        # stray byte never aborts the scan.
        tree = ast.parse(
            file_path.read_text(encoding="utf-8", errors="ignore"),
            filename=str(file_path),
        )
        visitor = EvalExecDetector(str(file_path))
        visitor.visit(tree)
        return visitor.findings
    except SyntaxError as e:
        logger.debug(f"Syntax error parsing {file_path}: {e}")
        return []
    except Exception as e:
        # Best-effort analysis: any other failure is logged and swallowed.
        logger.debug(f"Error analyzing {file_path}: {e}")
        return []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def is_scanner_implementation_file(file_path: str) -> bool:
    """Check if file is part of security scanner implementation.

    Scanner files legitimately contain eval/exec patterns for detection
    purposes and should not be flagged.

    Args:
        file_path: Path to check

    Returns:
        True if this is a scanner implementation file
    """
    # Substrings whose presence (case-insensitive) marks a path as
    # scanner code, pattern definitions, or scanner tests.
    scanner_indicators = (
        # Scanner implementation files
        "bug_predict",
        "security_audit",
        "security_scan",
        "vulnerability_scan",
        "owasp",
        "secrets_detector",
        "pii_scrubber",

        # Pattern/rule definition files
        "patterns.py",
        "rules.py",
        "checks.py",

        # Test files for security scanners
        "test_bug_predict",
        "test_security",
        "test_scanner",
    )

    normalized = file_path.lower()
    for indicator in scanner_indicators:
        if indicator in normalized:
            return True
    return False
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def is_in_docstring_or_comment(line_content: str, file_content: str, line_num: int) -> bool:
    """Enhanced check if line is in docstring or comment.

    Phase 3 Enhancement: More robust detection of documentation context.

    Bug fixes versus the original implementation:
    1. The inline-comment check used a conditional expression whose
       precedence made the whole test evaluate to ``True`` for ANY line
       that did not contain the substring "eval" (including real
       ``exec()`` calls), silently discarding those findings. The check
       now returns True only when a "#" appears before the first
       eval/exec mention on the line.
    2. ``ast.get_docstring`` raises TypeError for nodes that cannot carry
       a docstring, so calling it on every node from ``ast.walk`` crashed
       the docstring pass. It is now only called on docstring-bearing
       node types.

    Args:
        line_content: The line to check
        file_content: Full file content
        line_num: Line number (1-indexed)

    Returns:
        True if line is in docstring or comment
    """
    line = line_content.strip()

    # Full-line comments.
    if line.startswith("#"):
        return True

    # Inline comments: an eval/exec mention only counts as commented out
    # when it appears AFTER the "#".
    hash_pos = line_content.find("#")
    if hash_pos != -1:
        mention_positions = [
            line_content.find(keyword)
            for keyword in ("eval", "exec")
            if keyword in line_content
        ]
        if mention_positions and hash_pos < min(mention_positions):
            return True

    # Parse file as AST to find docstrings.
    try:
        tree = ast.parse(file_content)

        # Only these node types can carry a docstring; passing anything
        # else to ast.get_docstring raises TypeError.
        docstrings = []
        for node in ast.walk(tree):
            if isinstance(node, (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
                docstring = ast.get_docstring(node)
                if docstring:
                    docstrings.append(docstring)

        # Check if any docstring contains this line content.
        for docstring in docstrings:
            if line.strip() in docstring:
                return True

    except SyntaxError:
        pass

    # Security-policy phrasing ("never use eval", etc.) is documentation,
    # not an actual call.
    security_patterns = [
        "no eval",
        "no exec",
        "never use eval",
        "never use exec",
        "avoid eval",
        "avoid exec",
        "security:",
        "- no eval",
        "- no exec",
    ]

    line_lower = line.lower()
    if any(pattern in line_lower for pattern in security_patterns):
        return True

    return False
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def enhanced_command_injection_detection(
    file_path: str,
    original_findings: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Enhanced command injection detection with AST-based filtering.

    Phase 3: Uses AST to distinguish actual eval/exec calls from mentions
    in documentation, comments, and scanner implementation.

    Args:
        file_path: Path to file being analyzed
        original_findings: Findings from regex-based detection

    Returns:
        Filtered list of actual vulnerabilities (not false positives)
    """
    # Step 1: Check if this is a scanner implementation file.
    if is_scanner_implementation_file(file_path):
        return []  # Scanner files are allowed to mention eval/exec

    # Step 2: For Python files, trust AST-based detection over regex hits.
    if file_path.endswith(".py"):
        try:
            ast_findings = analyze_file_for_eval_exec(file_path)

            # Re-shape AST findings into the regex-finding format.
            return [
                {
                    "type": "command_injection",
                    "file": file_path,
                    "line": finding["line"],
                    "match": f"{finding['function']}(",
                    "severity": "critical",
                    "owasp": "A03:2021 Injection",
                    "context": finding.get("context", ""),
                }
                for finding in ast_findings
            ]

        except Exception as e:
            logger.debug(f"AST analysis failed for {file_path}, falling back to regex: {e}")
            # Fall through to the regex-filtering path below.

    # Step 3: For non-Python files or if AST fails, filter original findings.
    try:
        file_content = Path(file_path).read_text(encoding="utf-8", errors="ignore")
        # Perf fix: split once instead of once per finding.
        lines = file_content.split("\n")

        filtered = []
        for finding in original_findings:
            line_num = finding.get("line", 0)
            if 0 < line_num <= len(lines):
                line_content = lines[line_num - 1]

                # Skip if in docstring or comment.
                if is_in_docstring_or_comment(line_content, file_content, line_num):
                    continue

            # Findings with an out-of-range line number cannot be verified
            # against the source, so they are conservatively kept.
            filtered.append(finding)

        return filtered

    except Exception as e:
        logger.debug(f"Enhanced filtering failed for {file_path}: {e}")
        return original_findings
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# =============================================================================
|
|
276
|
+
# Integration with SecurityAuditWorkflow
|
|
277
|
+
# =============================================================================
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def apply_phase3_filtering(findings: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Apply Phase 3 AST-based filtering to command injection findings.

    This is the main entry point for Phase 3 improvements.

    Args:
        findings: List of command injection findings from regex-based detection
            (should only contain command_injection type)

    Returns:
        Filtered list with false positives removed
    """
    if not findings:
        return []

    # Bucket findings by source file so each file is analyzed once.
    by_file: dict[str, list[dict[str, Any]]] = {}
    for finding in findings:
        by_file.setdefault(finding.get("file", ""), []).append(finding)

    # Run the enhanced per-file detection and flatten the results.
    filtered_findings: list[dict[str, Any]] = []
    for path, grouped in by_file.items():
        filtered_findings.extend(enhanced_command_injection_detection(path, grouped))

    return filtered_findings
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
if __name__ == "__main__":
    # Manual smoke test against known repository files (best effort:
    # missing files are reported, not errors).
    test_files = [
        "src/empathy_os/workflows/bug_predict.py",
        "src/empathy_os/orchestration/execution_strategies.py",
        "tests/test_bug_predict_workflow.py",
    ]

    for file in test_files:
        if not Path(file).exists():
            print(f"\n{file}: Not found")
            continue
        findings = analyze_file_for_eval_exec(file)
        print(f"\n{file}:")
        print(f"  Actual eval/exec calls: {len(findings)}")
        for f in findings:
            print(f"    Line {f['line']}: {f['function']}() in {f.get('context', 'module')}")
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Telemetry Mixin for Workflow LLM Call Tracking
|
|
2
|
+
|
|
3
|
+
Extracted from BaseWorkflow to improve maintainability and reusability.
|
|
4
|
+
Provides telemetry tracking for LLM calls and workflow executions.
|
|
5
|
+
|
|
6
|
+
Copyright 2025 Smart-AI-Memory
|
|
7
|
+
Licensed under Fair Source License 0.9
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import uuid
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from typing import TYPE_CHECKING, Any
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from empathy_os.models import (
|
|
19
|
+
TelemetryBackend,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
# Try to import UsageTracker
|
|
25
|
+
try:
|
|
26
|
+
from empathy_os.telemetry import UsageTracker
|
|
27
|
+
|
|
28
|
+
TELEMETRY_AVAILABLE = True
|
|
29
|
+
except ImportError:
|
|
30
|
+
TELEMETRY_AVAILABLE = False
|
|
31
|
+
UsageTracker = None # type: ignore
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TelemetryMixin:
    """Mixin that provides telemetry tracking for workflow LLM calls.

    This mixin extracts telemetry logic from BaseWorkflow to improve
    maintainability and enable reuse in other contexts.

    Attributes:
        _telemetry_backend: Backend for storing telemetry records
        _telemetry_tracker: UsageTracker singleton for tracking
        _enable_telemetry: Whether telemetry is enabled
        _run_id: Current workflow run ID for correlation

    Usage:
        class MyWorkflow(TelemetryMixin, BaseWorkflow):
            pass

        # TelemetryMixin methods are now available
        workflow._track_telemetry(...)
        workflow._emit_call_telemetry(...)
        workflow._emit_workflow_telemetry(...)
    """

    # Instance variables (set by __init__ or subclass).
    # NOTE: these annotations resolve lazily via
    # `from __future__ import annotations`, so `UsageTracker | None` is
    # valid even when the telemetry import failed and UsageTracker is None
    # at runtime.
    _telemetry_backend: TelemetryBackend | None = None
    _telemetry_tracker: UsageTracker | None = None
    _enable_telemetry: bool = True
    _run_id: str | None = None

    # These must be provided by the class using this mixin
    name: str = "unknown"
    _provider_str: str = "unknown"

    def _init_telemetry(self, telemetry_backend: TelemetryBackend | None = None) -> None:
        """Initialize telemetry tracking.

        Call this from __init__ to set up telemetry.

        Args:
            telemetry_backend: Optional backend for storing telemetry records.
                Defaults to TelemetryStore (JSONL file backend).
        """
        # Local import: presumably avoids a module-level import cycle with
        # empathy_os.models — TODO confirm.
        from empathy_os.models import get_telemetry_store

        self._telemetry_backend = telemetry_backend or get_telemetry_store()
        self._telemetry_tracker = None
        self._enable_telemetry = True

        # Tracker initialization failures disable telemetry entirely but
        # never propagate — telemetry is optional diagnostics.
        if TELEMETRY_AVAILABLE and UsageTracker is not None:
            try:
                self._telemetry_tracker = UsageTracker.get_instance()
            except (OSError, PermissionError) as e:
                # File system errors - log but disable telemetry
                logger.debug(f"Failed to initialize telemetry tracker (file system error): {e}")
                self._enable_telemetry = False
            except (AttributeError, TypeError, ValueError) as e:
                # Configuration or initialization errors
                logger.debug(f"Failed to initialize telemetry tracker (config error): {e}")
                self._enable_telemetry = False

    def _track_telemetry(
        self,
        stage: str,
        tier: Any,  # ModelTier
        model: str,
        cost: float,
        tokens: dict[str, int],
        cache_hit: bool,
        cache_type: str | None,
        duration_ms: int,
    ) -> None:
        """Track telemetry for an LLM call.

        No-op when telemetry is disabled or the tracker was never
        initialized (see _init_telemetry).

        Args:
            stage: Stage name
            tier: Model tier used (ModelTier enum)
            model: Model ID used
            cost: Cost in USD
            tokens: Dictionary with "input" and "output" token counts
            cache_hit: Whether this was a cache hit
            cache_type: Cache type if cache hit
            duration_ms: Duration in milliseconds
        """
        if not self._enable_telemetry or self._telemetry_tracker is None:
            return

        try:
            provider_str = getattr(self, "_provider_str", "unknown")
            self._telemetry_tracker.track_llm_call(
                workflow=self.name,
                stage=stage,
                # Accept either a ModelTier enum (use .value) or a plain
                # string; normalized to upper case either way.
                tier=tier.value.upper() if hasattr(tier, "value") else str(tier).upper(),
                model=model,
                provider=provider_str,
                cost=cost,
                tokens=tokens,
                cache_hit=cache_hit,
                cache_type=cache_type,
                duration_ms=duration_ms,
            )
        except (AttributeError, TypeError, ValueError) as e:
            # INTENTIONAL: Telemetry tracking failures should never crash workflows
            logger.debug(f"Failed to track telemetry (config/data error): {e}")
        except (OSError, PermissionError) as e:
            # File system errors - log but never crash workflow
            logger.debug(f"Failed to track telemetry (file system error): {e}")

    def _emit_call_telemetry(
        self,
        step_name: str,
        task_type: str,
        tier: str,
        model_id: str,
        input_tokens: int,
        output_tokens: int,
        cost: float,
        latency_ms: int,
        success: bool = True,
        error_message: str | None = None,
        fallback_used: bool = False,
    ) -> None:
        """Emit an LLMCallRecord to the telemetry backend.

        Args:
            step_name: Name of the workflow step
            task_type: Task type used for routing
            tier: Model tier used
            model_id: Model ID used
            input_tokens: Input token count
            output_tokens: Output token count
            cost: Estimated cost
            latency_ms: Latency in milliseconds
            success: Whether the call succeeded
            error_message: Error message if failed
            fallback_used: Whether fallback was used
        """
        from empathy_os.models import LLMCallRecord

        # NOTE(review): record construction happens outside the try below,
        # so a failure in LLMCallRecord itself would propagate to the
        # caller — confirm that is intended.
        record = LLMCallRecord(
            call_id=str(uuid.uuid4()),
            timestamp=datetime.now().isoformat(),
            workflow_name=self.name,
            step_name=step_name,
            task_type=task_type,
            provider=getattr(self, "_provider_str", "unknown"),
            tier=tier,
            model_id=model_id,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            estimated_cost=cost,
            latency_ms=latency_ms,
            success=success,
            error_message=error_message,
            fallback_used=fallback_used,
            metadata={"run_id": self._run_id},
        )
        try:
            if self._telemetry_backend is not None:
                self._telemetry_backend.log_call(record)
        except (AttributeError, ValueError, TypeError):
            # Telemetry backend errors - log but don't crash workflow
            logger.debug("Failed to log call telemetry (backend error)")
        except OSError:
            # File system errors - log but don't crash workflow
            logger.debug("Failed to log call telemetry (file system error)")
        except Exception:  # noqa: BLE001
            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
            logger.debug("Unexpected error logging call telemetry")

    def _emit_workflow_telemetry(self, result: Any) -> None:
        """Emit a WorkflowRunRecord to the telemetry backend.

        Args:
            result: The WorkflowResult to record
        """
        from empathy_os.models import WorkflowRunRecord, WorkflowStageRecord

        # Build one stage record per result stage. A stage is considered
        # successful only if it ran (not skipped) and the workflow as a
        # whole reported no error.
        stages = [
            WorkflowStageRecord(
                stage_name=s.name,
                tier=s.tier.value if hasattr(s.tier, "value") else str(s.tier),
                model_id=(
                    self.get_model_for_tier(s.tier)
                    if hasattr(self, "get_model_for_tier")
                    else "unknown"
                ),
                input_tokens=s.input_tokens,
                output_tokens=s.output_tokens,
                cost=s.cost,
                latency_ms=s.duration_ms,
                success=not s.skipped and result.error is None,
                skipped=s.skipped,
                skip_reason=s.skip_reason,
            )
            for s in result.stages
        ]

        # Token totals deliberately exclude skipped stages; cost/savings
        # figures come straight from the result's cost report.
        record = WorkflowRunRecord(
            run_id=self._run_id or str(uuid.uuid4()),
            workflow_name=self.name,
            started_at=result.started_at.isoformat(),
            completed_at=result.completed_at.isoformat(),
            stages=stages,
            total_input_tokens=sum(s.input_tokens for s in result.stages if not s.skipped),
            total_output_tokens=sum(s.output_tokens for s in result.stages if not s.skipped),
            total_cost=result.cost_report.total_cost,
            baseline_cost=result.cost_report.baseline_cost,
            savings=result.cost_report.savings,
            savings_percent=result.cost_report.savings_percent,
            total_duration_ms=result.total_duration_ms,
            success=result.success,
            error=result.error,
            providers_used=[getattr(self, "_provider_str", "unknown")],
            tiers_used=list(result.cost_report.by_tier.keys()),
        )
        try:
            if self._telemetry_backend is not None:
                self._telemetry_backend.log_workflow(record)
        except (AttributeError, ValueError, TypeError):
            # Telemetry backend errors - log but don't crash workflow
            logger.debug("Failed to log workflow telemetry (backend error)")
        except OSError:
            # File system errors - log but don't crash workflow
            logger.debug("Failed to log workflow telemetry (file system error)")
        except Exception:  # noqa: BLE001
            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
            logger.debug("Unexpected error logging workflow telemetry")

    def _generate_run_id(self) -> str:
        """Generate a new run ID for telemetry correlation.

        Overwrites any previous run ID stored on the instance.

        Returns:
            A new UUID string for the run
        """
        self._run_id = str(uuid.uuid4())
        return self._run_id
|