empathy-framework 5.1.1-py3-none-any.whl → 5.2.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/METADATA +52 -3
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/RECORD +69 -28
- empathy_os/cli_router.py +9 -0
- empathy_os/core_modules/__init__.py +15 -0
- empathy_os/mcp/__init__.py +10 -0
- empathy_os/mcp/server.py +506 -0
- empathy_os/memory/control_panel.py +1 -131
- empathy_os/memory/control_panel_support.py +145 -0
- empathy_os/memory/encryption.py +159 -0
- empathy_os/memory/long_term.py +41 -626
- empathy_os/memory/long_term_types.py +99 -0
- empathy_os/memory/mixins/__init__.py +25 -0
- empathy_os/memory/mixins/backend_init_mixin.py +244 -0
- empathy_os/memory/mixins/capabilities_mixin.py +199 -0
- empathy_os/memory/mixins/handoff_mixin.py +208 -0
- empathy_os/memory/mixins/lifecycle_mixin.py +49 -0
- empathy_os/memory/mixins/long_term_mixin.py +352 -0
- empathy_os/memory/mixins/promotion_mixin.py +109 -0
- empathy_os/memory/mixins/short_term_mixin.py +182 -0
- empathy_os/memory/short_term.py +7 -0
- empathy_os/memory/simple_storage.py +302 -0
- empathy_os/memory/storage_backend.py +167 -0
- empathy_os/memory/unified.py +21 -1120
- empathy_os/meta_workflows/cli_commands/__init__.py +56 -0
- empathy_os/meta_workflows/cli_commands/agent_commands.py +321 -0
- empathy_os/meta_workflows/cli_commands/analytics_commands.py +442 -0
- empathy_os/meta_workflows/cli_commands/config_commands.py +232 -0
- empathy_os/meta_workflows/cli_commands/memory_commands.py +182 -0
- empathy_os/meta_workflows/cli_commands/template_commands.py +354 -0
- empathy_os/meta_workflows/cli_commands/workflow_commands.py +382 -0
- empathy_os/meta_workflows/cli_meta_workflows.py +52 -1802
- empathy_os/models/telemetry/__init__.py +71 -0
- empathy_os/models/telemetry/analytics.py +594 -0
- empathy_os/models/telemetry/backend.py +196 -0
- empathy_os/models/telemetry/data_models.py +431 -0
- empathy_os/models/telemetry/storage.py +489 -0
- empathy_os/orchestration/__init__.py +35 -0
- empathy_os/orchestration/execution_strategies.py +481 -0
- empathy_os/orchestration/meta_orchestrator.py +488 -1
- empathy_os/routing/workflow_registry.py +36 -0
- empathy_os/telemetry/cli.py +19 -724
- empathy_os/telemetry/commands/__init__.py +14 -0
- empathy_os/telemetry/commands/dashboard_commands.py +696 -0
- empathy_os/tools.py +183 -0
- empathy_os/workflows/__init__.py +5 -0
- empathy_os/workflows/autonomous_test_gen.py +860 -161
- empathy_os/workflows/base.py +6 -2
- empathy_os/workflows/code_review.py +4 -1
- empathy_os/workflows/document_gen/__init__.py +25 -0
- empathy_os/workflows/document_gen/config.py +30 -0
- empathy_os/workflows/document_gen/report_formatter.py +162 -0
- empathy_os/workflows/document_gen/workflow.py +1426 -0
- empathy_os/workflows/document_gen.py +22 -1598
- empathy_os/workflows/security_audit.py +2 -2
- empathy_os/workflows/security_audit_phase3.py +7 -4
- empathy_os/workflows/seo_optimization.py +633 -0
- empathy_os/workflows/test_gen/__init__.py +52 -0
- empathy_os/workflows/test_gen/ast_analyzer.py +249 -0
- empathy_os/workflows/test_gen/config.py +88 -0
- empathy_os/workflows/test_gen/data_models.py +38 -0
- empathy_os/workflows/test_gen/report_formatter.py +289 -0
- empathy_os/workflows/test_gen/test_templates.py +381 -0
- empathy_os/workflows/test_gen/workflow.py +655 -0
- empathy_os/workflows/test_gen.py +42 -1905
- empathy_os/memory/types 2.py +0 -441
- empathy_os/models/telemetry.py +0 -1660
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/WHEEL +0 -0
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/entry_points.txt +0 -0
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/licenses/LICENSE +0 -0
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +0 -0
- {empathy_framework-5.1.1.dist-info → empathy_framework-5.2.1.dist-info}/top_level.txt +0 -0
empathy_os/models/telemetry.py
DELETED
@@ -1,1660 +0,0 @@
|
|
|
1
|
-
"""Structured Telemetry for Multi-Model Workflows
|
|
2
|
-
|
|
3
|
-
Provides normalized schemas for tracking LLM calls and workflow runs:
|
|
4
|
-
- LLMCallRecord: Per-call metrics (model, tokens, cost, latency)
|
|
5
|
-
- WorkflowRunRecord: Per-workflow metrics (stages, total cost, duration)
|
|
6
|
-
- TelemetryBackend: Abstract interface for telemetry storage
|
|
7
|
-
- TelemetryStore: JSONL file-based backend (default)
|
|
8
|
-
- Analytics helpers for cost analysis and optimization
|
|
9
|
-
|
|
10
|
-
Tier 1 Automation Monitoring:
|
|
11
|
-
- TaskRoutingRecord: Task routing decisions and outcomes
|
|
12
|
-
- TestExecutionRecord: Test execution results and coverage
|
|
13
|
-
- CoverageRecord: Test coverage metrics and trends
|
|
14
|
-
- AgentAssignmentRecord: Agent assignments for simple tasks
|
|
15
|
-
|
|
16
|
-
Copyright 2025 Smart-AI-Memory
|
|
17
|
-
Licensed under Fair Source License 0.9
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
import heapq
|
|
21
|
-
import json
|
|
22
|
-
from dataclasses import asdict, dataclass, field
|
|
23
|
-
from datetime import datetime
|
|
24
|
-
from pathlib import Path
|
|
25
|
-
from typing import Any, Protocol, runtime_checkable
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
|
|
29
|
-
class LLMCallRecord:
|
|
30
|
-
"""Record of a single LLM API call.
|
|
31
|
-
|
|
32
|
-
Captures all relevant metrics for cost tracking, performance analysis,
|
|
33
|
-
and debugging.
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
# Identification
|
|
37
|
-
call_id: str
|
|
38
|
-
timestamp: str # ISO format
|
|
39
|
-
|
|
40
|
-
# Context
|
|
41
|
-
workflow_name: str | None = None
|
|
42
|
-
step_name: str | None = None
|
|
43
|
-
user_id: str | None = None
|
|
44
|
-
session_id: str | None = None
|
|
45
|
-
|
|
46
|
-
# Task routing
|
|
47
|
-
task_type: str = "unknown"
|
|
48
|
-
provider: str = "anthropic"
|
|
49
|
-
tier: str = "capable"
|
|
50
|
-
model_id: str = ""
|
|
51
|
-
|
|
52
|
-
# Token usage
|
|
53
|
-
input_tokens: int = 0
|
|
54
|
-
output_tokens: int = 0
|
|
55
|
-
|
|
56
|
-
# Cost (in USD)
|
|
57
|
-
estimated_cost: float = 0.0
|
|
58
|
-
actual_cost: float | None = None
|
|
59
|
-
|
|
60
|
-
# Performance
|
|
61
|
-
latency_ms: int = 0
|
|
62
|
-
|
|
63
|
-
# Fallback and resilience tracking
|
|
64
|
-
fallback_used: bool = False
|
|
65
|
-
fallback_chain: list[str] = field(default_factory=list)
|
|
66
|
-
original_provider: str | None = None
|
|
67
|
-
original_model: str | None = None
|
|
68
|
-
retry_count: int = 0 # Number of retries before success
|
|
69
|
-
circuit_breaker_state: str | None = None # "closed", "open", "half-open"
|
|
70
|
-
|
|
71
|
-
# Error tracking
|
|
72
|
-
success: bool = True
|
|
73
|
-
error_type: str | None = None
|
|
74
|
-
error_message: str | None = None
|
|
75
|
-
|
|
76
|
-
# Additional metadata
|
|
77
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
78
|
-
|
|
79
|
-
def to_dict(self) -> dict[str, Any]:
|
|
80
|
-
"""Convert to dictionary for JSON serialization."""
|
|
81
|
-
return asdict(self)
|
|
82
|
-
|
|
83
|
-
@classmethod
|
|
84
|
-
def from_dict(cls, data: dict[str, Any]) -> "LLMCallRecord":
|
|
85
|
-
"""Create from dictionary."""
|
|
86
|
-
return cls(**data)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
@dataclass
|
|
90
|
-
class WorkflowStageRecord:
|
|
91
|
-
"""Record of a single workflow stage execution."""
|
|
92
|
-
|
|
93
|
-
stage_name: str
|
|
94
|
-
tier: str
|
|
95
|
-
model_id: str
|
|
96
|
-
input_tokens: int = 0
|
|
97
|
-
output_tokens: int = 0
|
|
98
|
-
cost: float = 0.0
|
|
99
|
-
latency_ms: int = 0
|
|
100
|
-
success: bool = True
|
|
101
|
-
skipped: bool = False
|
|
102
|
-
skip_reason: str | None = None
|
|
103
|
-
error: str | None = None
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
@dataclass
|
|
107
|
-
class WorkflowRunRecord:
|
|
108
|
-
"""Record of a complete workflow execution.
|
|
109
|
-
|
|
110
|
-
Aggregates stage-level metrics and provides workflow-level analytics.
|
|
111
|
-
"""
|
|
112
|
-
|
|
113
|
-
# Identification
|
|
114
|
-
run_id: str
|
|
115
|
-
workflow_name: str
|
|
116
|
-
started_at: str # ISO format
|
|
117
|
-
completed_at: str | None = None
|
|
118
|
-
|
|
119
|
-
# Context
|
|
120
|
-
user_id: str | None = None
|
|
121
|
-
session_id: str | None = None
|
|
122
|
-
|
|
123
|
-
# Stages
|
|
124
|
-
stages: list[WorkflowStageRecord] = field(default_factory=list)
|
|
125
|
-
|
|
126
|
-
# Aggregated metrics
|
|
127
|
-
total_input_tokens: int = 0
|
|
128
|
-
total_output_tokens: int = 0
|
|
129
|
-
total_cost: float = 0.0
|
|
130
|
-
baseline_cost: float = 0.0 # If all stages used premium
|
|
131
|
-
savings: float = 0.0
|
|
132
|
-
savings_percent: float = 0.0
|
|
133
|
-
|
|
134
|
-
# Performance
|
|
135
|
-
total_duration_ms: int = 0
|
|
136
|
-
|
|
137
|
-
# Status
|
|
138
|
-
success: bool = True
|
|
139
|
-
error: str | None = None
|
|
140
|
-
|
|
141
|
-
# Provider usage
|
|
142
|
-
providers_used: list[str] = field(default_factory=list)
|
|
143
|
-
tiers_used: list[str] = field(default_factory=list)
|
|
144
|
-
|
|
145
|
-
def to_dict(self) -> dict[str, Any]:
|
|
146
|
-
"""Convert to dictionary for JSON serialization."""
|
|
147
|
-
data = asdict(self)
|
|
148
|
-
data["stages"] = [asdict(s) for s in self.stages]
|
|
149
|
-
return data
|
|
150
|
-
|
|
151
|
-
@classmethod
|
|
152
|
-
def from_dict(cls, data: dict[str, Any]) -> "WorkflowRunRecord":
|
|
153
|
-
"""Create from dictionary."""
|
|
154
|
-
stages = [WorkflowStageRecord(**s) for s in data.pop("stages", [])]
|
|
155
|
-
return cls(stages=stages, **data)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
@dataclass
|
|
159
|
-
class TaskRoutingRecord:
|
|
160
|
-
"""Record of task routing decision for Tier 1 automation.
|
|
161
|
-
|
|
162
|
-
Tracks which agent/workflow handles each task, routing strategy,
|
|
163
|
-
and execution outcome for automation monitoring.
|
|
164
|
-
"""
|
|
165
|
-
|
|
166
|
-
# Identification (required)
|
|
167
|
-
routing_id: str
|
|
168
|
-
timestamp: str # ISO format
|
|
169
|
-
|
|
170
|
-
# Task context (required)
|
|
171
|
-
task_description: str
|
|
172
|
-
task_type: str # "code_review", "test_gen", "bug_fix", "refactor", etc.
|
|
173
|
-
task_complexity: str # "simple", "moderate", "complex"
|
|
174
|
-
|
|
175
|
-
# Routing decision (required)
|
|
176
|
-
assigned_agent: str # "test_gen_workflow", "code_review_workflow", etc.
|
|
177
|
-
assigned_tier: str # "cheap", "capable", "premium"
|
|
178
|
-
routing_strategy: str # "rule_based", "ml_predicted", "manual_override"
|
|
179
|
-
|
|
180
|
-
# Optional fields with defaults
|
|
181
|
-
task_dependencies: list[str] = field(default_factory=list) # Task IDs this depends on
|
|
182
|
-
confidence_score: float = 1.0 # 0.0-1.0 for ML predictions
|
|
183
|
-
|
|
184
|
-
# Execution tracking
|
|
185
|
-
status: str = "pending" # "pending", "running", "completed", "failed"
|
|
186
|
-
started_at: str | None = None
|
|
187
|
-
completed_at: str | None = None
|
|
188
|
-
|
|
189
|
-
# Outcome
|
|
190
|
-
success: bool = False
|
|
191
|
-
quality_score: float | None = None # 0.0-1.0 if applicable
|
|
192
|
-
retry_count: int = 0
|
|
193
|
-
error_type: str | None = None
|
|
194
|
-
error_message: str | None = None
|
|
195
|
-
|
|
196
|
-
# Cost tracking
|
|
197
|
-
estimated_cost: float = 0.0
|
|
198
|
-
actual_cost: float | None = None
|
|
199
|
-
|
|
200
|
-
# Metadata
|
|
201
|
-
user_id: str | None = None
|
|
202
|
-
session_id: str | None = None
|
|
203
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
204
|
-
|
|
205
|
-
def to_dict(self) -> dict[str, Any]:
|
|
206
|
-
"""Convert to dictionary for JSON serialization."""
|
|
207
|
-
return asdict(self)
|
|
208
|
-
|
|
209
|
-
@classmethod
|
|
210
|
-
def from_dict(cls, data: dict[str, Any]) -> "TaskRoutingRecord":
|
|
211
|
-
"""Create from dictionary."""
|
|
212
|
-
return cls(**data)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
@dataclass
|
|
216
|
-
class TestExecutionRecord:
|
|
217
|
-
"""Record of test execution for Tier 1 QA automation.
|
|
218
|
-
|
|
219
|
-
Tracks test execution results, coverage metrics, and failure details
|
|
220
|
-
for quality assurance monitoring.
|
|
221
|
-
"""
|
|
222
|
-
|
|
223
|
-
# Identification (required)
|
|
224
|
-
execution_id: str
|
|
225
|
-
timestamp: str # ISO format
|
|
226
|
-
|
|
227
|
-
# Test context (required)
|
|
228
|
-
test_suite: str # "unit", "integration", "e2e", "all"
|
|
229
|
-
|
|
230
|
-
# Optional fields with defaults
|
|
231
|
-
test_files: list[str] = field(default_factory=list) # Specific test files executed
|
|
232
|
-
triggered_by: str = "manual" # "workflow", "manual", "ci", "pre_commit"
|
|
233
|
-
|
|
234
|
-
# Execution details
|
|
235
|
-
command: str = ""
|
|
236
|
-
working_directory: str = ""
|
|
237
|
-
duration_seconds: float = 0.0
|
|
238
|
-
|
|
239
|
-
# Results
|
|
240
|
-
total_tests: int = 0
|
|
241
|
-
passed: int = 0
|
|
242
|
-
failed: int = 0
|
|
243
|
-
skipped: int = 0
|
|
244
|
-
errors: int = 0
|
|
245
|
-
|
|
246
|
-
# Coverage (if available)
|
|
247
|
-
coverage_percentage: float | None = None
|
|
248
|
-
coverage_report_path: str | None = None
|
|
249
|
-
|
|
250
|
-
# Failures
|
|
251
|
-
failed_tests: list[dict[str, Any]] = field(
|
|
252
|
-
default_factory=list
|
|
253
|
-
) # [{name, file, error, traceback}]
|
|
254
|
-
|
|
255
|
-
# Status
|
|
256
|
-
success: bool = False # True if all tests passed
|
|
257
|
-
exit_code: int = 0
|
|
258
|
-
|
|
259
|
-
# Metadata
|
|
260
|
-
workflow_id: str | None = None # Link to workflow that triggered this
|
|
261
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
262
|
-
|
|
263
|
-
def to_dict(self) -> dict[str, Any]:
|
|
264
|
-
"""Convert to dictionary for JSON serialization."""
|
|
265
|
-
return asdict(self)
|
|
266
|
-
|
|
267
|
-
@classmethod
|
|
268
|
-
def from_dict(cls, data: dict[str, Any]) -> "TestExecutionRecord":
|
|
269
|
-
"""Create from dictionary."""
|
|
270
|
-
return cls(**data)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
@dataclass
|
|
274
|
-
class CoverageRecord:
|
|
275
|
-
"""Record of test coverage metrics for Tier 1 QA monitoring.
|
|
276
|
-
|
|
277
|
-
Tracks coverage percentage, trends, and critical gaps for
|
|
278
|
-
continuous quality improvement.
|
|
279
|
-
"""
|
|
280
|
-
|
|
281
|
-
# Identification (required)
|
|
282
|
-
record_id: str
|
|
283
|
-
timestamp: str # ISO format
|
|
284
|
-
|
|
285
|
-
# Coverage metrics (required)
|
|
286
|
-
overall_percentage: float
|
|
287
|
-
lines_total: int
|
|
288
|
-
lines_covered: int
|
|
289
|
-
|
|
290
|
-
# Optional fields with defaults
|
|
291
|
-
branches_total: int = 0
|
|
292
|
-
branches_covered: int = 0
|
|
293
|
-
|
|
294
|
-
# File-level breakdown
|
|
295
|
-
files_total: int = 0
|
|
296
|
-
files_well_covered: int = 0 # >= 80%
|
|
297
|
-
files_critical: int = 0 # < 50%
|
|
298
|
-
untested_files: list[str] = field(default_factory=list)
|
|
299
|
-
|
|
300
|
-
# Critical gaps
|
|
301
|
-
critical_gaps: list[dict[str, Any]] = field(
|
|
302
|
-
default_factory=list
|
|
303
|
-
) # [{file, coverage, priority}]
|
|
304
|
-
|
|
305
|
-
# Trend data
|
|
306
|
-
previous_percentage: float | None = None
|
|
307
|
-
trend: str | None = None # "improving", "declining", "stable"
|
|
308
|
-
|
|
309
|
-
# Source
|
|
310
|
-
coverage_format: str = "xml" # "xml", "json", "lcov"
|
|
311
|
-
coverage_file: str = ""
|
|
312
|
-
|
|
313
|
-
# Metadata
|
|
314
|
-
workflow_id: str | None = None
|
|
315
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
316
|
-
|
|
317
|
-
def to_dict(self) -> dict[str, Any]:
|
|
318
|
-
"""Convert to dictionary for JSON serialization."""
|
|
319
|
-
return asdict(self)
|
|
320
|
-
|
|
321
|
-
@classmethod
|
|
322
|
-
def from_dict(cls, data: dict[str, Any]) -> "CoverageRecord":
|
|
323
|
-
"""Create from dictionary."""
|
|
324
|
-
return cls(**data)
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
@dataclass
|
|
328
|
-
class AgentAssignmentRecord:
|
|
329
|
-
"""Record of agent assignment for simple tasks (Tier 1).
|
|
330
|
-
|
|
331
|
-
Tracks task assignments to agents/workflows with clear specs
|
|
332
|
-
and no complex dependencies for automation monitoring.
|
|
333
|
-
"""
|
|
334
|
-
|
|
335
|
-
# Identification (required)
|
|
336
|
-
assignment_id: str
|
|
337
|
-
timestamp: str # ISO format
|
|
338
|
-
|
|
339
|
-
# Task details (required)
|
|
340
|
-
task_id: str
|
|
341
|
-
task_title: str
|
|
342
|
-
task_description: str
|
|
343
|
-
|
|
344
|
-
# Assignment (required)
|
|
345
|
-
assigned_agent: str # Agent/workflow name
|
|
346
|
-
|
|
347
|
-
# Optional fields with defaults
|
|
348
|
-
task_spec_clarity: float = 0.0 # 0.0-1.0, higher = clearer spec
|
|
349
|
-
assignment_reason: str = "" # Why this agent was chosen
|
|
350
|
-
estimated_duration_hours: float = 0.0
|
|
351
|
-
|
|
352
|
-
# Criteria checks
|
|
353
|
-
has_clear_spec: bool = False
|
|
354
|
-
has_dependencies: bool = False
|
|
355
|
-
requires_human_review: bool = False
|
|
356
|
-
automated_eligible: bool = False # True for Tier 1
|
|
357
|
-
|
|
358
|
-
# Execution tracking
|
|
359
|
-
status: str = "assigned" # "assigned", "in_progress", "completed", "blocked"
|
|
360
|
-
started_at: str | None = None
|
|
361
|
-
completed_at: str | None = None
|
|
362
|
-
actual_duration_hours: float | None = None
|
|
363
|
-
|
|
364
|
-
# Outcome
|
|
365
|
-
success: bool = False
|
|
366
|
-
quality_check_passed: bool = False
|
|
367
|
-
human_review_required: bool = False
|
|
368
|
-
|
|
369
|
-
# Metadata
|
|
370
|
-
workflow_id: str | None = None
|
|
371
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
372
|
-
|
|
373
|
-
def to_dict(self) -> dict[str, Any]:
|
|
374
|
-
"""Convert to dictionary for JSON serialization."""
|
|
375
|
-
return asdict(self)
|
|
376
|
-
|
|
377
|
-
@classmethod
|
|
378
|
-
def from_dict(cls, data: dict[str, Any]) -> "AgentAssignmentRecord":
|
|
379
|
-
"""Create from dictionary."""
|
|
380
|
-
return cls(**data)
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
@dataclass
|
|
384
|
-
class FileTestRecord:
|
|
385
|
-
"""Record of test execution for a specific source file.
|
|
386
|
-
|
|
387
|
-
Tracks when tests for an individual file were last run, results,
|
|
388
|
-
and coverage - enabling per-file test status tracking.
|
|
389
|
-
|
|
390
|
-
This complements TestExecutionRecord (suite-level) by providing
|
|
391
|
-
granular file-level test tracking for better test maintenance.
|
|
392
|
-
"""
|
|
393
|
-
|
|
394
|
-
# Identification (required)
|
|
395
|
-
file_path: str # Source file path (relative to project root)
|
|
396
|
-
timestamp: str # ISO format - when tests were run
|
|
397
|
-
|
|
398
|
-
# Test results (required)
|
|
399
|
-
last_test_result: str # "passed", "failed", "error", "skipped", "no_tests"
|
|
400
|
-
test_count: int # Number of tests for this file
|
|
401
|
-
|
|
402
|
-
# Detailed results with defaults
|
|
403
|
-
passed: int = 0
|
|
404
|
-
failed: int = 0
|
|
405
|
-
skipped: int = 0
|
|
406
|
-
errors: int = 0
|
|
407
|
-
|
|
408
|
-
# Timing
|
|
409
|
-
duration_seconds: float = 0.0
|
|
410
|
-
|
|
411
|
-
# Coverage for this file (if available)
|
|
412
|
-
coverage_percent: float | None = None
|
|
413
|
-
lines_total: int = 0
|
|
414
|
-
lines_covered: int = 0
|
|
415
|
-
|
|
416
|
-
# Test file info
|
|
417
|
-
test_file_path: str | None = None # Associated test file
|
|
418
|
-
|
|
419
|
-
# Failure details (if any)
|
|
420
|
-
failed_tests: list[dict[str, Any]] = field(default_factory=list)
|
|
421
|
-
|
|
422
|
-
# Staleness tracking
|
|
423
|
-
source_modified_at: str | None = None # When source file was last modified
|
|
424
|
-
tests_modified_at: str | None = None # When test file was last modified
|
|
425
|
-
is_stale: bool = False # Tests haven't been run since source changed
|
|
426
|
-
|
|
427
|
-
# Link to execution
|
|
428
|
-
execution_id: str | None = None # Link to TestExecutionRecord
|
|
429
|
-
workflow_id: str | None = None
|
|
430
|
-
|
|
431
|
-
# Metadata
|
|
432
|
-
metadata: dict[str, Any] = field(default_factory=dict)
|
|
433
|
-
|
|
434
|
-
def to_dict(self) -> dict[str, Any]:
|
|
435
|
-
"""Convert to dictionary for JSON serialization."""
|
|
436
|
-
return asdict(self)
|
|
437
|
-
|
|
438
|
-
@classmethod
|
|
439
|
-
def from_dict(cls, data: dict[str, Any]) -> "FileTestRecord":
|
|
440
|
-
"""Create from dictionary."""
|
|
441
|
-
return cls(**data)
|
|
442
|
-
|
|
443
|
-
@property
|
|
444
|
-
def success(self) -> bool:
|
|
445
|
-
"""Check if all tests passed."""
|
|
446
|
-
return self.last_test_result == "passed" and self.failed == 0 and self.errors == 0
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
@runtime_checkable
|
|
450
|
-
class TelemetryBackend(Protocol):
|
|
451
|
-
"""Protocol for telemetry storage backends.
|
|
452
|
-
|
|
453
|
-
Implementations can store telemetry data in different backends:
|
|
454
|
-
- JSONL files (default, via TelemetryStore)
|
|
455
|
-
- Database (PostgreSQL, SQLite, etc.)
|
|
456
|
-
- Cloud services (DataDog, New Relic, etc.)
|
|
457
|
-
- Custom backends
|
|
458
|
-
|
|
459
|
-
Supports both core telemetry (LLM calls, workflows) and Tier 1
|
|
460
|
-
automation monitoring (task routing, tests, coverage, assignments).
|
|
461
|
-
|
|
462
|
-
Example implementing a custom backend:
|
|
463
|
-
>>> class DatabaseBackend:
|
|
464
|
-
... def log_call(self, record: LLMCallRecord) -> None:
|
|
465
|
-
... # Insert into database
|
|
466
|
-
... pass
|
|
467
|
-
...
|
|
468
|
-
... def log_workflow(self, record: WorkflowRunRecord) -> None:
|
|
469
|
-
... # Insert into database
|
|
470
|
-
... pass
|
|
471
|
-
...
|
|
472
|
-
... def get_calls(self, since=None, workflow_name=None, limit=1000):
|
|
473
|
-
... # Query database
|
|
474
|
-
... return []
|
|
475
|
-
...
|
|
476
|
-
... def get_workflows(self, since=None, workflow_name=None, limit=100):
|
|
477
|
-
... # Query database
|
|
478
|
-
... return []
|
|
479
|
-
"""
|
|
480
|
-
|
|
481
|
-
def log_call(self, record: LLMCallRecord) -> None:
|
|
482
|
-
"""Log an LLM call record."""
|
|
483
|
-
...
|
|
484
|
-
|
|
485
|
-
def log_workflow(self, record: WorkflowRunRecord) -> None:
|
|
486
|
-
"""Log a workflow run record."""
|
|
487
|
-
...
|
|
488
|
-
|
|
489
|
-
def get_calls(
|
|
490
|
-
self,
|
|
491
|
-
since: datetime | None = None,
|
|
492
|
-
workflow_name: str | None = None,
|
|
493
|
-
limit: int = 1000,
|
|
494
|
-
) -> list[LLMCallRecord]:
|
|
495
|
-
"""Get LLM call records with optional filters."""
|
|
496
|
-
...
|
|
497
|
-
|
|
498
|
-
def get_workflows(
|
|
499
|
-
self,
|
|
500
|
-
since: datetime | None = None,
|
|
501
|
-
workflow_name: str | None = None,
|
|
502
|
-
limit: int = 100,
|
|
503
|
-
) -> list[WorkflowRunRecord]:
|
|
504
|
-
"""Get workflow run records with optional filters."""
|
|
505
|
-
...
|
|
506
|
-
|
|
507
|
-
# Tier 1 automation monitoring methods
|
|
508
|
-
def log_task_routing(self, record: TaskRoutingRecord) -> None:
|
|
509
|
-
"""Log a task routing decision."""
|
|
510
|
-
...
|
|
511
|
-
|
|
512
|
-
def log_test_execution(self, record: TestExecutionRecord) -> None:
|
|
513
|
-
"""Log a test execution."""
|
|
514
|
-
...
|
|
515
|
-
|
|
516
|
-
def log_coverage(self, record: CoverageRecord) -> None:
|
|
517
|
-
"""Log coverage metrics."""
|
|
518
|
-
...
|
|
519
|
-
|
|
520
|
-
def log_agent_assignment(self, record: AgentAssignmentRecord) -> None:
|
|
521
|
-
"""Log an agent assignment."""
|
|
522
|
-
...
|
|
523
|
-
|
|
524
|
-
def get_task_routings(
|
|
525
|
-
self,
|
|
526
|
-
since: datetime | None = None,
|
|
527
|
-
status: str | None = None,
|
|
528
|
-
limit: int = 1000,
|
|
529
|
-
) -> list[TaskRoutingRecord]:
|
|
530
|
-
"""Get task routing records with optional filters."""
|
|
531
|
-
...
|
|
532
|
-
|
|
533
|
-
def get_test_executions(
|
|
534
|
-
self,
|
|
535
|
-
since: datetime | None = None,
|
|
536
|
-
success_only: bool = False,
|
|
537
|
-
limit: int = 100,
|
|
538
|
-
) -> list[TestExecutionRecord]:
|
|
539
|
-
"""Get test execution records with optional filters."""
|
|
540
|
-
...
|
|
541
|
-
|
|
542
|
-
def get_coverage_history(
|
|
543
|
-
self,
|
|
544
|
-
since: datetime | None = None,
|
|
545
|
-
limit: int = 100,
|
|
546
|
-
) -> list[CoverageRecord]:
|
|
547
|
-
"""Get coverage history records."""
|
|
548
|
-
...
|
|
549
|
-
|
|
550
|
-
def get_agent_assignments(
|
|
551
|
-
self,
|
|
552
|
-
since: datetime | None = None,
|
|
553
|
-
automated_only: bool = True,
|
|
554
|
-
limit: int = 1000,
|
|
555
|
-
) -> list[AgentAssignmentRecord]:
|
|
556
|
-
"""Get agent assignment records with optional filters."""
|
|
557
|
-
...
|
|
558
|
-
|
|
559
|
-
# Per-file test tracking methods
|
|
560
|
-
def log_file_test(self, record: "FileTestRecord") -> None:
|
|
561
|
-
"""Log a per-file test execution record."""
|
|
562
|
-
...
|
|
563
|
-
|
|
564
|
-
def get_file_tests(
|
|
565
|
-
self,
|
|
566
|
-
file_path: str | None = None,
|
|
567
|
-
since: datetime | None = None,
|
|
568
|
-
result_filter: str | None = None,
|
|
569
|
-
limit: int = 1000,
|
|
570
|
-
) -> list["FileTestRecord"]:
|
|
571
|
-
"""Get per-file test records with optional filters."""
|
|
572
|
-
...
|
|
573
|
-
|
|
574
|
-
def get_latest_file_test(self, file_path: str) -> "FileTestRecord | None":
|
|
575
|
-
"""Get the most recent test record for a specific file."""
|
|
576
|
-
...
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
def _parse_timestamp(timestamp_str: str) -> datetime:
|
|
580
|
-
"""Parse ISO format timestamp, handling 'Z' suffix for Python 3.10 compatibility.
|
|
581
|
-
|
|
582
|
-
Args:
|
|
583
|
-
timestamp_str: ISO format timestamp string, possibly with 'Z' suffix
|
|
584
|
-
|
|
585
|
-
Returns:
|
|
586
|
-
Parsed datetime object (timezone-naive UTC)
|
|
587
|
-
"""
|
|
588
|
-
# Python 3.10's fromisoformat() doesn't handle 'Z' suffix
|
|
589
|
-
if timestamp_str.endswith("Z"):
|
|
590
|
-
timestamp_str = timestamp_str[:-1]
|
|
591
|
-
|
|
592
|
-
dt = datetime.fromisoformat(timestamp_str)
|
|
593
|
-
|
|
594
|
-
# Convert to naive UTC if timezone-aware
|
|
595
|
-
if dt.tzinfo is not None:
|
|
596
|
-
dt = dt.replace(tzinfo=None)
|
|
597
|
-
|
|
598
|
-
return dt
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
class TelemetryStore:
|
|
602
|
-
"""JSONL file-based telemetry backend (default implementation).
|
|
603
|
-
|
|
604
|
-
Stores records in JSONL format for easy streaming and analysis.
|
|
605
|
-
Implements the TelemetryBackend protocol.
|
|
606
|
-
|
|
607
|
-
Supports both core telemetry and Tier 1 automation monitoring.
|
|
608
|
-
"""
|
|
609
|
-
|
|
610
|
-
def __init__(self, storage_dir: str = ".empathy"):
|
|
611
|
-
"""Initialize telemetry store.
|
|
612
|
-
|
|
613
|
-
Args:
|
|
614
|
-
storage_dir: Directory for telemetry files
|
|
615
|
-
|
|
616
|
-
"""
|
|
617
|
-
self.storage_dir = Path(storage_dir)
|
|
618
|
-
self.storage_dir.mkdir(parents=True, exist_ok=True)
|
|
619
|
-
|
|
620
|
-
# Core telemetry files
|
|
621
|
-
self.calls_file = self.storage_dir / "llm_calls.jsonl"
|
|
622
|
-
self.workflows_file = self.storage_dir / "workflow_runs.jsonl"
|
|
623
|
-
|
|
624
|
-
# Tier 1 automation monitoring files
|
|
625
|
-
self.task_routing_file = self.storage_dir / "task_routing.jsonl"
|
|
626
|
-
self.test_executions_file = self.storage_dir / "test_executions.jsonl"
|
|
627
|
-
self.coverage_history_file = self.storage_dir / "coverage_history.jsonl"
|
|
628
|
-
self.agent_assignments_file = self.storage_dir / "agent_assignments.jsonl"
|
|
629
|
-
|
|
630
|
-
# Per-file test tracking
|
|
631
|
-
self.file_tests_file = self.storage_dir / "file_tests.jsonl"
|
|
632
|
-
|
|
633
|
-
def log_call(self, record: LLMCallRecord) -> None:
|
|
634
|
-
"""Log an LLM call record."""
|
|
635
|
-
with open(self.calls_file, "a") as f:
|
|
636
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
637
|
-
|
|
638
|
-
def log_workflow(self, record: WorkflowRunRecord) -> None:
|
|
639
|
-
"""Log a workflow run record."""
|
|
640
|
-
with open(self.workflows_file, "a") as f:
|
|
641
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
642
|
-
|
|
643
|
-
def get_calls(
|
|
644
|
-
self,
|
|
645
|
-
since: datetime | None = None,
|
|
646
|
-
workflow_name: str | None = None,
|
|
647
|
-
limit: int = 1000,
|
|
648
|
-
) -> list[LLMCallRecord]:
|
|
649
|
-
"""Get LLM call records.
|
|
650
|
-
|
|
651
|
-
Args:
|
|
652
|
-
since: Only return records after this time
|
|
653
|
-
workflow_name: Filter by workflow name
|
|
654
|
-
limit: Maximum records to return
|
|
655
|
-
|
|
656
|
-
Returns:
|
|
657
|
-
List of LLMCallRecord
|
|
658
|
-
|
|
659
|
-
"""
|
|
660
|
-
records: list[LLMCallRecord] = []
|
|
661
|
-
if not self.calls_file.exists():
|
|
662
|
-
return records
|
|
663
|
-
|
|
664
|
-
with open(self.calls_file) as f:
|
|
665
|
-
for line in f:
|
|
666
|
-
if not line.strip():
|
|
667
|
-
continue
|
|
668
|
-
try:
|
|
669
|
-
data = json.loads(line)
|
|
670
|
-
record = LLMCallRecord.from_dict(data)
|
|
671
|
-
|
|
672
|
-
# Apply filters
|
|
673
|
-
if since:
|
|
674
|
-
record_time = _parse_timestamp(record.timestamp)
|
|
675
|
-
if record_time < since:
|
|
676
|
-
continue
|
|
677
|
-
|
|
678
|
-
if workflow_name and record.workflow_name != workflow_name:
|
|
679
|
-
continue
|
|
680
|
-
|
|
681
|
-
records.append(record)
|
|
682
|
-
|
|
683
|
-
if len(records) >= limit:
|
|
684
|
-
break
|
|
685
|
-
except (json.JSONDecodeError, KeyError):
|
|
686
|
-
continue
|
|
687
|
-
|
|
688
|
-
return records
|
|
689
|
-
|
|
690
|
-
def get_workflows(
|
|
691
|
-
self,
|
|
692
|
-
since: datetime | None = None,
|
|
693
|
-
workflow_name: str | None = None,
|
|
694
|
-
limit: int = 100,
|
|
695
|
-
) -> list[WorkflowRunRecord]:
|
|
696
|
-
"""Get workflow run records.
|
|
697
|
-
|
|
698
|
-
Args:
|
|
699
|
-
since: Only return records after this time
|
|
700
|
-
workflow_name: Filter by workflow name
|
|
701
|
-
limit: Maximum records to return
|
|
702
|
-
|
|
703
|
-
Returns:
|
|
704
|
-
List of WorkflowRunRecord
|
|
705
|
-
|
|
706
|
-
"""
|
|
707
|
-
records: list[WorkflowRunRecord] = []
|
|
708
|
-
if not self.workflows_file.exists():
|
|
709
|
-
return records
|
|
710
|
-
|
|
711
|
-
with open(self.workflows_file) as f:
|
|
712
|
-
for line in f:
|
|
713
|
-
if not line.strip():
|
|
714
|
-
continue
|
|
715
|
-
try:
|
|
716
|
-
data = json.loads(line)
|
|
717
|
-
record = WorkflowRunRecord.from_dict(data)
|
|
718
|
-
|
|
719
|
-
# Apply filters
|
|
720
|
-
if since:
|
|
721
|
-
record_time = _parse_timestamp(record.started_at)
|
|
722
|
-
if record_time < since:
|
|
723
|
-
continue
|
|
724
|
-
|
|
725
|
-
if workflow_name and record.workflow_name != workflow_name:
|
|
726
|
-
continue
|
|
727
|
-
|
|
728
|
-
records.append(record)
|
|
729
|
-
|
|
730
|
-
if len(records) >= limit:
|
|
731
|
-
break
|
|
732
|
-
except (json.JSONDecodeError, KeyError):
|
|
733
|
-
continue
|
|
734
|
-
|
|
735
|
-
return records
|
|
736
|
-
|
|
737
|
-
# Tier 1 automation monitoring methods
|
|
738
|
-
|
|
739
|
-
def log_task_routing(self, record: TaskRoutingRecord) -> None:
|
|
740
|
-
"""Log a task routing decision."""
|
|
741
|
-
with open(self.task_routing_file, "a") as f:
|
|
742
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
743
|
-
|
|
744
|
-
def log_test_execution(self, record: TestExecutionRecord) -> None:
|
|
745
|
-
"""Log a test execution."""
|
|
746
|
-
with open(self.test_executions_file, "a") as f:
|
|
747
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
748
|
-
|
|
749
|
-
def log_coverage(self, record: CoverageRecord) -> None:
|
|
750
|
-
"""Log coverage metrics."""
|
|
751
|
-
with open(self.coverage_history_file, "a") as f:
|
|
752
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
753
|
-
|
|
754
|
-
def log_agent_assignment(self, record: AgentAssignmentRecord) -> None:
|
|
755
|
-
"""Log an agent assignment."""
|
|
756
|
-
with open(self.agent_assignments_file, "a") as f:
|
|
757
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
758
|
-
|
|
759
|
-
def get_task_routings(
|
|
760
|
-
self,
|
|
761
|
-
since: datetime | None = None,
|
|
762
|
-
status: str | None = None,
|
|
763
|
-
limit: int = 1000,
|
|
764
|
-
) -> list[TaskRoutingRecord]:
|
|
765
|
-
"""Get task routing records.
|
|
766
|
-
|
|
767
|
-
Args:
|
|
768
|
-
since: Only return records after this time
|
|
769
|
-
status: Filter by status (pending, running, completed, failed)
|
|
770
|
-
limit: Maximum records to return
|
|
771
|
-
|
|
772
|
-
Returns:
|
|
773
|
-
List of TaskRoutingRecord
|
|
774
|
-
|
|
775
|
-
"""
|
|
776
|
-
records: list[TaskRoutingRecord] = []
|
|
777
|
-
if not self.task_routing_file.exists():
|
|
778
|
-
return records
|
|
779
|
-
|
|
780
|
-
with open(self.task_routing_file) as f:
|
|
781
|
-
for line in f:
|
|
782
|
-
if not line.strip():
|
|
783
|
-
continue
|
|
784
|
-
try:
|
|
785
|
-
data = json.loads(line)
|
|
786
|
-
record = TaskRoutingRecord.from_dict(data)
|
|
787
|
-
|
|
788
|
-
# Apply filters
|
|
789
|
-
if since:
|
|
790
|
-
record_time = _parse_timestamp(record.timestamp)
|
|
791
|
-
if record_time < since:
|
|
792
|
-
continue
|
|
793
|
-
|
|
794
|
-
if status and record.status != status:
|
|
795
|
-
continue
|
|
796
|
-
|
|
797
|
-
records.append(record)
|
|
798
|
-
|
|
799
|
-
if len(records) >= limit:
|
|
800
|
-
break
|
|
801
|
-
except (json.JSONDecodeError, KeyError):
|
|
802
|
-
continue
|
|
803
|
-
|
|
804
|
-
return records
|
|
805
|
-
|
|
806
|
-
def get_test_executions(
|
|
807
|
-
self,
|
|
808
|
-
since: datetime | None = None,
|
|
809
|
-
success_only: bool = False,
|
|
810
|
-
limit: int = 100,
|
|
811
|
-
) -> list[TestExecutionRecord]:
|
|
812
|
-
"""Get test execution records.
|
|
813
|
-
|
|
814
|
-
Args:
|
|
815
|
-
since: Only return records after this time
|
|
816
|
-
success_only: Only return successful test runs
|
|
817
|
-
limit: Maximum records to return
|
|
818
|
-
|
|
819
|
-
Returns:
|
|
820
|
-
List of TestExecutionRecord
|
|
821
|
-
|
|
822
|
-
"""
|
|
823
|
-
records: list[TestExecutionRecord] = []
|
|
824
|
-
if not self.test_executions_file.exists():
|
|
825
|
-
return records
|
|
826
|
-
|
|
827
|
-
with open(self.test_executions_file) as f:
|
|
828
|
-
for line in f:
|
|
829
|
-
if not line.strip():
|
|
830
|
-
continue
|
|
831
|
-
try:
|
|
832
|
-
data = json.loads(line)
|
|
833
|
-
record = TestExecutionRecord.from_dict(data)
|
|
834
|
-
|
|
835
|
-
# Apply filters
|
|
836
|
-
if since:
|
|
837
|
-
record_time = _parse_timestamp(record.timestamp)
|
|
838
|
-
if record_time < since:
|
|
839
|
-
continue
|
|
840
|
-
|
|
841
|
-
if success_only and not record.success:
|
|
842
|
-
continue
|
|
843
|
-
|
|
844
|
-
records.append(record)
|
|
845
|
-
|
|
846
|
-
if len(records) >= limit:
|
|
847
|
-
break
|
|
848
|
-
except (json.JSONDecodeError, KeyError):
|
|
849
|
-
continue
|
|
850
|
-
|
|
851
|
-
return records
|
|
852
|
-
|
|
853
|
-
def get_coverage_history(
|
|
854
|
-
self,
|
|
855
|
-
since: datetime | None = None,
|
|
856
|
-
limit: int = 100,
|
|
857
|
-
) -> list[CoverageRecord]:
|
|
858
|
-
"""Get coverage history records.
|
|
859
|
-
|
|
860
|
-
Args:
|
|
861
|
-
since: Only return records after this time
|
|
862
|
-
limit: Maximum records to return
|
|
863
|
-
|
|
864
|
-
Returns:
|
|
865
|
-
List of CoverageRecord
|
|
866
|
-
|
|
867
|
-
"""
|
|
868
|
-
records: list[CoverageRecord] = []
|
|
869
|
-
if not self.coverage_history_file.exists():
|
|
870
|
-
return records
|
|
871
|
-
|
|
872
|
-
with open(self.coverage_history_file) as f:
|
|
873
|
-
for line in f:
|
|
874
|
-
if not line.strip():
|
|
875
|
-
continue
|
|
876
|
-
try:
|
|
877
|
-
data = json.loads(line)
|
|
878
|
-
record = CoverageRecord.from_dict(data)
|
|
879
|
-
|
|
880
|
-
# Apply filters
|
|
881
|
-
if since:
|
|
882
|
-
record_time = _parse_timestamp(record.timestamp)
|
|
883
|
-
if record_time < since:
|
|
884
|
-
continue
|
|
885
|
-
|
|
886
|
-
records.append(record)
|
|
887
|
-
|
|
888
|
-
if len(records) >= limit:
|
|
889
|
-
break
|
|
890
|
-
except (json.JSONDecodeError, KeyError):
|
|
891
|
-
continue
|
|
892
|
-
|
|
893
|
-
return records
|
|
894
|
-
|
|
895
|
-
def get_agent_assignments(
|
|
896
|
-
self,
|
|
897
|
-
since: datetime | None = None,
|
|
898
|
-
automated_only: bool = True,
|
|
899
|
-
limit: int = 1000,
|
|
900
|
-
) -> list[AgentAssignmentRecord]:
|
|
901
|
-
"""Get agent assignment records.
|
|
902
|
-
|
|
903
|
-
Args:
|
|
904
|
-
since: Only return records after this time
|
|
905
|
-
automated_only: Only return assignments eligible for Tier 1 automation
|
|
906
|
-
limit: Maximum records to return
|
|
907
|
-
|
|
908
|
-
Returns:
|
|
909
|
-
List of AgentAssignmentRecord
|
|
910
|
-
|
|
911
|
-
"""
|
|
912
|
-
records: list[AgentAssignmentRecord] = []
|
|
913
|
-
if not self.agent_assignments_file.exists():
|
|
914
|
-
return records
|
|
915
|
-
|
|
916
|
-
with open(self.agent_assignments_file) as f:
|
|
917
|
-
for line in f:
|
|
918
|
-
if not line.strip():
|
|
919
|
-
continue
|
|
920
|
-
try:
|
|
921
|
-
data = json.loads(line)
|
|
922
|
-
record = AgentAssignmentRecord.from_dict(data)
|
|
923
|
-
|
|
924
|
-
# Apply filters
|
|
925
|
-
if since:
|
|
926
|
-
record_time = _parse_timestamp(record.timestamp)
|
|
927
|
-
if record_time < since:
|
|
928
|
-
continue
|
|
929
|
-
|
|
930
|
-
if automated_only and not record.automated_eligible:
|
|
931
|
-
continue
|
|
932
|
-
|
|
933
|
-
records.append(record)
|
|
934
|
-
|
|
935
|
-
if len(records) >= limit:
|
|
936
|
-
break
|
|
937
|
-
except (json.JSONDecodeError, KeyError):
|
|
938
|
-
continue
|
|
939
|
-
|
|
940
|
-
return records
|
|
941
|
-
|
|
942
|
-
# Per-file test tracking methods
|
|
943
|
-
|
|
944
|
-
def log_file_test(self, record: "FileTestRecord") -> None:
|
|
945
|
-
"""Log a per-file test execution record.
|
|
946
|
-
|
|
947
|
-
Args:
|
|
948
|
-
record: FileTestRecord to log
|
|
949
|
-
"""
|
|
950
|
-
with open(self.file_tests_file, "a") as f:
|
|
951
|
-
f.write(json.dumps(record.to_dict()) + "\n")
|
|
952
|
-
|
|
953
|
-
def get_file_tests(
|
|
954
|
-
self,
|
|
955
|
-
file_path: str | None = None,
|
|
956
|
-
since: datetime | None = None,
|
|
957
|
-
result_filter: str | None = None,
|
|
958
|
-
limit: int = 1000,
|
|
959
|
-
) -> list["FileTestRecord"]:
|
|
960
|
-
"""Get per-file test records with optional filters.
|
|
961
|
-
|
|
962
|
-
Args:
|
|
963
|
-
file_path: Filter by specific file path
|
|
964
|
-
since: Only return records after this time
|
|
965
|
-
result_filter: Filter by result (passed, failed, error, skipped, no_tests)
|
|
966
|
-
limit: Maximum records to return
|
|
967
|
-
|
|
968
|
-
Returns:
|
|
969
|
-
List of FileTestRecord
|
|
970
|
-
"""
|
|
971
|
-
records: list[FileTestRecord] = []
|
|
972
|
-
if not self.file_tests_file.exists():
|
|
973
|
-
return records
|
|
974
|
-
|
|
975
|
-
with open(self.file_tests_file) as f:
|
|
976
|
-
for line in f:
|
|
977
|
-
if not line.strip():
|
|
978
|
-
continue
|
|
979
|
-
try:
|
|
980
|
-
data = json.loads(line)
|
|
981
|
-
record = FileTestRecord.from_dict(data)
|
|
982
|
-
|
|
983
|
-
# Apply filters
|
|
984
|
-
if file_path and record.file_path != file_path:
|
|
985
|
-
continue
|
|
986
|
-
|
|
987
|
-
if since:
|
|
988
|
-
record_time = _parse_timestamp(record.timestamp)
|
|
989
|
-
if record_time < since:
|
|
990
|
-
continue
|
|
991
|
-
|
|
992
|
-
if result_filter and record.last_test_result != result_filter:
|
|
993
|
-
continue
|
|
994
|
-
|
|
995
|
-
records.append(record)
|
|
996
|
-
|
|
997
|
-
if len(records) >= limit:
|
|
998
|
-
break
|
|
999
|
-
except (json.JSONDecodeError, KeyError):
|
|
1000
|
-
continue
|
|
1001
|
-
|
|
1002
|
-
return records
|
|
1003
|
-
|
|
1004
|
-
def get_latest_file_test(self, file_path: str) -> "FileTestRecord | None":
|
|
1005
|
-
"""Get the most recent test record for a specific file.
|
|
1006
|
-
|
|
1007
|
-
Args:
|
|
1008
|
-
file_path: Path to the source file
|
|
1009
|
-
|
|
1010
|
-
Returns:
|
|
1011
|
-
Most recent FileTestRecord or None if not found
|
|
1012
|
-
"""
|
|
1013
|
-
records = self.get_file_tests(file_path=file_path, limit=10000)
|
|
1014
|
-
if not records:
|
|
1015
|
-
return None
|
|
1016
|
-
|
|
1017
|
-
# Return the most recent record (last one since we read in chronological order)
|
|
1018
|
-
return records[-1]
|
|
1019
|
-
|
|
1020
|
-
def get_files_needing_tests(
|
|
1021
|
-
self,
|
|
1022
|
-
stale_only: bool = False,
|
|
1023
|
-
failed_only: bool = False,
|
|
1024
|
-
) -> list["FileTestRecord"]:
|
|
1025
|
-
"""Get files that need test attention.
|
|
1026
|
-
|
|
1027
|
-
Args:
|
|
1028
|
-
stale_only: Only return files with stale tests
|
|
1029
|
-
failed_only: Only return files with failed tests
|
|
1030
|
-
|
|
1031
|
-
Returns:
|
|
1032
|
-
List of FileTestRecord for files needing attention
|
|
1033
|
-
"""
|
|
1034
|
-
all_records = self.get_file_tests(limit=100000)
|
|
1035
|
-
|
|
1036
|
-
# Get latest record per file
|
|
1037
|
-
latest_by_file: dict[str, FileTestRecord] = {}
|
|
1038
|
-
for record in all_records:
|
|
1039
|
-
existing = latest_by_file.get(record.file_path)
|
|
1040
|
-
if existing is None:
|
|
1041
|
-
latest_by_file[record.file_path] = record
|
|
1042
|
-
else:
|
|
1043
|
-
# Keep the more recent one
|
|
1044
|
-
if record.timestamp > existing.timestamp:
|
|
1045
|
-
latest_by_file[record.file_path] = record
|
|
1046
|
-
|
|
1047
|
-
# Filter based on criteria
|
|
1048
|
-
results = []
|
|
1049
|
-
for record in latest_by_file.values():
|
|
1050
|
-
if stale_only and not record.is_stale:
|
|
1051
|
-
continue
|
|
1052
|
-
if failed_only and record.last_test_result not in ("failed", "error"):
|
|
1053
|
-
continue
|
|
1054
|
-
if not stale_only and not failed_only:
|
|
1055
|
-
# Return all files needing attention (stale OR failed OR no_tests)
|
|
1056
|
-
if (
|
|
1057
|
-
record.last_test_result not in ("failed", "error", "no_tests")
|
|
1058
|
-
and not record.is_stale
|
|
1059
|
-
):
|
|
1060
|
-
continue
|
|
1061
|
-
results.append(record)
|
|
1062
|
-
|
|
1063
|
-
return results
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
class TelemetryAnalytics:
|
|
1067
|
-
"""Analytics helpers for telemetry data.
|
|
1068
|
-
|
|
1069
|
-
Provides insights into cost optimization, provider usage, and performance.
|
|
1070
|
-
"""
|
|
1071
|
-
|
|
1072
|
-
def __init__(self, store: TelemetryStore | None = None):
|
|
1073
|
-
"""Initialize analytics.
|
|
1074
|
-
|
|
1075
|
-
Args:
|
|
1076
|
-
store: TelemetryStore to analyze (creates default if None)
|
|
1077
|
-
|
|
1078
|
-
"""
|
|
1079
|
-
self.store = store or TelemetryStore()
|
|
1080
|
-
|
|
1081
|
-
def top_expensive_workflows(
|
|
1082
|
-
self,
|
|
1083
|
-
n: int = 10,
|
|
1084
|
-
since: datetime | None = None,
|
|
1085
|
-
) -> list[dict[str, Any]]:
|
|
1086
|
-
"""Get the most expensive workflows.
|
|
1087
|
-
|
|
1088
|
-
Args:
|
|
1089
|
-
n: Number of workflows to return
|
|
1090
|
-
since: Only consider workflows after this time
|
|
1091
|
-
|
|
1092
|
-
Returns:
|
|
1093
|
-
List of dicts with workflow_name, total_cost, run_count
|
|
1094
|
-
|
|
1095
|
-
"""
|
|
1096
|
-
workflows = self.store.get_workflows(since=since, limit=10000)
|
|
1097
|
-
|
|
1098
|
-
# Aggregate by workflow name
|
|
1099
|
-
costs: dict[str, dict[str, Any]] = {}
|
|
1100
|
-
for wf in workflows:
|
|
1101
|
-
if wf.workflow_name not in costs:
|
|
1102
|
-
costs[wf.workflow_name] = {
|
|
1103
|
-
"workflow_name": wf.workflow_name,
|
|
1104
|
-
"total_cost": 0.0,
|
|
1105
|
-
"run_count": 0,
|
|
1106
|
-
"total_savings": 0.0,
|
|
1107
|
-
"avg_duration_ms": 0,
|
|
1108
|
-
}
|
|
1109
|
-
costs[wf.workflow_name]["total_cost"] += wf.total_cost
|
|
1110
|
-
costs[wf.workflow_name]["run_count"] += 1
|
|
1111
|
-
costs[wf.workflow_name]["total_savings"] += wf.savings
|
|
1112
|
-
|
|
1113
|
-
# Calculate averages and sort
|
|
1114
|
-
result = list(costs.values())
|
|
1115
|
-
for item in result:
|
|
1116
|
-
if item["run_count"] > 0:
|
|
1117
|
-
item["avg_cost"] = item["total_cost"] / item["run_count"]
|
|
1118
|
-
|
|
1119
|
-
result.sort(key=lambda x: x["total_cost"], reverse=True)
|
|
1120
|
-
return result[:n]
|
|
1121
|
-
|
|
1122
|
-
def provider_usage_summary(
|
|
1123
|
-
self,
|
|
1124
|
-
since: datetime | None = None,
|
|
1125
|
-
) -> dict[str, dict[str, Any]]:
|
|
1126
|
-
"""Get usage summary by provider.
|
|
1127
|
-
|
|
1128
|
-
Args:
|
|
1129
|
-
since: Only consider calls after this time
|
|
1130
|
-
|
|
1131
|
-
Returns:
|
|
1132
|
-
Dict mapping provider to usage stats
|
|
1133
|
-
|
|
1134
|
-
"""
|
|
1135
|
-
calls = self.store.get_calls(since=since, limit=100000)
|
|
1136
|
-
|
|
1137
|
-
summary: dict[str, dict[str, Any]] = {}
|
|
1138
|
-
for call in calls:
|
|
1139
|
-
if call.provider not in summary:
|
|
1140
|
-
summary[call.provider] = {
|
|
1141
|
-
"call_count": 0,
|
|
1142
|
-
"total_tokens": 0,
|
|
1143
|
-
"total_cost": 0.0,
|
|
1144
|
-
"error_count": 0,
|
|
1145
|
-
"avg_latency_ms": 0,
|
|
1146
|
-
"by_tier": {"cheap": 0, "capable": 0, "premium": 0},
|
|
1147
|
-
}
|
|
1148
|
-
|
|
1149
|
-
s = summary[call.provider]
|
|
1150
|
-
s["call_count"] += 1
|
|
1151
|
-
s["total_tokens"] += call.input_tokens + call.output_tokens
|
|
1152
|
-
s["total_cost"] += call.estimated_cost
|
|
1153
|
-
if not call.success:
|
|
1154
|
-
s["error_count"] += 1
|
|
1155
|
-
if call.tier in s["by_tier"]:
|
|
1156
|
-
s["by_tier"][call.tier] += 1
|
|
1157
|
-
|
|
1158
|
-
# Calculate averages
|
|
1159
|
-
for _provider, stats in summary.items():
|
|
1160
|
-
if stats["call_count"] > 0:
|
|
1161
|
-
stats["avg_cost"] = stats["total_cost"] / stats["call_count"]
|
|
1162
|
-
|
|
1163
|
-
return summary
|
|
1164
|
-
|
|
1165
|
-
def tier_distribution(
|
|
1166
|
-
self,
|
|
1167
|
-
since: datetime | None = None,
|
|
1168
|
-
) -> dict[str, dict[str, Any]]:
|
|
1169
|
-
"""Get call distribution by tier.
|
|
1170
|
-
|
|
1171
|
-
Args:
|
|
1172
|
-
since: Only consider calls after this time
|
|
1173
|
-
|
|
1174
|
-
Returns:
|
|
1175
|
-
Dict mapping tier to stats
|
|
1176
|
-
|
|
1177
|
-
"""
|
|
1178
|
-
calls = self.store.get_calls(since=since, limit=100000)
|
|
1179
|
-
|
|
1180
|
-
dist: dict[str, dict[str, Any]] = {
|
|
1181
|
-
"cheap": {"count": 0, "cost": 0.0, "tokens": 0},
|
|
1182
|
-
"capable": {"count": 0, "cost": 0.0, "tokens": 0},
|
|
1183
|
-
"premium": {"count": 0, "cost": 0.0, "tokens": 0},
|
|
1184
|
-
}
|
|
1185
|
-
|
|
1186
|
-
for call in calls:
|
|
1187
|
-
if call.tier in dist:
|
|
1188
|
-
dist[call.tier]["count"] += 1
|
|
1189
|
-
dist[call.tier]["cost"] += call.estimated_cost
|
|
1190
|
-
dist[call.tier]["tokens"] += call.input_tokens + call.output_tokens
|
|
1191
|
-
|
|
1192
|
-
total_calls = sum(d["count"] for d in dist.values())
|
|
1193
|
-
for _tier, stats in dist.items():
|
|
1194
|
-
stats["percent"] = (stats["count"] / total_calls * 100) if total_calls > 0 else 0
|
|
1195
|
-
|
|
1196
|
-
return dist
|
|
1197
|
-
|
|
1198
|
-
def fallback_stats(
|
|
1199
|
-
self,
|
|
1200
|
-
since: datetime | None = None,
|
|
1201
|
-
) -> dict[str, Any]:
|
|
1202
|
-
"""Get fallback usage statistics.
|
|
1203
|
-
|
|
1204
|
-
Args:
|
|
1205
|
-
since: Only consider calls after this time
|
|
1206
|
-
|
|
1207
|
-
Returns:
|
|
1208
|
-
Dict with fallback stats
|
|
1209
|
-
|
|
1210
|
-
"""
|
|
1211
|
-
calls = self.store.get_calls(since=since, limit=100000)
|
|
1212
|
-
|
|
1213
|
-
total = len(calls)
|
|
1214
|
-
fallback_count = sum(1 for c in calls if c.fallback_used)
|
|
1215
|
-
error_count = sum(1 for c in calls if not c.success)
|
|
1216
|
-
|
|
1217
|
-
# Count by original provider
|
|
1218
|
-
by_provider: dict[str, int] = {}
|
|
1219
|
-
for call in calls:
|
|
1220
|
-
if call.fallback_used and call.original_provider:
|
|
1221
|
-
by_provider[call.original_provider] = by_provider.get(call.original_provider, 0) + 1
|
|
1222
|
-
|
|
1223
|
-
return {
|
|
1224
|
-
"total_calls": total,
|
|
1225
|
-
"fallback_count": fallback_count,
|
|
1226
|
-
"fallback_percent": (fallback_count / total * 100) if total > 0 else 0,
|
|
1227
|
-
"error_count": error_count,
|
|
1228
|
-
"error_rate": (error_count / total * 100) if total > 0 else 0,
|
|
1229
|
-
"by_original_provider": by_provider,
|
|
1230
|
-
}
|
|
1231
|
-
|
|
1232
|
-
def sonnet_opus_fallback_analysis(
|
|
1233
|
-
self,
|
|
1234
|
-
since: datetime | None = None,
|
|
1235
|
-
) -> dict[str, Any]:
|
|
1236
|
-
"""Analyze Sonnet 4.5 → Opus 4.5 fallback performance and cost savings.
|
|
1237
|
-
|
|
1238
|
-
Tracks:
|
|
1239
|
-
- How often Sonnet 4.5 succeeds vs needs Opus fallback
|
|
1240
|
-
- Cost savings from using Sonnet instead of always using Opus
|
|
1241
|
-
- Success rates by model
|
|
1242
|
-
|
|
1243
|
-
Args:
|
|
1244
|
-
since: Only consider calls after this time
|
|
1245
|
-
|
|
1246
|
-
Returns:
|
|
1247
|
-
Dict with fallback analysis and cost savings
|
|
1248
|
-
"""
|
|
1249
|
-
calls = self.store.get_calls(since=since, limit=100000)
|
|
1250
|
-
|
|
1251
|
-
# Filter for Anthropic calls (Sonnet/Opus)
|
|
1252
|
-
anthropic_calls = [
|
|
1253
|
-
c
|
|
1254
|
-
for c in calls
|
|
1255
|
-
if c.provider == "anthropic"
|
|
1256
|
-
and c.model_id in ["claude-sonnet-4-5", "claude-opus-4-5-20251101"]
|
|
1257
|
-
]
|
|
1258
|
-
|
|
1259
|
-
if not anthropic_calls:
|
|
1260
|
-
return {
|
|
1261
|
-
"total_calls": 0,
|
|
1262
|
-
"sonnet_attempts": 0,
|
|
1263
|
-
"sonnet_successes": 0,
|
|
1264
|
-
"opus_fallbacks": 0,
|
|
1265
|
-
"success_rate_sonnet": 0.0,
|
|
1266
|
-
"fallback_rate": 0.0,
|
|
1267
|
-
"actual_cost": 0.0,
|
|
1268
|
-
"always_opus_cost": 0.0,
|
|
1269
|
-
"savings": 0.0,
|
|
1270
|
-
"savings_percent": 0.0,
|
|
1271
|
-
}
|
|
1272
|
-
|
|
1273
|
-
total = len(anthropic_calls)
|
|
1274
|
-
|
|
1275
|
-
# Count Sonnet attempts and successes
|
|
1276
|
-
sonnet_calls = [c for c in anthropic_calls if c.model_id == "claude-sonnet-4-5"]
|
|
1277
|
-
sonnet_successes = sum(1 for c in sonnet_calls if c.success)
|
|
1278
|
-
|
|
1279
|
-
# Count Opus fallbacks (calls with fallback_used and ended up on Opus)
|
|
1280
|
-
opus_fallbacks = sum(
|
|
1281
|
-
1
|
|
1282
|
-
for c in anthropic_calls
|
|
1283
|
-
if c.model_id == "claude-opus-4-5-20251101" and c.fallback_used
|
|
1284
|
-
)
|
|
1285
|
-
|
|
1286
|
-
# Calculate costs
|
|
1287
|
-
actual_cost = sum(c.estimated_cost for c in anthropic_calls)
|
|
1288
|
-
|
|
1289
|
-
# Calculate what it would cost if everything used Opus
|
|
1290
|
-
opus_input_cost = 15.00 / 1_000_000 # per token
|
|
1291
|
-
opus_output_cost = 75.00 / 1_000_000 # per token
|
|
1292
|
-
always_opus_cost = sum(
|
|
1293
|
-
(c.input_tokens * opus_input_cost) + (c.output_tokens * opus_output_cost)
|
|
1294
|
-
for c in anthropic_calls
|
|
1295
|
-
)
|
|
1296
|
-
|
|
1297
|
-
savings = always_opus_cost - actual_cost
|
|
1298
|
-
savings_percent = (savings / always_opus_cost * 100) if always_opus_cost > 0 else 0
|
|
1299
|
-
|
|
1300
|
-
return {
|
|
1301
|
-
"total_calls": total,
|
|
1302
|
-
"sonnet_attempts": len(sonnet_calls),
|
|
1303
|
-
"sonnet_successes": sonnet_successes,
|
|
1304
|
-
"opus_fallbacks": opus_fallbacks,
|
|
1305
|
-
"success_rate_sonnet": (
|
|
1306
|
-
(sonnet_successes / len(sonnet_calls) * 100) if sonnet_calls else 0.0
|
|
1307
|
-
),
|
|
1308
|
-
"fallback_rate": (opus_fallbacks / total * 100) if total > 0 else 0.0,
|
|
1309
|
-
"actual_cost": actual_cost,
|
|
1310
|
-
"always_opus_cost": always_opus_cost,
|
|
1311
|
-
"savings": savings,
|
|
1312
|
-
"savings_percent": savings_percent,
|
|
1313
|
-
"avg_cost_per_call": actual_cost / total if total > 0 else 0.0,
|
|
1314
|
-
"avg_opus_cost_per_call": always_opus_cost / total if total > 0 else 0.0,
|
|
1315
|
-
}
|
|
1316
|
-
|
|
1317
|
-
def cost_savings_report(
|
|
1318
|
-
self,
|
|
1319
|
-
since: datetime | None = None,
|
|
1320
|
-
) -> dict[str, Any]:
|
|
1321
|
-
"""Generate cost savings report.
|
|
1322
|
-
|
|
1323
|
-
Args:
|
|
1324
|
-
since: Only consider workflows after this time
|
|
1325
|
-
|
|
1326
|
-
Returns:
|
|
1327
|
-
Dict with savings analysis
|
|
1328
|
-
|
|
1329
|
-
"""
|
|
1330
|
-
workflows = self.store.get_workflows(since=since, limit=10000)
|
|
1331
|
-
|
|
1332
|
-
total_cost = sum(wf.total_cost for wf in workflows)
|
|
1333
|
-
total_baseline = sum(wf.baseline_cost for wf in workflows)
|
|
1334
|
-
total_savings = sum(wf.savings for wf in workflows)
|
|
1335
|
-
|
|
1336
|
-
return {
|
|
1337
|
-
"workflow_count": len(workflows),
|
|
1338
|
-
"total_actual_cost": total_cost,
|
|
1339
|
-
"total_baseline_cost": total_baseline,
|
|
1340
|
-
"total_savings": total_savings,
|
|
1341
|
-
"savings_percent": (
|
|
1342
|
-
(total_savings / total_baseline * 100) if total_baseline > 0 else 0
|
|
1343
|
-
),
|
|
1344
|
-
"avg_cost_per_workflow": total_cost / len(workflows) if workflows else 0,
|
|
1345
|
-
}
|
|
1346
|
-
|
|
1347
|
-
# Tier 1 automation monitoring analytics
|
|
1348
|
-
|
|
1349
|
-
def task_routing_accuracy(
|
|
1350
|
-
self,
|
|
1351
|
-
since: datetime | None = None,
|
|
1352
|
-
) -> dict[str, Any]:
|
|
1353
|
-
"""Analyze task routing accuracy.
|
|
1354
|
-
|
|
1355
|
-
Args:
|
|
1356
|
-
since: Only consider routings after this time
|
|
1357
|
-
|
|
1358
|
-
Returns:
|
|
1359
|
-
Dict with routing accuracy metrics by task type and strategy
|
|
1360
|
-
|
|
1361
|
-
"""
|
|
1362
|
-
routings = self.store.get_task_routings(since=since, limit=10000)
|
|
1363
|
-
|
|
1364
|
-
if not routings:
|
|
1365
|
-
return {
|
|
1366
|
-
"total_tasks": 0,
|
|
1367
|
-
"successful_routing": 0,
|
|
1368
|
-
"accuracy_rate": 0.0,
|
|
1369
|
-
"avg_confidence": 0.0,
|
|
1370
|
-
"by_task_type": {},
|
|
1371
|
-
"by_strategy": {},
|
|
1372
|
-
}
|
|
1373
|
-
|
|
1374
|
-
total = len(routings)
|
|
1375
|
-
successful = sum(1 for r in routings if r.success)
|
|
1376
|
-
total_confidence = sum(r.confidence_score for r in routings)
|
|
1377
|
-
|
|
1378
|
-
# Aggregate by task type
|
|
1379
|
-
by_type: dict[str, dict[str, int | float]] = {}
|
|
1380
|
-
for r in routings:
|
|
1381
|
-
if r.task_type not in by_type:
|
|
1382
|
-
by_type[r.task_type] = {"total": 0, "success": 0}
|
|
1383
|
-
by_type[r.task_type]["total"] += 1
|
|
1384
|
-
if r.success:
|
|
1385
|
-
by_type[r.task_type]["success"] += 1
|
|
1386
|
-
|
|
1387
|
-
# Calculate rates
|
|
1388
|
-
for _task_type, stats in by_type.items():
|
|
1389
|
-
stats["rate"] = stats["success"] / stats["total"] if stats["total"] > 0 else 0.0
|
|
1390
|
-
|
|
1391
|
-
# Aggregate by strategy
|
|
1392
|
-
by_strategy: dict[str, dict[str, int]] = {}
|
|
1393
|
-
for r in routings:
|
|
1394
|
-
if r.routing_strategy not in by_strategy:
|
|
1395
|
-
by_strategy[r.routing_strategy] = {"total": 0, "success": 0}
|
|
1396
|
-
by_strategy[r.routing_strategy]["total"] += 1
|
|
1397
|
-
if r.success:
|
|
1398
|
-
by_strategy[r.routing_strategy]["success"] += 1
|
|
1399
|
-
|
|
1400
|
-
return {
|
|
1401
|
-
"total_tasks": total,
|
|
1402
|
-
"successful_routing": successful,
|
|
1403
|
-
"accuracy_rate": successful / total if total > 0 else 0.0,
|
|
1404
|
-
"avg_confidence": total_confidence / total if total > 0 else 0.0,
|
|
1405
|
-
"by_task_type": by_type,
|
|
1406
|
-
"by_strategy": by_strategy,
|
|
1407
|
-
}
|
|
1408
|
-
|
|
1409
|
-
-    def test_execution_trends(
-        self,
-        since: datetime | None = None,
-    ) -> dict[str, Any]:
-        """Analyze test execution trends.
-
-        Args:
-            since: Only consider executions after this time
-
-        Returns:
-            Dict with test execution metrics and trends
-
-        """
-        executions = self.store.get_test_executions(since=since, limit=1000)
-
-        if not executions:
-            return {
-                "total_executions": 0,
-                "success_rate": 0.0,
-                "avg_duration_seconds": 0.0,
-                "total_tests_run": 0,
-                "total_failures": 0,
-                "coverage_trend": "stable",
-                "most_failing_tests": [],
-            }
-
-        total_execs = len(executions)
-        successful_execs = sum(1 for e in executions if e.success)
-        total_duration = sum(e.duration_seconds for e in executions)
-        total_tests = sum(e.total_tests for e in executions)
-        total_failures = sum(e.failed for e in executions)
-
-        # Find most failing tests
-        failure_counts: dict[str, int] = {}
-        for exec_rec in executions:
-            for test in exec_rec.failed_tests:
-                test_name = test.get("name", "unknown")
-                failure_counts[test_name] = failure_counts.get(test_name, 0) + 1
-
-        most_failing = [
-            {"name": name, "failures": count}
-            for name, count in heapq.nlargest(10, failure_counts.items(), key=lambda x: x[1])
-        ]
-
-        return {
-            "total_executions": total_execs,
-            "success_rate": successful_execs / total_execs if total_execs > 0 else 0.0,
-            "avg_duration_seconds": total_duration / total_execs if total_execs > 0 else 0.0,
-            "total_tests_run": total_tests,
-            "total_failures": total_failures,
-            "coverage_trend": "stable",  # Will be computed from coverage_progress
-            "most_failing_tests": most_failing,
-        }
-
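`test_execution_trends` picks its top-10 failing tests with `heapq.nlargest`, which keeps a bounded heap and runs in O(n log k) rather than sorting the whole tally. A standalone sketch of that selection step (the test names and counts are invented):

```python
import heapq

# Invented failure tallies, keyed by test name, as built in the loop above.
failure_counts = {"test_login": 7, "test_cache": 2, "test_parse": 5, "test_io": 1}

# nlargest(k, ...) returns the k items with the largest key values;
# the key picks the count out of each (name, count) pair.
most_failing = [
    {"name": name, "failures": count}
    for name, count in heapq.nlargest(2, failure_counts.items(), key=lambda x: x[1])
]
print(most_failing)
# [{'name': 'test_login', 'failures': 7}, {'name': 'test_parse', 'failures': 5}]
```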
-    def coverage_progress(
-        self,
-        since: datetime | None = None,
-    ) -> dict[str, Any]:
-        """Track coverage progress over time.
-
-        Args:
-            since: Only consider coverage records after this time
-
-        Returns:
-            Dict with coverage metrics and trends
-
-        """
-        records = self.store.get_coverage_history(since=since, limit=1000)
-
-        if not records:
-            return {
-                "current_coverage": 0.0,
-                "previous_coverage": 0.0,
-                "change": 0.0,
-                "trend": "no_data",
-                "coverage_history": [],
-                "files_improved": 0,
-                "files_declined": 0,
-                "critical_gaps_count": 0,
-            }
-
-        # Latest and first records
-        latest = records[-1]
-        first = records[0]
-        current_coverage = latest.overall_percentage
-
-        # Calculate trend by comparing first to last
-        if len(records) == 1:
-            # Single record - no trend analysis possible
-            prev_coverage = 0.0
-            change = 0.0
-            trend = "stable"
-        else:
-            # Multiple records - compare first to last
-            prev_coverage = first.overall_percentage
-            change = current_coverage - prev_coverage
-
-            # Determine trend based on change
-            if change > 1.0:
-                trend = "improving"
-            elif change < -1.0:
-                trend = "declining"
-            else:
-                trend = "stable"
-
-        # Build coverage history from records
-        coverage_history = [
-            {
-                "timestamp": r.timestamp,
-                "coverage": r.overall_percentage,
-                "trend": r.trend,
-            }
-            for r in records
-        ]
-
-        return {
-            "current_coverage": current_coverage,
-            "previous_coverage": prev_coverage,
-            "change": change,
-            "trend": trend,
-            "coverage_history": coverage_history,
-            "files_improved": 0,  # Would need file-level history
-            "files_declined": 0,  # Would need file-level history
-            "critical_gaps_count": len(latest.critical_gaps),
-        }
-
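The trend classification in `coverage_progress` uses a ±1.0 percentage-point dead band so that small fluctuations in coverage don't flip the reported trend. Restated as a standalone helper (`classify_trend` and `dead_band` are my names for illustration, not an API in the package):

```python
def classify_trend(change: float, dead_band: float = 1.0) -> str:
    """Same rule as above: moves within +/-dead_band points count as stable."""
    if change > dead_band:
        return "improving"
    if change < -dead_band:
        return "declining"
    return "stable"


assert classify_trend(2.5) == "improving"
assert classify_trend(-0.4) == "stable"    # small dips are not a decline
assert classify_trend(-1.3) == "declining"
```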
-    def agent_performance(
-        self,
-        since: datetime | None = None,
-    ) -> dict[str, Any]:
-        """Analyze agent/workflow performance.
-
-        Args:
-            since: Only consider assignments after this time
-
-        Returns:
-            Dict with agent performance metrics
-
-        """
-        assignments = self.store.get_agent_assignments(
-            since=since, automated_only=False, limit=10000
-        )
-
-        if not assignments:
-            return {
-                "total_assignments": 0,
-                "by_agent": {},
-                "automation_rate": 0.0,
-                "human_review_rate": 0.0,
-            }
-
-        # Aggregate by agent
-        by_agent: dict[str, dict[str, Any]] = {}
-        total_assignments = len(assignments)
-        total_automated = 0
-        total_human_review = 0
-
-        for assignment in assignments:
-            agent = assignment.assigned_agent
-            if agent not in by_agent:
-                by_agent[agent] = {
-                    "assignments": 0,
-                    "completed": 0,
-                    "successful": 0,
-                    "success_rate": 0.0,
-                    "avg_duration_hours": 0.0,
-                    "quality_score_avg": 0.0,
-                    "total_duration": 0.0,
-                    "quality_scores": [],
-                }
-
-            stats = by_agent[agent]
-            stats["assignments"] += 1
-            if assignment.status == "completed":
-                stats["completed"] += 1
-                if assignment.actual_duration_hours is not None:
-                    stats["total_duration"] += assignment.actual_duration_hours
-
-            # Track successful assignments (not just completed)
-            if assignment.success:
-                stats["successful"] += 1
-
-            if assignment.automated_eligible:
-                total_automated += 1
-            if assignment.human_review_required:
-                total_human_review += 1
-
-        # Calculate averages
-        for _agent, stats in by_agent.items():
-            if stats["assignments"] > 0:
-                stats["success_rate"] = stats["successful"] / stats["assignments"]
-            if stats["completed"] > 0:
-                stats["avg_duration_hours"] = stats["total_duration"] / stats["completed"]
-
-            # Remove helper fields
-            del stats["total_duration"]
-            del stats["quality_scores"]
-            del stats["successful"]  # Remove helper field, keep success_rate
-
-        return {
-            "total_assignments": total_assignments,
-            "by_agent": by_agent,
-            "automation_rate": (
-                total_automated / total_assignments if total_assignments > 0 else 0.0
-            ),
-            "human_review_rate": (
-                total_human_review / total_assignments if total_assignments > 0 else 0.0
-            ),
-        }
-
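`agent_performance` accumulates working fields (`total_duration`, `successful`, `quality_scores`) alongside the reported ones, then deletes them before returning so callers only see finished metrics. A compact sketch of that accumulate-then-strip pattern, with invented assignment dicts standing in for the store's records:

```python
# Hypothetical assignment records; only the fields read above are modeled.
assignments = [
    {"agent": "test-gen", "status": "completed", "success": True,  "hours": 1.5},
    {"agent": "test-gen", "status": "completed", "success": False, "hours": 0.5},
    {"agent": "review",   "status": "pending",   "success": False, "hours": None},
]

by_agent: dict[str, dict] = {}
for a in assignments:
    stats = by_agent.setdefault(
        a["agent"],
        {"assignments": 0, "completed": 0, "successful": 0,
         "success_rate": 0.0, "avg_duration_hours": 0.0, "total_duration": 0.0},
    )
    stats["assignments"] += 1
    if a["status"] == "completed":
        stats["completed"] += 1
        if a["hours"] is not None:
            stats["total_duration"] += a["hours"]
    stats["successful"] += int(a["success"])

for stats in by_agent.values():
    stats["success_rate"] = stats["successful"] / stats["assignments"]
    if stats["completed"]:
        stats["avg_duration_hours"] = stats["total_duration"] / stats["completed"]
    # Working accumulators are dropped so callers see only finished metrics.
    del stats["total_duration"], stats["successful"]

print(by_agent["test-gen"])
# {'assignments': 2, 'completed': 2, 'success_rate': 0.5, 'avg_duration_hours': 1.0}
```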
-    def tier1_summary(
-        self,
-        since: datetime | None = None,
-    ) -> dict[str, Any]:
-        """Comprehensive Tier 1 automation summary.
-
-        Args:
-            since: Only consider records after this time
-
-        Returns:
-            Dict combining all Tier 1 metrics
-
-        """
-        return {
-            "task_routing": self.task_routing_accuracy(since),
-            "test_execution": self.test_execution_trends(since),
-            "coverage": self.coverage_progress(since),
-            "agent_performance": self.agent_performance(since),
-            "cost_savings": self.cost_savings_report(since),
-        }
-
-
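`tier1_summary` is pure composition: it forwards one `since` window to each analyzer and collects the results under stable keys (note it also calls `self.cost_savings_report`, defined elsewhere in this file). A toy sketch of the same fan-out shape; the stub analyzers and their return values here are placeholders, not the real implementations:

```python
from datetime import datetime, timedelta


class Tier1Report:
    """Stub analyzers; only the fan-out shape matches the removed method."""

    def task_routing_accuracy(self, since):
        return {"accuracy_rate": 0.92}

    def coverage_progress(self, since):
        return {"trend": "improving"}

    def tier1_summary(self, since: datetime | None = None) -> dict:
        # One shared window, one dict with a stable key per metric family.
        return {
            "task_routing": self.task_routing_accuracy(since),
            "coverage": self.coverage_progress(since),
        }


report = Tier1Report().tier1_summary(since=datetime.now() - timedelta(days=7))
print(report["coverage"]["trend"])  # improving
```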
-# Singleton for global telemetry
-_telemetry_store: TelemetryStore | None = None
-
-
-def get_telemetry_store(storage_dir: str = ".empathy") -> TelemetryStore:
-    """Get or create the global telemetry store."""
-    global _telemetry_store
-    if _telemetry_store is None:
-        _telemetry_store = TelemetryStore(storage_dir)
-    return _telemetry_store
-
-
-def log_llm_call(record: LLMCallRecord) -> None:
-    """Convenience function to log an LLM call."""
-    get_telemetry_store().log_call(record)
-
-
-def log_workflow_run(record: WorkflowRunRecord) -> None:
-    """Convenience function to log a workflow run."""
-    get_telemetry_store().log_workflow(record)
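The module tail wires a process-wide lazy singleton to thin logging wrappers. One property of this shape worth noting is that `storage_dir` only matters on the first call; later calls silently reuse the first instance. A generic sketch of the pattern (the `Store` class is a stand-in, not `TelemetryStore`):

```python
# Minimal lazy-singleton sketch, mirroring the accessor above.
class Store:
    def __init__(self, storage_dir: str) -> None:
        self.storage_dir = storage_dir


_store: Store | None = None


def get_store(storage_dir: str = ".empathy") -> Store:
    """Create the store on first use, then always return that instance."""
    global _store
    if _store is None:
        _store = Store(storage_dir)
    return _store


# A different storage_dir passed later is ignored -- the first wins.
assert get_store() is get_store("/tmp/elsewhere")
```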