empathy-framework 3.9.3-py3-none-any.whl → 3.10.2-py3-none-any.whl
- {empathy_framework-3.9.3.dist-info → empathy_framework-3.10.2.dist-info}/METADATA +37 -5
- {empathy_framework-3.9.3.dist-info → empathy_framework-3.10.2.dist-info}/RECORD +12 -12
- empathy_os/cache/hybrid.py +69 -9
- empathy_os/cli.py +183 -18
- empathy_os/cli_unified.py +28 -3
- empathy_os/models/telemetry.py +900 -2
- empathy_os/workflows/health_check.py +37 -0
- empathy_os/workflows/new_sample_workflow1.py +3 -3
- {empathy_framework-3.9.3.dist-info → empathy_framework-3.10.2.dist-info}/WHEEL +0 -0
- {empathy_framework-3.9.3.dist-info → empathy_framework-3.10.2.dist-info}/entry_points.txt +0 -0
- {empathy_framework-3.9.3.dist-info → empathy_framework-3.10.2.dist-info}/licenses/LICENSE +0 -0
- {empathy_framework-3.9.3.dist-info → empathy_framework-3.10.2.dist-info}/top_level.txt +0 -0
empathy_os/models/telemetry.py
CHANGED
@@ -7,6 +7,12 @@ Provides normalized schemas for tracking LLM calls and workflow runs:
 - TelemetryStore: JSONL file-based backend (default)
 - Analytics helpers for cost analysis and optimization

+Tier 1 Automation Monitoring:
+- TaskRoutingRecord: Task routing decisions and outcomes
+- TestExecutionRecord: Test execution results and coverage
+- CoverageRecord: Test coverage metrics and trends
+- AgentAssignmentRecord: Agent assignments for simple tasks
+
 Copyright 2025 Smart-AI-Memory
 Licensed under Fair Source License 0.9
 """
@@ -148,6 +154,231 @@ class WorkflowRunRecord:
         return cls(stages=stages, **data)


+@dataclass
+class TaskRoutingRecord:
+    """Record of task routing decision for Tier 1 automation.
+
+    Tracks which agent/workflow handles each task, routing strategy,
+    and execution outcome for automation monitoring.
+    """
+
+    # Identification (required)
+    routing_id: str
+    timestamp: str  # ISO format
+
+    # Task context (required)
+    task_description: str
+    task_type: str  # "code_review", "test_gen", "bug_fix", "refactor", etc.
+    task_complexity: str  # "simple", "moderate", "complex"
+
+    # Routing decision (required)
+    assigned_agent: str  # "test_gen_workflow", "code_review_workflow", etc.
+    assigned_tier: str  # "cheap", "capable", "premium"
+    routing_strategy: str  # "rule_based", "ml_predicted", "manual_override"
+
+    # Optional fields with defaults
+    task_dependencies: list[str] = field(default_factory=list)  # Task IDs this depends on
+    confidence_score: float = 1.0  # 0.0-1.0 for ML predictions
+
+    # Execution tracking
+    status: str = "pending"  # "pending", "running", "completed", "failed"
+    started_at: str | None = None
+    completed_at: str | None = None
+
+    # Outcome
+    success: bool = False
+    quality_score: float | None = None  # 0.0-1.0 if applicable
+    retry_count: int = 0
+    error_type: str | None = None
+    error_message: str | None = None
+
+    # Cost tracking
+    estimated_cost: float = 0.0
+    actual_cost: float | None = None
+
+    # Metadata
+    user_id: str | None = None
+    session_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "TaskRoutingRecord":
+        """Create from dictionary."""
+        return cls(**data)
+
+
+@dataclass
+class TestExecutionRecord:
+    """Record of test execution for Tier 1 QA automation.
+
+    Tracks test execution results, coverage metrics, and failure details
+    for quality assurance monitoring.
+    """
+
+    # Identification (required)
+    execution_id: str
+    timestamp: str  # ISO format
+
+    # Test context (required)
+    test_suite: str  # "unit", "integration", "e2e", "all"
+
+    # Optional fields with defaults
+    test_files: list[str] = field(default_factory=list)  # Specific test files executed
+    triggered_by: str = "manual"  # "workflow", "manual", "ci", "pre_commit"
+
+    # Execution details
+    command: str = ""
+    working_directory: str = ""
+    duration_seconds: float = 0.0
+
+    # Results
+    total_tests: int = 0
+    passed: int = 0
+    failed: int = 0
+    skipped: int = 0
+    errors: int = 0
+
+    # Coverage (if available)
+    coverage_percentage: float | None = None
+    coverage_report_path: str | None = None
+
+    # Failures
+    failed_tests: list[dict[str, Any]] = field(
+        default_factory=list
+    )  # [{name, file, error, traceback}]
+
+    # Status
+    success: bool = False  # True if all tests passed
+    exit_code: int = 0
+
+    # Metadata
+    workflow_id: str | None = None  # Link to workflow that triggered this
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "TestExecutionRecord":
+        """Create from dictionary."""
+        return cls(**data)
+
+
+@dataclass
+class CoverageRecord:
+    """Record of test coverage metrics for Tier 1 QA monitoring.
+
+    Tracks coverage percentage, trends, and critical gaps for
+    continuous quality improvement.
+    """
+
+    # Identification (required)
+    record_id: str
+    timestamp: str  # ISO format
+
+    # Coverage metrics (required)
+    overall_percentage: float
+    lines_total: int
+    lines_covered: int
+
+    # Optional fields with defaults
+    branches_total: int = 0
+    branches_covered: int = 0
+
+    # File-level breakdown
+    files_total: int = 0
+    files_well_covered: int = 0  # >= 80%
+    files_critical: int = 0  # < 50%
+    untested_files: list[str] = field(default_factory=list)
+
+    # Critical gaps
+    critical_gaps: list[dict[str, Any]] = field(
+        default_factory=list
+    )  # [{file, coverage, priority}]
+
+    # Trend data
+    previous_percentage: float | None = None
+    trend: str | None = None  # "improving", "declining", "stable"
+
+    # Source
+    coverage_format: str = "xml"  # "xml", "json", "lcov"
+    coverage_file: str = ""
+
+    # Metadata
+    workflow_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "CoverageRecord":
+        """Create from dictionary."""
+        return cls(**data)
+
+
+@dataclass
+class AgentAssignmentRecord:
+    """Record of agent assignment for simple tasks (Tier 1).
+
+    Tracks task assignments to agents/workflows with clear specs
+    and no complex dependencies for automation monitoring.
+    """
+
+    # Identification (required)
+    assignment_id: str
+    timestamp: str  # ISO format
+
+    # Task details (required)
+    task_id: str
+    task_title: str
+    task_description: str
+
+    # Assignment (required)
+    assigned_agent: str  # Agent/workflow name
+
+    # Optional fields with defaults
+    task_spec_clarity: float = 0.0  # 0.0-1.0, higher = clearer spec
+    assignment_reason: str = ""  # Why this agent was chosen
+    estimated_duration_hours: float = 0.0
+
+    # Criteria checks
+    has_clear_spec: bool = False
+    has_dependencies: bool = False
+    requires_human_review: bool = False
+    automated_eligible: bool = False  # True for Tier 1
+
+    # Execution tracking
+    status: str = "assigned"  # "assigned", "in_progress", "completed", "blocked"
+    started_at: str | None = None
+    completed_at: str | None = None
+    actual_duration_hours: float | None = None
+
+    # Outcome
+    success: bool = False
+    quality_check_passed: bool = False
+    human_review_required: bool = False
+
+    # Metadata
+    workflow_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "AgentAssignmentRecord":
+        """Create from dictionary."""
+        return cls(**data)
+
+
 @runtime_checkable
 class TelemetryBackend(Protocol):
     """Protocol for telemetry storage backends.
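All four new records are plain dataclasses whose timestamps are ISO strings rather than datetime objects, so `to_dict`/`from_dict` round-trip losslessly through the JSONL storage. A minimal sketch of constructing one (the field values here are illustrative, not from the package):

```python
import uuid
from datetime import datetime, timezone

from empathy_os.models.telemetry import TaskRoutingRecord

# Hypothetical task routing; only the required fields are passed,
# everything else takes the defaults shown in the diff above.
record = TaskRoutingRecord(
    routing_id=str(uuid.uuid4()),
    timestamp=datetime.now(timezone.utc).isoformat(),
    task_description="Add unit tests for cache eviction",
    task_type="test_gen",
    task_complexity="simple",
    assigned_agent="test_gen_workflow",
    assigned_tier="cheap",
    routing_strategy="rule_based",
)

# Round-trips through a plain dict, which is exactly what the JSONL store writes.
assert TaskRoutingRecord.from_dict(record.to_dict()) == record
```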
@@ -158,6 +389,9 @@ class TelemetryBackend(Protocol):
     - Cloud services (DataDog, New Relic, etc.)
     - Custom backends

+    Supports both core telemetry (LLM calls, workflows) and Tier 1
+    automation monitoring (task routing, tests, coverage, assignments).
+
     Example implementing a custom backend:
         >>> class DatabaseBackend:
         ...     def log_call(self, record: LLMCallRecord) -> None:
@@ -203,12 +437,88 @@ class TelemetryBackend(Protocol):
         """Get workflow run records with optional filters."""
         ...

+    # Tier 1 automation monitoring methods
+    def log_task_routing(self, record: TaskRoutingRecord) -> None:
+        """Log a task routing decision."""
+        ...
+
+    def log_test_execution(self, record: TestExecutionRecord) -> None:
+        """Log a test execution."""
+        ...
+
+    def log_coverage(self, record: CoverageRecord) -> None:
+        """Log coverage metrics."""
+        ...
+
+    def log_agent_assignment(self, record: AgentAssignmentRecord) -> None:
+        """Log an agent assignment."""
+        ...
+
+    def get_task_routings(
+        self,
+        since: datetime | None = None,
+        status: str | None = None,
+        limit: int = 1000,
+    ) -> list[TaskRoutingRecord]:
+        """Get task routing records with optional filters."""
+        ...
+
+    def get_test_executions(
+        self,
+        since: datetime | None = None,
+        success_only: bool = False,
+        limit: int = 100,
+    ) -> list[TestExecutionRecord]:
+        """Get test execution records with optional filters."""
+        ...
+
+    def get_coverage_history(
+        self,
+        since: datetime | None = None,
+        limit: int = 100,
+    ) -> list[CoverageRecord]:
+        """Get coverage history records."""
+        ...
+
+    def get_agent_assignments(
+        self,
+        since: datetime | None = None,
+        automated_only: bool = True,
+        limit: int = 1000,
+    ) -> list[AgentAssignmentRecord]:
+        """Get agent assignment records with optional filters."""
+        ...
+
+
+def _parse_timestamp(timestamp_str: str) -> datetime:
+    """Parse ISO format timestamp, handling 'Z' suffix for Python 3.10 compatibility.
+
+    Args:
+        timestamp_str: ISO format timestamp string, possibly with 'Z' suffix
+
+    Returns:
+        Parsed datetime object (timezone-naive UTC)
+    """
+    # Python 3.10's fromisoformat() doesn't handle 'Z' suffix
+    if timestamp_str.endswith('Z'):
+        timestamp_str = timestamp_str[:-1]
+
+    dt = datetime.fromisoformat(timestamp_str)
+
+    # Convert to naive UTC if timezone-aware
+    if dt.tzinfo is not None:
+        dt = dt.replace(tzinfo=None)
+
+    return dt
+

 class TelemetryStore:
     """JSONL file-based telemetry backend (default implementation).

     Stores records in JSONL format for easy streaming and analysis.
     Implements the TelemetryBackend protocol.
+
+    Supports both core telemetry and Tier 1 automation monitoring.
     """

     def __init__(self, storage_dir: str = ".empathy"):
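The module-level `_parse_timestamp` helper exists because `datetime.fromisoformat` only accepts a trailing `Z` from Python 3.11 onward; on 3.10 the suffix raises `ValueError`. Note that offset-aware inputs have their tzinfo dropped rather than converted, so callers are effectively assumed to log UTC timestamps. A quick sketch of the expected behavior, calling the private helper directly for illustration:

```python
from datetime import datetime

# 'Z'-suffixed UTC timestamp: the suffix is stripped before parsing,
# so this works on Python 3.10 as well as 3.11+.
assert _parse_timestamp("2025-06-01T12:30:00Z") == datetime(2025, 6, 1, 12, 30)

# Offset-aware input comes back naive, comparable to a naive `since` argument.
assert _parse_timestamp("2025-06-01T12:30:00+00:00").tzinfo is None
```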
@@ -221,9 +531,16 @@ class TelemetryStore:
         self.storage_dir = Path(storage_dir)
         self.storage_dir.mkdir(parents=True, exist_ok=True)

+        # Core telemetry files
         self.calls_file = self.storage_dir / "llm_calls.jsonl"
         self.workflows_file = self.storage_dir / "workflow_runs.jsonl"

+        # Tier 1 automation monitoring files
+        self.task_routing_file = self.storage_dir / "task_routing.jsonl"
+        self.test_executions_file = self.storage_dir / "test_executions.jsonl"
+        self.coverage_history_file = self.storage_dir / "coverage_history.jsonl"
+        self.agent_assignments_file = self.storage_dir / "agent_assignments.jsonl"
+
     def log_call(self, record: LLMCallRecord) -> None:
         """Log an LLM call record."""
         with open(self.calls_file, "a") as f:
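With the four new files, a single `TelemetryStore` directory now holds six append-only JSONL logs, one per record type. A sketch of the on-disk layout under the default `.empathy` directory, once each record type has been logged at least once (files are only created on first append):

```
.empathy/
├── llm_calls.jsonl
├── workflow_runs.jsonl
├── task_routing.jsonl
├── test_executions.jsonl
├── coverage_history.jsonl
└── agent_assignments.jsonl
```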
@@ -265,7 +582,7 @@ class TelemetryStore:

                     # Apply filters
                     if since:
-                        record_time =
+                        record_time = _parse_timestamp(record.timestamp)
                         if record_time < since:
                             continue

@@ -312,7 +629,7 @@ class TelemetryStore:

                     # Apply filters
                     if since:
-                        record_time =
+                        record_time = _parse_timestamp(record.started_at)
                         if record_time < since:
                             continue

@@ -328,6 +645,211 @@ class TelemetryStore:

         return records

+    # Tier 1 automation monitoring methods
+
+    def log_task_routing(self, record: TaskRoutingRecord) -> None:
+        """Log a task routing decision."""
+        with open(self.task_routing_file, "a") as f:
+            f.write(json.dumps(record.to_dict()) + "\n")
+
+    def log_test_execution(self, record: TestExecutionRecord) -> None:
+        """Log a test execution."""
+        with open(self.test_executions_file, "a") as f:
+            f.write(json.dumps(record.to_dict()) + "\n")
+
+    def log_coverage(self, record: CoverageRecord) -> None:
+        """Log coverage metrics."""
+        with open(self.coverage_history_file, "a") as f:
+            f.write(json.dumps(record.to_dict()) + "\n")
+
+    def log_agent_assignment(self, record: AgentAssignmentRecord) -> None:
+        """Log an agent assignment."""
+        with open(self.agent_assignments_file, "a") as f:
+            f.write(json.dumps(record.to_dict()) + "\n")
+
+    def get_task_routings(
+        self,
+        since: datetime | None = None,
+        status: str | None = None,
+        limit: int = 1000,
+    ) -> list[TaskRoutingRecord]:
+        """Get task routing records.
+
+        Args:
+            since: Only return records after this time
+            status: Filter by status (pending, running, completed, failed)
+            limit: Maximum records to return
+
+        Returns:
+            List of TaskRoutingRecord
+
+        """
+        records: list[TaskRoutingRecord] = []
+        if not self.task_routing_file.exists():
+            return records
+
+        with open(self.task_routing_file) as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    data = json.loads(line)
+                    record = TaskRoutingRecord.from_dict(data)
+
+                    # Apply filters
+                    if since:
+                        record_time = _parse_timestamp(record.timestamp)
+                        if record_time < since:
+                            continue
+
+                    if status and record.status != status:
+                        continue
+
+                    records.append(record)
+
+                    if len(records) >= limit:
+                        break
+                except (json.JSONDecodeError, KeyError):
+                    continue
+
+        return records
+
+    def get_test_executions(
+        self,
+        since: datetime | None = None,
+        success_only: bool = False,
+        limit: int = 100,
+    ) -> list[TestExecutionRecord]:
+        """Get test execution records.
+
+        Args:
+            since: Only return records after this time
+            success_only: Only return successful test runs
+            limit: Maximum records to return
+
+        Returns:
+            List of TestExecutionRecord
+
+        """
+        records: list[TestExecutionRecord] = []
+        if not self.test_executions_file.exists():
+            return records
+
+        with open(self.test_executions_file) as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    data = json.loads(line)
+                    record = TestExecutionRecord.from_dict(data)
+
+                    # Apply filters
+                    if since:
+                        record_time = _parse_timestamp(record.timestamp)
+                        if record_time < since:
+                            continue
+
+                    if success_only and not record.success:
+                        continue
+
+                    records.append(record)
+
+                    if len(records) >= limit:
+                        break
+                except (json.JSONDecodeError, KeyError):
+                    continue
+
+        return records
+
+    def get_coverage_history(
+        self,
+        since: datetime | None = None,
+        limit: int = 100,
+    ) -> list[CoverageRecord]:
+        """Get coverage history records.
+
+        Args:
+            since: Only return records after this time
+            limit: Maximum records to return
+
+        Returns:
+            List of CoverageRecord
+
+        """
+        records: list[CoverageRecord] = []
+        if not self.coverage_history_file.exists():
+            return records
+
+        with open(self.coverage_history_file) as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    data = json.loads(line)
+                    record = CoverageRecord.from_dict(data)
+
+                    # Apply filters
+                    if since:
+                        record_time = _parse_timestamp(record.timestamp)
+                        if record_time < since:
+                            continue
+
+                    records.append(record)
+
+                    if len(records) >= limit:
+                        break
+                except (json.JSONDecodeError, KeyError):
+                    continue
+
+        return records
+
+    def get_agent_assignments(
+        self,
+        since: datetime | None = None,
+        automated_only: bool = True,
+        limit: int = 1000,
+    ) -> list[AgentAssignmentRecord]:
+        """Get agent assignment records.
+
+        Args:
+            since: Only return records after this time
+            automated_only: Only return assignments eligible for Tier 1 automation
+            limit: Maximum records to return
+
+        Returns:
+            List of AgentAssignmentRecord
+
+        """
+        records: list[AgentAssignmentRecord] = []
+        if not self.agent_assignments_file.exists():
+            return records
+
+        with open(self.agent_assignments_file) as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    data = json.loads(line)
+                    record = AgentAssignmentRecord.from_dict(data)
+
+                    # Apply filters
+                    if since:
+                        record_time = _parse_timestamp(record.timestamp)
+                        if record_time < since:
+                            continue
+
+                    if automated_only and not record.automated_eligible:
+                        continue
+
+                    records.append(record)
+
+                    if len(records) >= limit:
+                        break
+                except (json.JSONDecodeError, KeyError):
+                    continue
+
+        return records
+

 class TelemetryAnalytics:
     """Analytics helpers for telemetry data.
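The new `log_*`/`get_*` pairs mirror the existing `log_call`/`get_calls` pattern: append one JSON object per line, then stream the file back, skipping blank or malformed lines, applying the filters in order, and stopping once `limit` records have passed the filters. A minimal sketch of the round trip, reusing the hypothetical `record` from the dataclass sketch above:

```python
from datetime import datetime, timedelta

store = TelemetryStore(storage_dir=".empathy")
store.log_task_routing(record)

# Failed routings from the last 7 days. `since` is compared against the
# naive-UTC datetimes produced by _parse_timestamp, so pass a naive value.
week_ago = datetime.utcnow() - timedelta(days=7)
for r in store.get_task_routings(since=week_ago, status="failed", limit=100):
    print(r.task_type, r.assigned_agent, r.error_type)
```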
@@ -495,6 +1017,89 @@ class TelemetryAnalytics:
             "by_original_provider": by_provider,
         }

+    def sonnet_opus_fallback_analysis(
+        self,
+        since: datetime | None = None,
+    ) -> dict[str, Any]:
+        """Analyze Sonnet 4.5 → Opus 4.5 fallback performance and cost savings.
+
+        Tracks:
+        - How often Sonnet 4.5 succeeds vs needs Opus fallback
+        - Cost savings from using Sonnet instead of always using Opus
+        - Success rates by model
+
+        Args:
+            since: Only consider calls after this time
+
+        Returns:
+            Dict with fallback analysis and cost savings
+        """
+        calls = self.store.get_calls(since=since, limit=100000)
+
+        # Filter for Anthropic calls (Sonnet/Opus)
+        anthropic_calls = [
+            c for c in calls
+            if c.provider == "anthropic"
+            and c.model_id in ["claude-sonnet-4-5", "claude-opus-4-5-20251101"]
+        ]
+
+        if not anthropic_calls:
+            return {
+                "total_calls": 0,
+                "sonnet_attempts": 0,
+                "sonnet_successes": 0,
+                "opus_fallbacks": 0,
+                "success_rate_sonnet": 0.0,
+                "fallback_rate": 0.0,
+                "actual_cost": 0.0,
+                "always_opus_cost": 0.0,
+                "savings": 0.0,
+                "savings_percent": 0.0,
+            }
+
+        total = len(anthropic_calls)
+
+        # Count Sonnet attempts and successes
+        sonnet_calls = [c for c in anthropic_calls if c.model_id == "claude-sonnet-4-5"]
+        sonnet_successes = sum(1 for c in sonnet_calls if c.success)
+
+        # Count Opus fallbacks (calls with fallback_used and ended up on Opus)
+        opus_fallbacks = sum(
+            1 for c in anthropic_calls
+            if c.model_id == "claude-opus-4-5-20251101" and c.fallback_used
+        )
+
+        # Calculate costs
+        actual_cost = sum(c.estimated_cost for c in anthropic_calls)
+
+        # Calculate what it would cost if everything used Opus
+        opus_input_cost = 15.00 / 1_000_000  # per token
+        opus_output_cost = 75.00 / 1_000_000  # per token
+        always_opus_cost = sum(
+            (c.input_tokens * opus_input_cost) + (c.output_tokens * opus_output_cost)
+            for c in anthropic_calls
+        )
+
+        savings = always_opus_cost - actual_cost
+        savings_percent = (savings / always_opus_cost * 100) if always_opus_cost > 0 else 0
+
+        return {
+            "total_calls": total,
+            "sonnet_attempts": len(sonnet_calls),
+            "sonnet_successes": sonnet_successes,
+            "opus_fallbacks": opus_fallbacks,
+            "success_rate_sonnet": (
+                (sonnet_successes / len(sonnet_calls) * 100) if sonnet_calls else 0.0
+            ),
+            "fallback_rate": (opus_fallbacks / total * 100) if total > 0 else 0.0,
+            "actual_cost": actual_cost,
+            "always_opus_cost": always_opus_cost,
+            "savings": savings,
+            "savings_percent": savings_percent,
+            "avg_cost_per_call": actual_cost / total if total > 0 else 0.0,
+            "avg_opus_cost_per_call": always_opus_cost / total if total > 0 else 0.0,
+        }
+
     def cost_savings_report(
         self,
         since: datetime | None = None,
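The counterfactual `always_opus_cost` uses hard-coded Opus list pricing of $15 per million input tokens and $75 per million output tokens. As a worked example with made-up token counts:

```python
# A hypothetical call: 10,000 input tokens, 2,000 output tokens.
input_tokens, output_tokens = 10_000, 2_000
always_opus = input_tokens * (15.00 / 1_000_000) + output_tokens * (75.00 / 1_000_000)
print(f"${always_opus:.2f}")  # $0.15 + $0.15 = $0.30 if this call had run on Opus
```

`savings` is then this counterfactual total minus what the mixed Sonnet/Opus traffic actually cost, so the figure is only meaningful while the hard-coded rates match the provider's current pricing.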
@@ -525,6 +1130,299 @@ class TelemetryAnalytics:
             "avg_cost_per_workflow": total_cost / len(workflows) if workflows else 0,
         }

+    # Tier 1 automation monitoring analytics
+
+    def task_routing_accuracy(
+        self,
+        since: datetime | None = None,
+    ) -> dict[str, Any]:
+        """Analyze task routing accuracy.
+
+        Args:
+            since: Only consider routings after this time
+
+        Returns:
+            Dict with routing accuracy metrics by task type and strategy
+
+        """
+        routings = self.store.get_task_routings(since=since, limit=10000)
+
+        if not routings:
+            return {
+                "total_tasks": 0,
+                "successful_routing": 0,
+                "accuracy_rate": 0.0,
+                "avg_confidence": 0.0,
+                "by_task_type": {},
+                "by_strategy": {},
+            }
+
+        total = len(routings)
+        successful = sum(1 for r in routings if r.success)
+        total_confidence = sum(r.confidence_score for r in routings)
+
+        # Aggregate by task type
+        by_type: dict[str, dict[str, int | float]] = {}
+        for r in routings:
+            if r.task_type not in by_type:
+                by_type[r.task_type] = {"total": 0, "success": 0}
+            by_type[r.task_type]["total"] += 1
+            if r.success:
+                by_type[r.task_type]["success"] += 1
+
+        # Calculate rates
+        for _task_type, stats in by_type.items():
+            stats["rate"] = stats["success"] / stats["total"] if stats["total"] > 0 else 0.0
+
+        # Aggregate by strategy
+        by_strategy: dict[str, dict[str, int]] = {}
+        for r in routings:
+            if r.routing_strategy not in by_strategy:
+                by_strategy[r.routing_strategy] = {"total": 0, "success": 0}
+            by_strategy[r.routing_strategy]["total"] += 1
+            if r.success:
+                by_strategy[r.routing_strategy]["success"] += 1
+
+        return {
+            "total_tasks": total,
+            "successful_routing": successful,
+            "accuracy_rate": successful / total if total > 0 else 0.0,
+            "avg_confidence": total_confidence / total if total > 0 else 0.0,
+            "by_task_type": by_type,
+            "by_strategy": by_strategy,
+        }
+
+    def test_execution_trends(
+        self,
+        since: datetime | None = None,
+    ) -> dict[str, Any]:
+        """Analyze test execution trends.
+
+        Args:
+            since: Only consider executions after this time
+
+        Returns:
+            Dict with test execution metrics and trends
+
+        """
+        executions = self.store.get_test_executions(since=since, limit=1000)
+
+        if not executions:
+            return {
+                "total_executions": 0,
+                "success_rate": 0.0,
+                "avg_duration_seconds": 0.0,
+                "total_tests_run": 0,
+                "total_failures": 0,
+                "coverage_trend": "stable",
+                "most_failing_tests": [],
+            }
+
+        total_execs = len(executions)
+        successful_execs = sum(1 for e in executions if e.success)
+        total_duration = sum(e.duration_seconds for e in executions)
+        total_tests = sum(e.total_tests for e in executions)
+        total_failures = sum(e.failed for e in executions)
+
+        # Find most failing tests
+        failure_counts: dict[str, int] = {}
+        for exec_rec in executions:
+            for test in exec_rec.failed_tests:
+                test_name = test.get("name", "unknown")
+                failure_counts[test_name] = failure_counts.get(test_name, 0) + 1
+
+        most_failing = [
+            {"name": name, "failures": count}
+            for name, count in sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[:10]
+        ]
+
+        return {
+            "total_executions": total_execs,
+            "success_rate": successful_execs / total_execs if total_execs > 0 else 0.0,
+            "avg_duration_seconds": total_duration / total_execs if total_execs > 0 else 0.0,
+            "total_tests_run": total_tests,
+            "total_failures": total_failures,
+            "coverage_trend": "stable",  # Will be computed from coverage_progress
+            "most_failing_tests": most_failing,
+        }
+
+    def coverage_progress(
+        self,
+        since: datetime | None = None,
+    ) -> dict[str, Any]:
+        """Track coverage progress over time.
+
+        Args:
+            since: Only consider coverage records after this time
+
+        Returns:
+            Dict with coverage metrics and trends
+
+        """
+        records = self.store.get_coverage_history(since=since, limit=1000)
+
+        if not records:
+            return {
+                "current_coverage": 0.0,
+                "previous_coverage": 0.0,
+                "change": 0.0,
+                "trend": "no_data",
+                "coverage_history": [],
+                "files_improved": 0,
+                "files_declined": 0,
+                "critical_gaps_count": 0,
+            }
+
+        # Latest and first records
+        latest = records[-1]
+        first = records[0]
+        current_coverage = latest.overall_percentage
+
+        # Calculate trend by comparing first to last
+        if len(records) == 1:
+            # Single record - no trend analysis possible
+            prev_coverage = 0.0
+            change = 0.0
+            trend = "stable"
+        else:
+            # Multiple records - compare first to last
+            prev_coverage = first.overall_percentage
+            change = current_coverage - prev_coverage
+
+            # Determine trend based on change
+            if change > 1.0:
+                trend = "improving"
+            elif change < -1.0:
+                trend = "declining"
+            else:
+                trend = "stable"
+
+        # Build coverage history from records
+        coverage_history = [
+            {
+                "timestamp": r.timestamp,
+                "coverage": r.overall_percentage,
+                "trend": r.trend,
+            }
+            for r in records
+        ]
+
+        return {
+            "current_coverage": current_coverage,
+            "previous_coverage": prev_coverage,
+            "change": change,
+            "trend": trend,
+            "coverage_history": coverage_history,
+            "files_improved": 0,  # Would need file-level history
+            "files_declined": 0,  # Would need file-level history
+            "critical_gaps_count": len(latest.critical_gaps),
+        }
+
+    def agent_performance(
+        self,
+        since: datetime | None = None,
+    ) -> dict[str, Any]:
+        """Analyze agent/workflow performance.
+
+        Args:
+            since: Only consider assignments after this time
+
+        Returns:
+            Dict with agent performance metrics
+
+        """
+        assignments = self.store.get_agent_assignments(
+            since=since, automated_only=False, limit=10000
+        )
+
+        if not assignments:
+            return {
+                "total_assignments": 0,
+                "by_agent": {},
+                "automation_rate": 0.0,
+                "human_review_rate": 0.0,
+            }
+
+        # Aggregate by agent
+        by_agent: dict[str, dict[str, Any]] = {}
+        total_assignments = len(assignments)
+        total_automated = 0
+        total_human_review = 0
+
+        for assignment in assignments:
+            agent = assignment.assigned_agent
+            if agent not in by_agent:
+                by_agent[agent] = {
+                    "assignments": 0,
+                    "completed": 0,
+                    "successful": 0,
+                    "success_rate": 0.0,
+                    "avg_duration_hours": 0.0,
+                    "quality_score_avg": 0.0,
+                    "total_duration": 0.0,
+                    "quality_scores": [],
+                }
+
+            stats = by_agent[agent]
+            stats["assignments"] += 1
+            if assignment.status == "completed":
+                stats["completed"] += 1
+                if assignment.actual_duration_hours is not None:
+                    stats["total_duration"] += assignment.actual_duration_hours
+
+            # Track successful assignments (not just completed)
+            if assignment.success:
+                stats["successful"] += 1
+
+            if assignment.automated_eligible:
+                total_automated += 1
+            if assignment.human_review_required:
+                total_human_review += 1
+
+        # Calculate averages
+        for _agent, stats in by_agent.items():
+            if stats["assignments"] > 0:
+                stats["success_rate"] = stats["successful"] / stats["assignments"]
+            if stats["completed"] > 0:
+                stats["avg_duration_hours"] = stats["total_duration"] / stats["completed"]
+
+            # Remove helper fields
+            del stats["total_duration"]
+            del stats["quality_scores"]
+            del stats["successful"]  # Remove helper field, keep success_rate
+
+        return {
+            "total_assignments": total_assignments,
+            "by_agent": by_agent,
+            "automation_rate": (
+                total_automated / total_assignments if total_assignments > 0 else 0.0
+            ),
+            "human_review_rate": (
+                total_human_review / total_assignments if total_assignments > 0 else 0.0
+            ),
+        }
+
+    def tier1_summary(
+        self,
+        since: datetime | None = None,
+    ) -> dict[str, Any]:
+        """Comprehensive Tier 1 automation summary.
+
+        Args:
+            since: Only consider records after this time
+
+        Returns:
+            Dict combining all Tier 1 metrics
+
+        """
+        return {
+            "task_routing": self.task_routing_accuracy(since),
+            "test_execution": self.test_execution_trends(since),
+            "coverage": self.coverage_progress(since),
+            "agent_performance": self.agent_performance(since),
+            "cost_savings": self.cost_savings_report(since),
+        }
+

 # Singleton for global telemetry
 _telemetry_store: TelemetryStore | None = None