empathy-framework 3.9.3__py3-none-any.whl → 3.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,12 @@ Provides normalized schemas for tracking LLM calls and workflow runs:
7
7
  - TelemetryStore: JSONL file-based backend (default)
8
8
  - Analytics helpers for cost analysis and optimization
9
9
 
10
+ Tier 1 Automation Monitoring:
11
+ - TaskRoutingRecord: Task routing decisions and outcomes
12
+ - TestExecutionRecord: Test execution results and coverage
13
+ - CoverageRecord: Test coverage metrics and trends
14
+ - AgentAssignmentRecord: Agent assignments for simple tasks
15
+
10
16
  Copyright 2025 Smart-AI-Memory
11
17
  Licensed under Fair Source License 0.9
12
18
  """
@@ -148,6 +154,231 @@ class WorkflowRunRecord:
148
154
  return cls(stages=stages, **data)
149
155
 
150
156
 
157
@dataclass
class TaskRoutingRecord:
    """Record of task routing decision for Tier 1 automation.

    Tracks which agent/workflow handles each task, routing strategy,
    and execution outcome for automation monitoring.

    Round-trips to/from plain dicts via ``to_dict``/``from_dict`` for
    JSONL storage.
    """

    # Identification (required)
    routing_id: str  # identifier for this routing decision
    timestamp: str  # ISO format

    # Task context (required)
    task_description: str
    task_type: str  # "code_review", "test_gen", "bug_fix", "refactor", etc.
    task_complexity: str  # "simple", "moderate", "complex"

    # Routing decision (required)
    assigned_agent: str  # "test_gen_workflow", "code_review_workflow", etc.
    assigned_tier: str  # "cheap", "capable", "premium"
    routing_strategy: str  # "rule_based", "ml_predicted", "manual_override"

    # Optional fields with defaults
    task_dependencies: list[str] = field(default_factory=list)  # Task IDs this depends on
    confidence_score: float = 1.0  # 0.0-1.0 for ML predictions

    # Execution tracking
    status: str = "pending"  # "pending", "running", "completed", "failed"
    started_at: str | None = None  # ISO timestamp, set when execution begins
    completed_at: str | None = None  # ISO timestamp, set when execution ends

    # Outcome
    success: bool = False
    quality_score: float | None = None  # 0.0-1.0 if applicable
    retry_count: int = 0
    error_type: str | None = None
    error_message: str | None = None

    # Cost tracking
    estimated_cost: float = 0.0
    actual_cost: float | None = None  # filled in after execution, if known

    # Metadata
    user_id: str | None = None
    session_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TaskRoutingRecord":
        """Create from dictionary.

        Note: keys are passed straight to the constructor, so unknown or
        missing required keys raise ``TypeError``.
        """
        return cls(**data)
212
+
213
+
214
@dataclass
class TestExecutionRecord:
    """Record of test execution for Tier 1 QA automation.

    Tracks test execution results, coverage metrics, and failure details
    for quality assurance monitoring.

    Round-trips to/from plain dicts via ``to_dict``/``from_dict`` for
    JSONL storage.
    """

    # Identification (required)
    execution_id: str  # identifier for this test run
    timestamp: str  # ISO format

    # Test context (required)
    test_suite: str  # "unit", "integration", "e2e", "all"

    # Optional fields with defaults
    test_files: list[str] = field(default_factory=list)  # Specific test files executed
    triggered_by: str = "manual"  # "workflow", "manual", "ci", "pre_commit"

    # Execution details
    command: str = ""  # exact command line that was run
    working_directory: str = ""
    duration_seconds: float = 0.0

    # Results (counts per outcome category)
    total_tests: int = 0
    passed: int = 0
    failed: int = 0
    skipped: int = 0
    errors: int = 0

    # Coverage (if available)
    coverage_percentage: float | None = None
    coverage_report_path: str | None = None

    # Failures
    failed_tests: list[dict[str, Any]] = field(
        default_factory=list
    )  # [{name, file, error, traceback}]

    # Status
    success: bool = False  # True if all tests passed
    exit_code: int = 0

    # Metadata
    workflow_id: str | None = None  # Link to workflow that triggered this
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TestExecutionRecord":
        """Create from dictionary.

        Note: keys are passed straight to the constructor, so unknown or
        missing required keys raise ``TypeError``.
        """
        return cls(**data)
270
+
271
+
272
@dataclass
class CoverageRecord:
    """Record of test coverage metrics for Tier 1 QA monitoring.

    Tracks coverage percentage, trends, and critical gaps for
    continuous quality improvement.

    Round-trips to/from plain dicts via ``to_dict``/``from_dict`` for
    JSONL storage.
    """

    # Identification (required)
    record_id: str  # identifier for this coverage snapshot
    timestamp: str  # ISO format

    # Coverage metrics (required)
    overall_percentage: float  # aggregate line-coverage percentage
    lines_total: int
    lines_covered: int

    # Optional fields with defaults
    branches_total: int = 0
    branches_covered: int = 0

    # File-level breakdown
    files_total: int = 0
    files_well_covered: int = 0  # >= 80%
    files_critical: int = 0  # < 50%
    untested_files: list[str] = field(default_factory=list)

    # Critical gaps
    critical_gaps: list[dict[str, Any]] = field(
        default_factory=list
    )  # [{file, coverage, priority}]

    # Trend data
    previous_percentage: float | None = None  # prior snapshot, if known
    trend: str | None = None  # "improving", "declining", "stable"

    # Source
    coverage_format: str = "xml"  # "xml", "json", "lcov"
    coverage_file: str = ""  # path the metrics were parsed from

    # Metadata
    workflow_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CoverageRecord":
        """Create from dictionary.

        Note: keys are passed straight to the constructor, so unknown or
        missing required keys raise ``TypeError``.
        """
        return cls(**data)
324
+
325
+
326
@dataclass
class AgentAssignmentRecord:
    """Record of agent assignment for simple tasks (Tier 1).

    Tracks task assignments to agents/workflows with clear specs
    and no complex dependencies for automation monitoring.

    Round-trips to/from plain dicts via ``to_dict``/``from_dict`` for
    JSONL storage.
    """

    # Identification (required)
    assignment_id: str  # identifier for this assignment
    timestamp: str  # ISO format

    # Task details (required)
    task_id: str
    task_title: str
    task_description: str

    # Assignment (required)
    assigned_agent: str  # Agent/workflow name

    # Optional fields with defaults
    task_spec_clarity: float = 0.0  # 0.0-1.0, higher = clearer spec
    assignment_reason: str = ""  # Why this agent was chosen
    estimated_duration_hours: float = 0.0

    # Criteria checks
    has_clear_spec: bool = False
    has_dependencies: bool = False
    requires_human_review: bool = False
    automated_eligible: bool = False  # True for Tier 1

    # Execution tracking
    status: str = "assigned"  # "assigned", "in_progress", "completed", "blocked"
    started_at: str | None = None  # ISO timestamp
    completed_at: str | None = None  # ISO timestamp
    actual_duration_hours: float | None = None

    # Outcome
    success: bool = False
    quality_check_passed: bool = False
    human_review_required: bool = False

    # Metadata
    workflow_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AgentAssignmentRecord":
        """Create from dictionary.

        Note: keys are passed straight to the constructor, so unknown or
        missing required keys raise ``TypeError``.
        """
        return cls(**data)
380
+
381
+
151
382
  @runtime_checkable
152
383
  class TelemetryBackend(Protocol):
153
384
  """Protocol for telemetry storage backends.
@@ -158,6 +389,9 @@ class TelemetryBackend(Protocol):
158
389
  - Cloud services (DataDog, New Relic, etc.)
159
390
  - Custom backends
160
391
 
392
+ Supports both core telemetry (LLM calls, workflows) and Tier 1
393
+ automation monitoring (task routing, tests, coverage, assignments).
394
+
161
395
  Example implementing a custom backend:
162
396
  >>> class DatabaseBackend:
163
397
  ... def log_call(self, record: LLMCallRecord) -> None:
@@ -203,12 +437,88 @@ class TelemetryBackend(Protocol):
203
437
  """Get workflow run records with optional filters."""
204
438
  ...
205
439
 
440
+ # Tier 1 automation monitoring methods
441
    def log_task_routing(self, record: TaskRoutingRecord) -> None:
        """Log a task routing decision.

        Protocol member: implementations decide how/where the record is stored.
        """
        ...
444
+
445
    def log_test_execution(self, record: TestExecutionRecord) -> None:
        """Log a test execution.

        Protocol member: implementations decide how/where the record is stored.
        """
        ...
448
+
449
    def log_coverage(self, record: CoverageRecord) -> None:
        """Log coverage metrics.

        Protocol member: implementations decide how/where the record is stored.
        """
        ...
452
+
453
    def log_agent_assignment(self, record: AgentAssignmentRecord) -> None:
        """Log an agent assignment.

        Protocol member: implementations decide how/where the record is stored.
        """
        ...
456
+
457
    def get_task_routings(
        self,
        since: datetime | None = None,
        status: str | None = None,
        limit: int = 1000,
    ) -> list[TaskRoutingRecord]:
        """Get task routing records with optional filters.

        Args:
            since: Only return records at/after this time.
            status: Filter to a single status value, if given.
            limit: Maximum number of records to return.
        """
        ...
465
+
466
    def get_test_executions(
        self,
        since: datetime | None = None,
        success_only: bool = False,
        limit: int = 100,
    ) -> list[TestExecutionRecord]:
        """Get test execution records with optional filters.

        Args:
            since: Only return records at/after this time.
            success_only: If True, only fully successful runs are returned.
            limit: Maximum number of records to return.
        """
        ...
474
+
475
    def get_coverage_history(
        self,
        since: datetime | None = None,
        limit: int = 100,
    ) -> list[CoverageRecord]:
        """Get coverage history records.

        Args:
            since: Only return records at/after this time.
            limit: Maximum number of records to return.
        """
        ...
482
+
483
    def get_agent_assignments(
        self,
        since: datetime | None = None,
        automated_only: bool = True,
        limit: int = 1000,
    ) -> list[AgentAssignmentRecord]:
        """Get agent assignment records with optional filters.

        Args:
            since: Only return records at/after this time.
            automated_only: If True, only Tier 1-eligible assignments are returned.
            limit: Maximum number of records to return.
        """
        ...
491
+
492
+
493
+ def _parse_timestamp(timestamp_str: str) -> datetime:
494
+ """Parse ISO format timestamp, handling 'Z' suffix for Python 3.10 compatibility.
495
+
496
+ Args:
497
+ timestamp_str: ISO format timestamp string, possibly with 'Z' suffix
498
+
499
+ Returns:
500
+ Parsed datetime object (timezone-naive UTC)
501
+ """
502
+ # Python 3.10's fromisoformat() doesn't handle 'Z' suffix
503
+ if timestamp_str.endswith('Z'):
504
+ timestamp_str = timestamp_str[:-1]
505
+
506
+ dt = datetime.fromisoformat(timestamp_str)
507
+
508
+ # Convert to naive UTC if timezone-aware
509
+ if dt.tzinfo is not None:
510
+ dt = dt.replace(tzinfo=None)
511
+
512
+ return dt
513
+
206
514
 
207
515
  class TelemetryStore:
208
516
  """JSONL file-based telemetry backend (default implementation).
209
517
 
210
518
  Stores records in JSONL format for easy streaming and analysis.
211
519
  Implements the TelemetryBackend protocol.
520
+
521
+ Supports both core telemetry and Tier 1 automation monitoring.
212
522
  """
213
523
 
214
524
  def __init__(self, storage_dir: str = ".empathy"):
@@ -221,9 +531,16 @@ class TelemetryStore:
221
531
  self.storage_dir = Path(storage_dir)
222
532
  self.storage_dir.mkdir(parents=True, exist_ok=True)
223
533
 
534
+ # Core telemetry files
224
535
  self.calls_file = self.storage_dir / "llm_calls.jsonl"
225
536
  self.workflows_file = self.storage_dir / "workflow_runs.jsonl"
226
537
 
538
+ # Tier 1 automation monitoring files
539
+ self.task_routing_file = self.storage_dir / "task_routing.jsonl"
540
+ self.test_executions_file = self.storage_dir / "test_executions.jsonl"
541
+ self.coverage_history_file = self.storage_dir / "coverage_history.jsonl"
542
+ self.agent_assignments_file = self.storage_dir / "agent_assignments.jsonl"
543
+
227
544
  def log_call(self, record: LLMCallRecord) -> None:
228
545
  """Log an LLM call record."""
229
546
  with open(self.calls_file, "a") as f:
@@ -265,7 +582,7 @@ class TelemetryStore:
265
582
 
266
583
  # Apply filters
267
584
  if since:
268
- record_time = datetime.fromisoformat(record.timestamp)
585
+ record_time = _parse_timestamp(record.timestamp)
269
586
  if record_time < since:
270
587
  continue
271
588
 
@@ -312,7 +629,7 @@ class TelemetryStore:
312
629
 
313
630
  # Apply filters
314
631
  if since:
315
- record_time = datetime.fromisoformat(record.started_at)
632
+ record_time = _parse_timestamp(record.started_at)
316
633
  if record_time < since:
317
634
  continue
318
635
 
@@ -328,6 +645,211 @@ class TelemetryStore:
328
645
 
329
646
  return records
330
647
 
648
+ # Tier 1 automation monitoring methods
649
+
650
+ def log_task_routing(self, record: TaskRoutingRecord) -> None:
651
+ """Log a task routing decision."""
652
+ with open(self.task_routing_file, "a") as f:
653
+ f.write(json.dumps(record.to_dict()) + "\n")
654
+
655
+ def log_test_execution(self, record: TestExecutionRecord) -> None:
656
+ """Log a test execution."""
657
+ with open(self.test_executions_file, "a") as f:
658
+ f.write(json.dumps(record.to_dict()) + "\n")
659
+
660
+ def log_coverage(self, record: CoverageRecord) -> None:
661
+ """Log coverage metrics."""
662
+ with open(self.coverage_history_file, "a") as f:
663
+ f.write(json.dumps(record.to_dict()) + "\n")
664
+
665
+ def log_agent_assignment(self, record: AgentAssignmentRecord) -> None:
666
+ """Log an agent assignment."""
667
+ with open(self.agent_assignments_file, "a") as f:
668
+ f.write(json.dumps(record.to_dict()) + "\n")
669
+
670
+ def get_task_routings(
671
+ self,
672
+ since: datetime | None = None,
673
+ status: str | None = None,
674
+ limit: int = 1000,
675
+ ) -> list[TaskRoutingRecord]:
676
+ """Get task routing records.
677
+
678
+ Args:
679
+ since: Only return records after this time
680
+ status: Filter by status (pending, running, completed, failed)
681
+ limit: Maximum records to return
682
+
683
+ Returns:
684
+ List of TaskRoutingRecord
685
+
686
+ """
687
+ records: list[TaskRoutingRecord] = []
688
+ if not self.task_routing_file.exists():
689
+ return records
690
+
691
+ with open(self.task_routing_file) as f:
692
+ for line in f:
693
+ if not line.strip():
694
+ continue
695
+ try:
696
+ data = json.loads(line)
697
+ record = TaskRoutingRecord.from_dict(data)
698
+
699
+ # Apply filters
700
+ if since:
701
+ record_time = _parse_timestamp(record.timestamp)
702
+ if record_time < since:
703
+ continue
704
+
705
+ if status and record.status != status:
706
+ continue
707
+
708
+ records.append(record)
709
+
710
+ if len(records) >= limit:
711
+ break
712
+ except (json.JSONDecodeError, KeyError):
713
+ continue
714
+
715
+ return records
716
+
717
+ def get_test_executions(
718
+ self,
719
+ since: datetime | None = None,
720
+ success_only: bool = False,
721
+ limit: int = 100,
722
+ ) -> list[TestExecutionRecord]:
723
+ """Get test execution records.
724
+
725
+ Args:
726
+ since: Only return records after this time
727
+ success_only: Only return successful test runs
728
+ limit: Maximum records to return
729
+
730
+ Returns:
731
+ List of TestExecutionRecord
732
+
733
+ """
734
+ records: list[TestExecutionRecord] = []
735
+ if not self.test_executions_file.exists():
736
+ return records
737
+
738
+ with open(self.test_executions_file) as f:
739
+ for line in f:
740
+ if not line.strip():
741
+ continue
742
+ try:
743
+ data = json.loads(line)
744
+ record = TestExecutionRecord.from_dict(data)
745
+
746
+ # Apply filters
747
+ if since:
748
+ record_time = _parse_timestamp(record.timestamp)
749
+ if record_time < since:
750
+ continue
751
+
752
+ if success_only and not record.success:
753
+ continue
754
+
755
+ records.append(record)
756
+
757
+ if len(records) >= limit:
758
+ break
759
+ except (json.JSONDecodeError, KeyError):
760
+ continue
761
+
762
+ return records
763
+
764
+ def get_coverage_history(
765
+ self,
766
+ since: datetime | None = None,
767
+ limit: int = 100,
768
+ ) -> list[CoverageRecord]:
769
+ """Get coverage history records.
770
+
771
+ Args:
772
+ since: Only return records after this time
773
+ limit: Maximum records to return
774
+
775
+ Returns:
776
+ List of CoverageRecord
777
+
778
+ """
779
+ records: list[CoverageRecord] = []
780
+ if not self.coverage_history_file.exists():
781
+ return records
782
+
783
+ with open(self.coverage_history_file) as f:
784
+ for line in f:
785
+ if not line.strip():
786
+ continue
787
+ try:
788
+ data = json.loads(line)
789
+ record = CoverageRecord.from_dict(data)
790
+
791
+ # Apply filters
792
+ if since:
793
+ record_time = _parse_timestamp(record.timestamp)
794
+ if record_time < since:
795
+ continue
796
+
797
+ records.append(record)
798
+
799
+ if len(records) >= limit:
800
+ break
801
+ except (json.JSONDecodeError, KeyError):
802
+ continue
803
+
804
+ return records
805
+
806
+ def get_agent_assignments(
807
+ self,
808
+ since: datetime | None = None,
809
+ automated_only: bool = True,
810
+ limit: int = 1000,
811
+ ) -> list[AgentAssignmentRecord]:
812
+ """Get agent assignment records.
813
+
814
+ Args:
815
+ since: Only return records after this time
816
+ automated_only: Only return assignments eligible for Tier 1 automation
817
+ limit: Maximum records to return
818
+
819
+ Returns:
820
+ List of AgentAssignmentRecord
821
+
822
+ """
823
+ records: list[AgentAssignmentRecord] = []
824
+ if not self.agent_assignments_file.exists():
825
+ return records
826
+
827
+ with open(self.agent_assignments_file) as f:
828
+ for line in f:
829
+ if not line.strip():
830
+ continue
831
+ try:
832
+ data = json.loads(line)
833
+ record = AgentAssignmentRecord.from_dict(data)
834
+
835
+ # Apply filters
836
+ if since:
837
+ record_time = _parse_timestamp(record.timestamp)
838
+ if record_time < since:
839
+ continue
840
+
841
+ if automated_only and not record.automated_eligible:
842
+ continue
843
+
844
+ records.append(record)
845
+
846
+ if len(records) >= limit:
847
+ break
848
+ except (json.JSONDecodeError, KeyError):
849
+ continue
850
+
851
+ return records
852
+
331
853
 
332
854
  class TelemetryAnalytics:
333
855
  """Analytics helpers for telemetry data.
@@ -495,6 +1017,89 @@ class TelemetryAnalytics:
495
1017
  "by_original_provider": by_provider,
496
1018
  }
497
1019
 
1020
+ def sonnet_opus_fallback_analysis(
1021
+ self,
1022
+ since: datetime | None = None,
1023
+ ) -> dict[str, Any]:
1024
+ """Analyze Sonnet 4.5 → Opus 4.5 fallback performance and cost savings.
1025
+
1026
+ Tracks:
1027
+ - How often Sonnet 4.5 succeeds vs needs Opus fallback
1028
+ - Cost savings from using Sonnet instead of always using Opus
1029
+ - Success rates by model
1030
+
1031
+ Args:
1032
+ since: Only consider calls after this time
1033
+
1034
+ Returns:
1035
+ Dict with fallback analysis and cost savings
1036
+ """
1037
+ calls = self.store.get_calls(since=since, limit=100000)
1038
+
1039
+ # Filter for Anthropic calls (Sonnet/Opus)
1040
+ anthropic_calls = [
1041
+ c for c in calls
1042
+ if c.provider == "anthropic"
1043
+ and c.model_id in ["claude-sonnet-4-5", "claude-opus-4-5-20251101"]
1044
+ ]
1045
+
1046
+ if not anthropic_calls:
1047
+ return {
1048
+ "total_calls": 0,
1049
+ "sonnet_attempts": 0,
1050
+ "sonnet_successes": 0,
1051
+ "opus_fallbacks": 0,
1052
+ "success_rate_sonnet": 0.0,
1053
+ "fallback_rate": 0.0,
1054
+ "actual_cost": 0.0,
1055
+ "always_opus_cost": 0.0,
1056
+ "savings": 0.0,
1057
+ "savings_percent": 0.0,
1058
+ }
1059
+
1060
+ total = len(anthropic_calls)
1061
+
1062
+ # Count Sonnet attempts and successes
1063
+ sonnet_calls = [c for c in anthropic_calls if c.model_id == "claude-sonnet-4-5"]
1064
+ sonnet_successes = sum(1 for c in sonnet_calls if c.success)
1065
+
1066
+ # Count Opus fallbacks (calls with fallback_used and ended up on Opus)
1067
+ opus_fallbacks = sum(
1068
+ 1 for c in anthropic_calls
1069
+ if c.model_id == "claude-opus-4-5-20251101" and c.fallback_used
1070
+ )
1071
+
1072
+ # Calculate costs
1073
+ actual_cost = sum(c.estimated_cost for c in anthropic_calls)
1074
+
1075
+ # Calculate what it would cost if everything used Opus
1076
+ opus_input_cost = 15.00 / 1_000_000 # per token
1077
+ opus_output_cost = 75.00 / 1_000_000 # per token
1078
+ always_opus_cost = sum(
1079
+ (c.input_tokens * opus_input_cost) + (c.output_tokens * opus_output_cost)
1080
+ for c in anthropic_calls
1081
+ )
1082
+
1083
+ savings = always_opus_cost - actual_cost
1084
+ savings_percent = (savings / always_opus_cost * 100) if always_opus_cost > 0 else 0
1085
+
1086
+ return {
1087
+ "total_calls": total,
1088
+ "sonnet_attempts": len(sonnet_calls),
1089
+ "sonnet_successes": sonnet_successes,
1090
+ "opus_fallbacks": opus_fallbacks,
1091
+ "success_rate_sonnet": (
1092
+ (sonnet_successes / len(sonnet_calls) * 100) if sonnet_calls else 0.0
1093
+ ),
1094
+ "fallback_rate": (opus_fallbacks / total * 100) if total > 0 else 0.0,
1095
+ "actual_cost": actual_cost,
1096
+ "always_opus_cost": always_opus_cost,
1097
+ "savings": savings,
1098
+ "savings_percent": savings_percent,
1099
+ "avg_cost_per_call": actual_cost / total if total > 0 else 0.0,
1100
+ "avg_opus_cost_per_call": always_opus_cost / total if total > 0 else 0.0,
1101
+ }
1102
+
498
1103
  def cost_savings_report(
499
1104
  self,
500
1105
  since: datetime | None = None,
@@ -525,6 +1130,299 @@ class TelemetryAnalytics:
525
1130
  "avg_cost_per_workflow": total_cost / len(workflows) if workflows else 0,
526
1131
  }
527
1132
 
1133
+ # Tier 1 automation monitoring analytics
1134
+
1135
+ def task_routing_accuracy(
1136
+ self,
1137
+ since: datetime | None = None,
1138
+ ) -> dict[str, Any]:
1139
+ """Analyze task routing accuracy.
1140
+
1141
+ Args:
1142
+ since: Only consider routings after this time
1143
+
1144
+ Returns:
1145
+ Dict with routing accuracy metrics by task type and strategy
1146
+
1147
+ """
1148
+ routings = self.store.get_task_routings(since=since, limit=10000)
1149
+
1150
+ if not routings:
1151
+ return {
1152
+ "total_tasks": 0,
1153
+ "successful_routing": 0,
1154
+ "accuracy_rate": 0.0,
1155
+ "avg_confidence": 0.0,
1156
+ "by_task_type": {},
1157
+ "by_strategy": {},
1158
+ }
1159
+
1160
+ total = len(routings)
1161
+ successful = sum(1 for r in routings if r.success)
1162
+ total_confidence = sum(r.confidence_score for r in routings)
1163
+
1164
+ # Aggregate by task type
1165
+ by_type: dict[str, dict[str, int | float]] = {}
1166
+ for r in routings:
1167
+ if r.task_type not in by_type:
1168
+ by_type[r.task_type] = {"total": 0, "success": 0}
1169
+ by_type[r.task_type]["total"] += 1
1170
+ if r.success:
1171
+ by_type[r.task_type]["success"] += 1
1172
+
1173
+ # Calculate rates
1174
+ for _task_type, stats in by_type.items():
1175
+ stats["rate"] = stats["success"] / stats["total"] if stats["total"] > 0 else 0.0
1176
+
1177
+ # Aggregate by strategy
1178
+ by_strategy: dict[str, dict[str, int]] = {}
1179
+ for r in routings:
1180
+ if r.routing_strategy not in by_strategy:
1181
+ by_strategy[r.routing_strategy] = {"total": 0, "success": 0}
1182
+ by_strategy[r.routing_strategy]["total"] += 1
1183
+ if r.success:
1184
+ by_strategy[r.routing_strategy]["success"] += 1
1185
+
1186
+ return {
1187
+ "total_tasks": total,
1188
+ "successful_routing": successful,
1189
+ "accuracy_rate": successful / total if total > 0 else 0.0,
1190
+ "avg_confidence": total_confidence / total if total > 0 else 0.0,
1191
+ "by_task_type": by_type,
1192
+ "by_strategy": by_strategy,
1193
+ }
1194
+
1195
+ def test_execution_trends(
1196
+ self,
1197
+ since: datetime | None = None,
1198
+ ) -> dict[str, Any]:
1199
+ """Analyze test execution trends.
1200
+
1201
+ Args:
1202
+ since: Only consider executions after this time
1203
+
1204
+ Returns:
1205
+ Dict with test execution metrics and trends
1206
+
1207
+ """
1208
+ executions = self.store.get_test_executions(since=since, limit=1000)
1209
+
1210
+ if not executions:
1211
+ return {
1212
+ "total_executions": 0,
1213
+ "success_rate": 0.0,
1214
+ "avg_duration_seconds": 0.0,
1215
+ "total_tests_run": 0,
1216
+ "total_failures": 0,
1217
+ "coverage_trend": "stable",
1218
+ "most_failing_tests": [],
1219
+ }
1220
+
1221
+ total_execs = len(executions)
1222
+ successful_execs = sum(1 for e in executions if e.success)
1223
+ total_duration = sum(e.duration_seconds for e in executions)
1224
+ total_tests = sum(e.total_tests for e in executions)
1225
+ total_failures = sum(e.failed for e in executions)
1226
+
1227
+ # Find most failing tests
1228
+ failure_counts: dict[str, int] = {}
1229
+ for exec_rec in executions:
1230
+ for test in exec_rec.failed_tests:
1231
+ test_name = test.get("name", "unknown")
1232
+ failure_counts[test_name] = failure_counts.get(test_name, 0) + 1
1233
+
1234
+ most_failing = [
1235
+ {"name": name, "failures": count}
1236
+ for name, count in sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[:10]
1237
+ ]
1238
+
1239
+ return {
1240
+ "total_executions": total_execs,
1241
+ "success_rate": successful_execs / total_execs if total_execs > 0 else 0.0,
1242
+ "avg_duration_seconds": total_duration / total_execs if total_execs > 0 else 0.0,
1243
+ "total_tests_run": total_tests,
1244
+ "total_failures": total_failures,
1245
+ "coverage_trend": "stable", # Will be computed from coverage_progress
1246
+ "most_failing_tests": most_failing,
1247
+ }
1248
+
1249
+ def coverage_progress(
1250
+ self,
1251
+ since: datetime | None = None,
1252
+ ) -> dict[str, Any]:
1253
+ """Track coverage progress over time.
1254
+
1255
+ Args:
1256
+ since: Only consider coverage records after this time
1257
+
1258
+ Returns:
1259
+ Dict with coverage metrics and trends
1260
+
1261
+ """
1262
+ records = self.store.get_coverage_history(since=since, limit=1000)
1263
+
1264
+ if not records:
1265
+ return {
1266
+ "current_coverage": 0.0,
1267
+ "previous_coverage": 0.0,
1268
+ "change": 0.0,
1269
+ "trend": "no_data",
1270
+ "coverage_history": [],
1271
+ "files_improved": 0,
1272
+ "files_declined": 0,
1273
+ "critical_gaps_count": 0,
1274
+ }
1275
+
1276
+ # Latest and first records
1277
+ latest = records[-1]
1278
+ first = records[0]
1279
+ current_coverage = latest.overall_percentage
1280
+
1281
+ # Calculate trend by comparing first to last
1282
+ if len(records) == 1:
1283
+ # Single record - no trend analysis possible
1284
+ prev_coverage = 0.0
1285
+ change = 0.0
1286
+ trend = "stable"
1287
+ else:
1288
+ # Multiple records - compare first to last
1289
+ prev_coverage = first.overall_percentage
1290
+ change = current_coverage - prev_coverage
1291
+
1292
+ # Determine trend based on change
1293
+ if change > 1.0:
1294
+ trend = "improving"
1295
+ elif change < -1.0:
1296
+ trend = "declining"
1297
+ else:
1298
+ trend = "stable"
1299
+
1300
+ # Build coverage history from records
1301
+ coverage_history = [
1302
+ {
1303
+ "timestamp": r.timestamp,
1304
+ "coverage": r.overall_percentage,
1305
+ "trend": r.trend,
1306
+ }
1307
+ for r in records
1308
+ ]
1309
+
1310
+ return {
1311
+ "current_coverage": current_coverage,
1312
+ "previous_coverage": prev_coverage,
1313
+ "change": change,
1314
+ "trend": trend,
1315
+ "coverage_history": coverage_history,
1316
+ "files_improved": 0, # Would need file-level history
1317
+ "files_declined": 0, # Would need file-level history
1318
+ "critical_gaps_count": len(latest.critical_gaps),
1319
+ }
1320
+
1321
+ def agent_performance(
1322
+ self,
1323
+ since: datetime | None = None,
1324
+ ) -> dict[str, Any]:
1325
+ """Analyze agent/workflow performance.
1326
+
1327
+ Args:
1328
+ since: Only consider assignments after this time
1329
+
1330
+ Returns:
1331
+ Dict with agent performance metrics
1332
+
1333
+ """
1334
+ assignments = self.store.get_agent_assignments(
1335
+ since=since, automated_only=False, limit=10000
1336
+ )
1337
+
1338
+ if not assignments:
1339
+ return {
1340
+ "total_assignments": 0,
1341
+ "by_agent": {},
1342
+ "automation_rate": 0.0,
1343
+ "human_review_rate": 0.0,
1344
+ }
1345
+
1346
+ # Aggregate by agent
1347
+ by_agent: dict[str, dict[str, Any]] = {}
1348
+ total_assignments = len(assignments)
1349
+ total_automated = 0
1350
+ total_human_review = 0
1351
+
1352
+ for assignment in assignments:
1353
+ agent = assignment.assigned_agent
1354
+ if agent not in by_agent:
1355
+ by_agent[agent] = {
1356
+ "assignments": 0,
1357
+ "completed": 0,
1358
+ "successful": 0,
1359
+ "success_rate": 0.0,
1360
+ "avg_duration_hours": 0.0,
1361
+ "quality_score_avg": 0.0,
1362
+ "total_duration": 0.0,
1363
+ "quality_scores": [],
1364
+ }
1365
+
1366
+ stats = by_agent[agent]
1367
+ stats["assignments"] += 1
1368
+ if assignment.status == "completed":
1369
+ stats["completed"] += 1
1370
+ if assignment.actual_duration_hours is not None:
1371
+ stats["total_duration"] += assignment.actual_duration_hours
1372
+
1373
+ # Track successful assignments (not just completed)
1374
+ if assignment.success:
1375
+ stats["successful"] += 1
1376
+
1377
+ if assignment.automated_eligible:
1378
+ total_automated += 1
1379
+ if assignment.human_review_required:
1380
+ total_human_review += 1
1381
+
1382
+ # Calculate averages
1383
+ for _agent, stats in by_agent.items():
1384
+ if stats["assignments"] > 0:
1385
+ stats["success_rate"] = stats["successful"] / stats["assignments"]
1386
+ if stats["completed"] > 0:
1387
+ stats["avg_duration_hours"] = stats["total_duration"] / stats["completed"]
1388
+
1389
+ # Remove helper fields
1390
+ del stats["total_duration"]
1391
+ del stats["quality_scores"]
1392
+ del stats["successful"] # Remove helper field, keep success_rate
1393
+
1394
+ return {
1395
+ "total_assignments": total_assignments,
1396
+ "by_agent": by_agent,
1397
+ "automation_rate": (
1398
+ total_automated / total_assignments if total_assignments > 0 else 0.0
1399
+ ),
1400
+ "human_review_rate": (
1401
+ total_human_review / total_assignments if total_assignments > 0 else 0.0
1402
+ ),
1403
+ }
1404
+
1405
+ def tier1_summary(
1406
+ self,
1407
+ since: datetime | None = None,
1408
+ ) -> dict[str, Any]:
1409
+ """Comprehensive Tier 1 automation summary.
1410
+
1411
+ Args:
1412
+ since: Only consider records after this time
1413
+
1414
+ Returns:
1415
+ Dict combining all Tier 1 metrics
1416
+
1417
+ """
1418
+ return {
1419
+ "task_routing": self.task_routing_accuracy(since),
1420
+ "test_execution": self.test_execution_trends(since),
1421
+ "coverage": self.coverage_progress(since),
1422
+ "agent_performance": self.agent_performance(since),
1423
+ "cost_savings": self.cost_savings_report(since),
1424
+ }
1425
+
528
1426
 
529
1427
  # Singleton for global telemetry
530
1428
  _telemetry_store: TelemetryStore | None = None