empathy-framework 4.7.1-py3-none-any.whl → 4.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {empathy_framework-4.7.1.dist-info → empathy_framework-4.9.0.dist-info}/METADATA +65 -2
  2. {empathy_framework-4.7.1.dist-info → empathy_framework-4.9.0.dist-info}/RECORD +69 -59
  3. {empathy_framework-4.7.1.dist-info → empathy_framework-4.9.0.dist-info}/WHEEL +1 -1
  4. {empathy_framework-4.7.1.dist-info → empathy_framework-4.9.0.dist-info}/entry_points.txt +2 -1
  5. {empathy_framework-4.7.1.dist-info → empathy_framework-4.9.0.dist-info}/top_level.txt +0 -1
  6. empathy_os/__init__.py +2 -0
  7. empathy_os/cli/__init__.py +128 -238
  8. empathy_os/cli/__main__.py +5 -33
  9. empathy_os/cli/commands/__init__.py +1 -8
  10. empathy_os/cli/commands/help.py +331 -0
  11. empathy_os/cli/commands/info.py +140 -0
  12. empathy_os/cli/commands/inspect.py +437 -0
  13. empathy_os/cli/commands/metrics.py +92 -0
  14. empathy_os/cli/commands/orchestrate.py +184 -0
  15. empathy_os/cli/commands/patterns.py +207 -0
  16. empathy_os/cli/commands/provider.py +93 -81
  17. empathy_os/cli/commands/setup.py +96 -0
  18. empathy_os/cli/commands/status.py +235 -0
  19. empathy_os/cli/commands/sync.py +166 -0
  20. empathy_os/cli/commands/tier.py +121 -0
  21. empathy_os/cli/commands/workflow.py +574 -0
  22. empathy_os/cli/parsers/__init__.py +62 -0
  23. empathy_os/cli/parsers/help.py +41 -0
  24. empathy_os/cli/parsers/info.py +26 -0
  25. empathy_os/cli/parsers/inspect.py +66 -0
  26. empathy_os/cli/parsers/metrics.py +42 -0
  27. empathy_os/cli/parsers/orchestrate.py +61 -0
  28. empathy_os/cli/parsers/patterns.py +54 -0
  29. empathy_os/cli/parsers/provider.py +40 -0
  30. empathy_os/cli/parsers/setup.py +42 -0
  31. empathy_os/cli/parsers/status.py +47 -0
  32. empathy_os/cli/parsers/sync.py +31 -0
  33. empathy_os/cli/parsers/tier.py +33 -0
  34. empathy_os/cli/parsers/workflow.py +77 -0
  35. empathy_os/cli/utils/__init__.py +1 -0
  36. empathy_os/cli/utils/data.py +242 -0
  37. empathy_os/cli/utils/helpers.py +68 -0
  38. empathy_os/{cli.py → cli_legacy.py} +0 -26
  39. empathy_os/cli_minimal.py +662 -0
  40. empathy_os/cli_router.py +384 -0
  41. empathy_os/cli_unified.py +13 -2
  42. empathy_os/memory/short_term.py +146 -414
  43. empathy_os/memory/types.py +441 -0
  44. empathy_os/memory/unified.py +61 -48
  45. empathy_os/models/fallback.py +1 -1
  46. empathy_os/models/provider_config.py +59 -344
  47. empathy_os/models/registry.py +27 -176
  48. empathy_os/monitoring/alerts.py +14 -20
  49. empathy_os/monitoring/alerts_cli.py +24 -7
  50. empathy_os/project_index/__init__.py +2 -0
  51. empathy_os/project_index/index.py +210 -5
  52. empathy_os/project_index/scanner.py +48 -16
  53. empathy_os/project_index/scanner_parallel.py +291 -0
  54. empathy_os/workflow_commands.py +9 -9
  55. empathy_os/workflows/__init__.py +31 -2
  56. empathy_os/workflows/base.py +295 -317
  57. empathy_os/workflows/bug_predict.py +10 -2
  58. empathy_os/workflows/builder.py +273 -0
  59. empathy_os/workflows/caching.py +253 -0
  60. empathy_os/workflows/code_review_pipeline.py +1 -0
  61. empathy_os/workflows/history.py +512 -0
  62. empathy_os/workflows/perf_audit.py +129 -23
  63. empathy_os/workflows/routing.py +163 -0
  64. empathy_os/workflows/secure_release.py +1 -0
  65. empathy_os/workflows/security_audit.py +1 -0
  66. empathy_os/workflows/security_audit_phase3.py +352 -0
  67. empathy_os/workflows/telemetry_mixin.py +269 -0
  68. empathy_os/workflows/test_gen.py +7 -7
  69. empathy_os/dashboard/__init__.py +0 -15
  70. empathy_os/dashboard/server.py +0 -941
  71. empathy_os/vscode_bridge 2.py +0 -173
  72. empathy_os/workflows/progressive/README 2.md +0 -454
  73. empathy_os/workflows/progressive/__init__ 2.py +0 -92
  74. empathy_os/workflows/progressive/cli 2.py +0 -242
  75. empathy_os/workflows/progressive/core 2.py +0 -488
  76. empathy_os/workflows/progressive/orchestrator 2.py +0 -701
  77. empathy_os/workflows/progressive/reports 2.py +0 -528
  78. empathy_os/workflows/progressive/telemetry 2.py +0 -280
  79. empathy_os/workflows/progressive/test_gen 2.py +0 -514
  80. empathy_os/workflows/progressive/workflow 2.py +0 -628
  81. patterns/README.md +0 -119
  82. patterns/__init__.py +0 -95
  83. patterns/behavior.py +0 -298
  84. patterns/code_review_memory.json +0 -441
  85. patterns/core.py +0 -97
  86. patterns/debugging.json +0 -3763
  87. patterns/empathy.py +0 -268
  88. patterns/health_check_memory.json +0 -505
  89. patterns/input.py +0 -161
  90. patterns/memory_graph.json +0 -8
  91. patterns/refactoring_memory.json +0 -1113
  92. patterns/registry.py +0 -663
  93. patterns/security_memory.json +0 -8
  94. patterns/structural.py +0 -415
  95. patterns/validation.py +0 -194
  96. {empathy_framework-4.7.1.dist-info → empathy_framework-4.9.0.dist-info}/licenses/LICENSE +0 -0
empathy_os/workflows/base.py

@@ -27,6 +27,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
+    from .routing import TierRoutingStrategy
     from .tier_tracking import WorkflowTierTracker
 
 # Load .env file for API keys if python-dotenv is available
@@ -38,26 +39,26 @@ except ImportError:
     pass  # python-dotenv not installed, rely on environment variables
 
 # Import caching infrastructure
-from empathy_os.cache import BaseCache, auto_setup_cache, create_cache
+from empathy_os.cache import BaseCache
 from empathy_os.config import _validate_file_path
 from empathy_os.cost_tracker import MODEL_PRICING, CostTracker
 
 # Import unified types from empathy_os.models
 from empathy_os.models import (
     ExecutionContext,
-    LLMCallRecord,
     LLMExecutor,
     TaskRoutingRecord,
     TelemetryBackend,
-    WorkflowRunRecord,
-    WorkflowStageRecord,
-    get_telemetry_store,
 )
 from empathy_os.models import ModelProvider as UnifiedModelProvider
 from empathy_os.models import ModelTier as UnifiedModelTier
 
+# Import mixins (extracted for maintainability)
+from .caching import CachedResponse, CachingMixin
+
 # Import progress tracking
 from .progress import ProgressCallback, ProgressTracker
+from .telemetry_mixin import TelemetryMixin
 
 # Import telemetry tracking
 try:
@@ -78,15 +79,47 @@ logger = logging.getLogger(__name__)
 WORKFLOW_HISTORY_FILE = ".empathy/workflow_runs.json"
 
 
-# Local enums for backward compatibility
+# Local enums for backward compatibility - DEPRECATED
 # New code should use empathy_os.models.ModelTier/ModelProvider
 class ModelTier(Enum):
-    """Model tier for cost optimization."""
+    """DEPRECATED: Model tier for cost optimization.
+
+    This enum is deprecated and will be removed in v5.0.
+    Use empathy_os.models.ModelTier instead.
+
+    Migration:
+        # Old:
+        from empathy_os.workflows.base import ModelTier
+
+        # New:
+        from empathy_os.models import ModelTier
+
+    Why deprecated:
+    - Creates confusion with dual definitions
+    - empathy_os.models.ModelTier is the canonical location
+    - Simplifies imports and reduces duplication
+    """
 
     CHEAP = "cheap"  # Haiku/GPT-4o-mini - $0.25-1.25/M tokens
     CAPABLE = "capable"  # Sonnet/GPT-4o - $3-15/M tokens
     PREMIUM = "premium"  # Opus/o1 - $15-75/M tokens
 
+    def __init__(self, value: str):
+        """Initialize with deprecation warning."""
+        # Only warn once per process, not per instance
+        import warnings
+
+        # Use self.__class__ instead of ModelTier (class not yet defined during creation)
+        if not hasattr(self.__class__, "_deprecation_warned"):
+            warnings.warn(
+                "workflows.base.ModelTier is deprecated and will be removed in v5.0. "
+                "Use empathy_os.models.ModelTier instead. "
+                "Update imports: from empathy_os.models import ModelTier",
+                DeprecationWarning,
+                stacklevel=4,
+            )
+            self.__class__._deprecation_warned = True
+
     def to_unified(self) -> UnifiedModelTier:
         """Convert to unified ModelTier from empathy_os.models."""
         return UnifiedModelTier(self.value)
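
A minimal migration sketch for the deprecated enum (assuming empathy-framework 4.9.0; the LegacyModelTier alias is illustrative). Note that because an Enum member's __init__ runs at class creation, the warning fires once when empathy_os.workflows.base is first imported, not on each member access:

    # Old path (deprecated, scheduled for removal in v5.0):
    from empathy_os.workflows.base import ModelTier as LegacyModelTier

    # New canonical path:
    from empathy_os.models import ModelTier

    # The two enums stay value-compatible, so conversion is a lookup by value:
    assert ModelTier(LegacyModelTier.CHEAP.value) is ModelTier.CHEAP
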
@@ -214,8 +247,52 @@ class WorkflowResult:
     transient: bool = False  # True if retry is reasonable (e.g., provider timeout)
 
 
+# Global singleton for workflow history store (lazy-initialized)
+_history_store: Any = None  # WorkflowHistoryStore | None
+
+
+def _get_history_store():
+    """Get or create workflow history store singleton.
+
+    Returns SQLite-based history store. Falls back to None if initialization fails.
+    """
+    global _history_store
+
+    if _history_store is None:
+        try:
+            from .history import WorkflowHistoryStore
+
+            _history_store = WorkflowHistoryStore()
+            logger.debug("Workflow history store initialized (SQLite)")
+        except (ImportError, OSError, PermissionError) as e:
+            # File system errors or missing dependencies
+            logger.warning(f"Failed to initialize SQLite history store: {e}")
+            _history_store = False  # Mark as failed to avoid repeated attempts
+
+    # Return store or None if initialization failed
+    return _history_store if _history_store is not False else None
+
+
 def _load_workflow_history(history_file: str = WORKFLOW_HISTORY_FILE) -> list[dict]:
-    """Load workflow run history from disk."""
+    """Load workflow run history from disk (legacy JSON support).
+
+    DEPRECATED: Use WorkflowHistoryStore for new code.
+    This function is maintained for backward compatibility.
+
+    Args:
+        history_file: Path to JSON history file
+
+    Returns:
+        List of workflow run dictionaries
+    """
+    import warnings
+
+    warnings.warn(
+        "_load_workflow_history is deprecated. Use WorkflowHistoryStore instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+
     path = Path(history_file)
     if not path.exists():
         return []
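
_get_history_store() above is a tri-state lazy singleton: None means "not yet initialized", False means "initialization failed, do not retry", anything else is the live store. A standalone sketch of that pattern with hypothetical names:

    from typing import Any

    _store: Any = None  # None = untried, False = failed, else = instance

    def open_backend() -> object:
        return object()  # stand-in for WorkflowHistoryStore(); may raise OSError

    def get_store():
        global _store
        if _store is None:
            try:
                _store = open_backend()
            except OSError:
                _store = False  # remember the failure so every later call is cheap
        return _store if _store is not False else None

    print(get_store() is not None)
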
@@ -234,11 +311,42 @@ def _save_workflow_run(
     history_file: str = WORKFLOW_HISTORY_FILE,
     max_history: int = 100,
 ) -> None:
-    """Save a workflow run to history."""
+    """Save a workflow run to history.
+
+    Uses SQLite-based storage by default. Falls back to JSON if SQLite unavailable.
+
+    Args:
+        workflow_name: Name of the workflow
+        provider: Provider used (anthropic, openai, google)
+        result: WorkflowResult object
+        history_file: Legacy JSON path (ignored if SQLite available)
+        max_history: Legacy max history limit (ignored if SQLite available)
+    """
+    # Try SQLite first (new approach)
+    store = _get_history_store()
+    if store is not None:
+        try:
+            run_id = str(uuid.uuid4())
+            store.record_run(run_id, workflow_name, provider, result)
+            logger.debug(f"Workflow run saved to SQLite: {run_id}")
+            return
+        except (OSError, PermissionError, ValueError) as e:
+            # SQLite failed, fall back to JSON
+            logger.warning(f"Failed to save to SQLite, falling back to JSON: {e}")
+
+    # Fallback: Legacy JSON storage
+    logger.debug("Using legacy JSON storage for workflow history")
     path = Path(history_file)
     path.parent.mkdir(parents=True, exist_ok=True)
 
-    history = _load_workflow_history(history_file)
+    history = []
+    if path.exists():
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            history = list(data) if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            pass
 
     # Create run record
     run: dict = {
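
The JSON fallback above deliberately tolerates a missing, unreadable, or malformed history file rather than raising. A standalone sketch of that tolerant load (hypothetical helper name):

    import json
    from pathlib import Path

    def load_json_history(path: Path) -> list[dict]:
        if not path.exists():
            return []
        try:
            data = json.loads(path.read_text())
        except (json.JSONDecodeError, OSError):
            return []  # unreadable file degrades to empty history
        return list(data) if isinstance(data, list) else []

    print(load_json_history(Path(".empathy/workflow_runs.json")))
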
@@ -285,20 +393,48 @@ def _save_workflow_run(
 def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
     """Get workflow statistics for dashboard.
 
+    Uses SQLite-based storage by default. Falls back to JSON if unavailable.
+
+    Args:
+        history_file: Legacy JSON path (used only if SQLite unavailable)
+
     Returns:
         Dictionary with workflow stats including:
         - total_runs: Total workflow runs
+        - successful_runs: Number of successful runs
        - by_workflow: Per-workflow stats
         - by_provider: Per-provider stats
+        - by_tier: Cost breakdown by tier
         - recent_runs: Last 10 runs
+        - total_cost: Total cost across all runs
         - total_savings: Total cost savings
-
+        - avg_savings_percent: Average savings percentage
     """
-    history = _load_workflow_history(history_file)
+    # Try SQLite first (new approach)
+    store = _get_history_store()
+    if store is not None:
+        try:
+            return store.get_stats()
+        except (OSError, PermissionError, ValueError) as e:
+            # SQLite failed, fall back to JSON
+            logger.warning(f"Failed to get stats from SQLite, falling back to JSON: {e}")
+
+    # Fallback: Legacy JSON storage
+    logger.debug("Using legacy JSON storage for workflow stats")
+    history = []
+    path = Path(history_file)
+    if path.exists():
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            history = list(data) if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            pass
 
     if not history:
         return {
             "total_runs": 0,
+            "successful_runs": 0,
             "by_workflow": {},
             "by_provider": {},
             "by_tier": {"cheap": 0, "capable": 0, "premium": 0},
@@ -368,9 +504,11 @@ def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
     }
 
 
-class BaseWorkflow(ABC):
+class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
     """Base class for multi-model workflows.
 
+    Inherits from CachingMixin and TelemetryMixin (extracted for maintainability).
+
     Subclasses define stages and tier mappings:
 
         class MyWorkflow(BaseWorkflow):
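
With this base-class change, Python's MRO places the mixins ahead of ABC, so their helpers resolve on every subclass without conflicting with the abstract interface. A self-contained sketch with illustrative stand-ins for the real mixins:

    from abc import ABC, abstractmethod

    class CachingMixin:
        def cache_note(self) -> str:
            return "caching helpers live here"

    class TelemetryMixin:
        def telemetry_note(self) -> str:
            return "telemetry helpers live here"

    class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
        @abstractmethod
        def run(self) -> None: ...

    class MyWorkflow(BaseWorkflow):
        def run(self) -> None:
            print(self.cache_note(), "|", self.telemetry_note())

    MyWorkflow().run()
    print([c.__name__ for c in MyWorkflow.__mro__])
    # ['MyWorkflow', 'BaseWorkflow', 'CachingMixin', 'TelemetryMixin', 'ABC', 'object']
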
@@ -405,6 +543,7 @@ class BaseWorkflow(ABC):
         enable_cache: bool = True,
         enable_tier_tracking: bool = True,
         enable_tier_fallback: bool = False,
+        routing_strategy: TierRoutingStrategy | None = None,
     ):
         """Initialize workflow with optional cost tracker, provider, and config.
 
@@ -426,6 +565,10 @@ class BaseWorkflow(ABC):
             enable_tier_tracking: Whether to enable automatic tier tracking (default True).
             enable_tier_fallback: Whether to enable intelligent tier fallback
                 (CHEAP → CAPABLE → PREMIUM). Opt-in feature (default False).
+            routing_strategy: Optional TierRoutingStrategy for dynamic tier selection.
+                When provided, overrides static tier_map for stage tier decisions.
+                Strategies: CostOptimizedRouting, PerformanceOptimizedRouting,
+                BalancedRouting, HybridRouting.
 
         """
         from .config import WorkflowConfig
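
Opting in might look like the sketch below; the strategy class names are the ones listed in the docstring, but constructor arguments are assumed defaults, and MyWorkflow stands for any BaseWorkflow subclass:

    from empathy_os.workflows.routing import CostOptimizedRouting

    workflow = MyWorkflow(routing_strategy=CostOptimizedRouting())
    # With routing_strategy=None (the default), behavior is unchanged:
    # each stage's tier still comes from the static tier_map.
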
@@ -439,8 +582,6 @@ class BaseWorkflow(ABC):
 
         # New: LLMExecutor support
         self._executor = executor
-        self._telemetry_backend = telemetry_backend or get_telemetry_store()
-        self._run_id: str | None = None  # Set at start of execute()
         self._api_key: str | None = None  # For default executor creation
 
         # Cache support
@@ -456,20 +597,11 @@ class BaseWorkflow(ABC):
         self._enable_tier_fallback = enable_tier_fallback
         self._tier_progression: list[tuple[str, str, bool]] = []  # (stage, tier, success)
 
-        # Telemetry tracking (singleton instance)
-        self._telemetry_tracker: UsageTracker | None = None
-        self._enable_telemetry = True  # Enable by default
-        if TELEMETRY_AVAILABLE and UsageTracker is not None:
-            try:
-                self._telemetry_tracker = UsageTracker.get_instance()
-            except (OSError, PermissionError) as e:
-                # File system errors - log but disable telemetry
-                logger.debug(f"Failed to initialize telemetry tracker (file system error): {e}")
-                self._enable_telemetry = False
-            except (AttributeError, TypeError, ValueError) as e:
-                # Configuration or initialization errors
-                logger.debug(f"Failed to initialize telemetry tracker (config error): {e}")
-                self._enable_telemetry = False
+        # Routing strategy support
+        self._routing_strategy: TierRoutingStrategy | None = routing_strategy
+
+        # Telemetry tracking (uses TelemetryMixin)
+        self._init_telemetry(telemetry_backend)
 
         # Load config if not provided
         self._config = config or WorkflowConfig.load()
@@ -494,9 +626,83 @@ class BaseWorkflow(ABC):
         self.provider = provider
 
     def get_tier_for_stage(self, stage_name: str) -> ModelTier:
-        """Get the model tier for a stage."""
+        """Get the model tier for a stage from static tier_map."""
         return self.tier_map.get(stage_name, ModelTier.CAPABLE)
 
+    def _get_tier_with_routing(
+        self,
+        stage_name: str,
+        input_data: dict[str, Any],
+        budget_remaining: float = 100.0,
+    ) -> ModelTier:
+        """Get tier for a stage using routing strategy if available.
+
+        If a routing strategy is configured, creates a RoutingContext and
+        delegates tier selection to the strategy. Otherwise falls back to
+        the static tier_map.
+
+        Args:
+            stage_name: Name of the stage
+            input_data: Current workflow data (used to estimate input size)
+            budget_remaining: Remaining budget in USD for this execution
+
+        Returns:
+            ModelTier to use for this stage
+        """
+        # Fall back to static tier_map if no routing strategy
+        if self._routing_strategy is None:
+            return self.get_tier_for_stage(stage_name)
+
+        from .routing import RoutingContext
+
+        # Estimate input size from data
+        input_size = self._estimate_input_tokens(input_data)
+
+        # Assess complexity
+        complexity = self._assess_complexity(input_data)
+
+        # Determine latency sensitivity based on stage position
+        # First stages are more latency-sensitive (user waiting)
+        stage_index = self.stages.index(stage_name) if stage_name in self.stages else 0
+        if stage_index == 0:
+            latency_sensitivity = "high"
+        elif stage_index < len(self.stages) // 2:
+            latency_sensitivity = "medium"
+        else:
+            latency_sensitivity = "low"
+
+        # Create routing context
+        context = RoutingContext(
+            task_type=f"{self.name}:{stage_name}",
+            input_size=input_size,
+            complexity=complexity,
+            budget_remaining=budget_remaining,
+            latency_sensitivity=latency_sensitivity,
+        )
+
+        # Delegate to routing strategy
+        return self._routing_strategy.route(context)
+
+    def _estimate_input_tokens(self, input_data: dict[str, Any]) -> int:
+        """Estimate input token count from data.
+
+        Simple heuristic: ~4 characters per token on average.
+
+        Args:
+            input_data: Workflow input data
+
+        Returns:
+            Estimated token count
+        """
+        import json
+
+        try:
+            # Serialize to estimate size
+            data_str = json.dumps(input_data, default=str)
+            return len(data_str) // 4
+        except (TypeError, ValueError):
+            return 1000  # Default estimate
+
     def get_model_for_tier(self, tier: ModelTier) -> str:
         """Get the model for a tier based on configured provider and config."""
         from .config import get_model
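
A standalone check of the ~4-characters-per-token heuristic used by _estimate_input_tokens (real tokenizers vary by model, so treat this as a rough sizing signal only):

    import json

    def estimate_tokens(data: dict) -> int:
        try:
            return len(json.dumps(data, default=str)) // 4
        except (TypeError, ValueError):
            return 1000  # same default the method falls back to

    payload = {"diff": "x" * 2000, "files": ["a.py", "b.py"]}
    print(estimate_tokens(payload))  # roughly 500 tokens for ~2 KB of JSON
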
@@ -507,43 +713,7 @@ class BaseWorkflow(ABC):
         model = get_model(provider_str, tier.value, self._config)
         return model
 
-    def _maybe_setup_cache(self) -> None:
-        """Set up cache with one-time user prompt if needed.
-
-        This is called lazily on first workflow execution to avoid
-        blocking workflow initialization.
-        """
-        if not self._enable_cache:
-            return
-
-        if self._cache_setup_attempted:
-            return
-
-        self._cache_setup_attempted = True
-
-        # If cache already provided, use it
-        if self._cache is not None:
-            return
-
-        # Otherwise, trigger auto-setup (which may prompt user)
-        try:
-            auto_setup_cache()
-            self._cache = create_cache()
-            logger.info(f"Cache initialized for workflow: {self.name}")
-        except ImportError as e:
-            # Hybrid cache dependencies not available, fall back to hash-only
-            logger.info(
-                f"Using hash-only cache (install empathy-framework[cache] for semantic caching): {e}"
-            )
-            self._cache = create_cache(cache_type="hash")
-        except (OSError, PermissionError) as e:
-            # File system errors - disable cache
-            logger.warning(f"Cache setup failed (file system error): {e}, continuing without cache")
-            self._enable_cache = False
-        except (ValueError, TypeError, AttributeError) as e:
-            # Configuration errors - disable cache
-            logger.warning(f"Cache setup failed (config error): {e}, continuing without cache")
-            self._enable_cache = False
+    # Note: _maybe_setup_cache is inherited from CachingMixin
 
     async def _call_llm(
         self,
@@ -582,54 +752,26 @@ class BaseWorkflow(ABC):
         model = self.get_model_for_tier(tier)
         cache_type = None
 
-        # Try cache lookup if enabled
-        if self._enable_cache and self._cache is not None:
-            try:
-                # Combine system + user message for cache key
-                full_prompt = f"{system}\n\n{user_message}" if system else user_message
-                cached_response = self._cache.get(self.name, stage, full_prompt, model)
-
-                if cached_response is not None:
-                    logger.debug(f"Cache hit for {self.name}:{stage}")
-                    # Determine cache type
-                    if hasattr(self._cache, "cache_type"):
-                        ct = self._cache.cache_type
-                        # Ensure it's a string (not a Mock object)
-                        cache_type = str(ct) if ct and isinstance(ct, str) else "hash"
-                    else:
-                        cache_type = "hash"  # Default assumption
-
-                    # Track telemetry for cache hit
-                    duration_ms = int((time.time() - start_time) * 1000)
-                    in_tokens = cached_response["input_tokens"]
-                    out_tokens = cached_response["output_tokens"]
-                    cost = self._calculate_cost(tier, in_tokens, out_tokens)
+        # Try cache lookup using CachingMixin
+        cached = self._try_cache_lookup(stage, system, user_message, model)
+        if cached is not None:
+            # Track telemetry for cache hit
+            duration_ms = int((time.time() - start_time) * 1000)
+            cost = self._calculate_cost(tier, cached.input_tokens, cached.output_tokens)
+            cache_type = self._get_cache_type()
 
-                    self._track_telemetry(
-                        stage=stage,
-                        tier=tier,
-                        model=model,
-                        cost=cost,
-                        tokens={"input": in_tokens, "output": out_tokens},
-                        cache_hit=True,
-                        cache_type=cache_type,
-                        duration_ms=duration_ms,
-                    )
+            self._track_telemetry(
+                stage=stage,
+                tier=tier,
+                model=model,
+                cost=cost,
+                tokens={"input": cached.input_tokens, "output": cached.output_tokens},
+                cache_hit=True,
+                cache_type=cache_type,
+                duration_ms=duration_ms,
+            )
 
-                    # Cached response is dict with content, input_tokens, output_tokens
-                    return (
-                        cached_response["content"],
-                        cached_response["input_tokens"],
-                        cached_response["output_tokens"],
-                    )
-            except (KeyError, TypeError, ValueError) as e:
-                # Malformed cache data - continue with LLM call
-                logger.debug(f"Cache lookup failed (malformed data): {e}, continuing with LLM call")
-            except (OSError, PermissionError) as e:
-                # File system errors - continue with LLM call
-                logger.debug(
-                    f"Cache lookup failed (file system error): {e}, continuing with LLM call"
-                )
+            return (cached.content, cached.input_tokens, cached.output_tokens)
 
         # Create a step config for this call
         step = WorkflowStepConfig(
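
The mixin call above implies a small lookup contract: _try_cache_lookup returns None on a miss and a CachedResponse on a hit. A dataclass with these three fields matches how the result is consumed here (field layout inferred from usage, not from caching.py itself):

    from dataclasses import dataclass

    @dataclass
    class CachedResponse:
        content: str
        input_tokens: int
        output_tokens: int

    def unpack(cached: CachedResponse | None) -> tuple[str, int, int] | None:
        if cached is None:
            return None  # miss: fall through to the real LLM call
        return (cached.content, cached.input_tokens, cached.output_tokens)

    print(unpack(CachedResponse("hello", 12, 3)))
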
@@ -662,23 +804,14 @@ class BaseWorkflow(ABC):
                 duration_ms=duration_ms,
             )
 
-            # Store in cache if enabled
-            if self._enable_cache and self._cache is not None:
-                try:
-                    full_prompt = f"{system}\n\n{user_message}" if system else user_message
-                    response_data = {
-                        "content": content,
-                        "input_tokens": in_tokens,
-                        "output_tokens": out_tokens,
-                    }
-                    self._cache.put(self.name, stage, full_prompt, model, response_data)
-                    logger.debug(f"Cached response for {self.name}:{stage}")
-                except (OSError, PermissionError) as e:
-                    # File system errors - log but continue
-                    logger.debug(f"Failed to cache response (file system error): {e}")
-                except (ValueError, TypeError, KeyError) as e:
-                    # Data serialization errors - log but continue
-                    logger.debug(f"Failed to cache response (serialization error): {e}")
+            # Store in cache using CachingMixin
+            self._store_in_cache(
+                stage,
+                system,
+                user_message,
+                model,
+                CachedResponse(content=content, input_tokens=in_tokens, output_tokens=out_tokens),
+            )
 
             return content, in_tokens, out_tokens
         except (ValueError, TypeError, KeyError) as e:
@@ -698,53 +831,7 @@ class BaseWorkflow(ABC):
             logger.exception(f"Unexpected error calling LLM: {e}")
             return f"Error calling LLM: {type(e).__name__}", 0, 0
 
-    def _track_telemetry(
-        self,
-        stage: str,
-        tier: ModelTier,
-        model: str,
-        cost: float,
-        tokens: dict[str, int],
-        cache_hit: bool,
-        cache_type: str | None,
-        duration_ms: int,
-    ) -> None:
-        """Track telemetry for an LLM call.
-
-        Args:
-            stage: Stage name
-            tier: Model tier used
-            model: Model ID used
-            cost: Cost in USD
-            tokens: Dictionary with "input" and "output" token counts
-            cache_hit: Whether this was a cache hit
-            cache_type: Cache type if cache hit
-            duration_ms: Duration in milliseconds
-
-        """
-        if not self._enable_telemetry or self._telemetry_tracker is None:
-            return
-
-        try:
-            provider_str = getattr(self, "_provider_str", "unknown")
-            self._telemetry_tracker.track_llm_call(
-                workflow=self.name,
-                stage=stage,
-                tier=tier.value.upper(),
-                model=model,
-                provider=provider_str,
-                cost=cost,
-                tokens=tokens,
-                cache_hit=cache_hit,
-                cache_type=cache_type,
-                duration_ms=duration_ms,
-            )
-        except (AttributeError, TypeError, ValueError) as e:
-            # INTENTIONAL: Telemetry tracking failures should never crash workflows
-            logger.debug(f"Failed to track telemetry (config/data error): {e}")
-        except (OSError, PermissionError) as e:
-            # File system errors - log but never crash workflow
-            logger.debug(f"Failed to track telemetry (file system error): {e}")
+    # Note: _track_telemetry is inherited from TelemetryMixin
 
     def _calculate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
         """Calculate cost for a stage."""
@@ -784,32 +871,20 @@ class BaseWorkflow(ABC):
         savings = baseline_cost - total_cost
         savings_percent = (savings / baseline_cost * 100) if baseline_cost > 0 else 0.0
 
-        # Calculate cache metrics if cache is enabled
-        cache_hits = 0
-        cache_misses = 0
-        cache_hit_rate = 0.0
+        # Calculate cache metrics using CachingMixin
+        cache_stats = self._get_cache_stats()
+        cache_hits = cache_stats["hits"]
+        cache_misses = cache_stats["misses"]
+        cache_hit_rate = cache_stats["hit_rate"]
         estimated_cost_without_cache = total_cost
         savings_from_cache = 0.0
 
-        if self._cache is not None:
-            try:
-                stats = self._cache.get_stats()
-                cache_hits = stats.hits
-                cache_misses = stats.misses
-                cache_hit_rate = stats.hit_rate
-
-                # Estimate cost without cache (assumes cache hits would have incurred full cost)
-                # This is a conservative estimate
-                if cache_hits > 0:
-                    # Average cost per non-cached call
-                    avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
-                    # Estimated additional cost if cache hits were actual API calls
-                    estimated_additional_cost = cache_hits * avg_cost_per_call
-                    estimated_cost_without_cache = total_cost + estimated_additional_cost
-                    savings_from_cache = estimated_additional_cost
-            except (AttributeError, TypeError):
-                # Cache doesn't support stats or error occurred
-                pass
+        # Estimate cost without cache (assumes cache hits would have incurred full cost)
+        if cache_hits > 0:
+            avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
+            estimated_additional_cost = cache_hits * avg_cost_per_call
+            estimated_cost_without_cache = total_cost + estimated_additional_cost
+            savings_from_cache = estimated_additional_cost
 
         return CostReport(
             total_cost=total_cost,
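
A worked example of the savings estimate above. Each hit is priced at the average cost of the calls that actually ran, a rough reconstruction since the hits never incurred real spend:

    total_cost = 0.30  # actual spend across the 3 cache misses
    cache_hits, cache_misses = 2, 3

    avg_cost_per_call = total_cost / cache_misses          # 0.10
    estimated_additional = cache_hits * avg_cost_per_call  # 0.20
    print(total_cost + estimated_additional)  # 0.50 estimated cost without cache
    print(estimated_additional)               # 0.20 savings_from_cache
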
@@ -956,7 +1031,8 @@ class BaseWorkflow(ABC):
 
         # Log routing start
         try:
-            self._telemetry_backend.log_task_routing(routing_record)
+            if self._telemetry_backend is not None:
+                self._telemetry_backend.log_task_routing(routing_record)
         except Exception as e:
             logger.debug(f"Failed to log task routing: {e}")
 
@@ -1144,10 +1220,20 @@ class BaseWorkflow(ABC):
                 self._progress_tracker.fail_stage(stage_name, error_msg)
                 raise ValueError(error_msg)
 
-        # Standard mode: use configured tier_map (backward compatible)
+        # Standard mode: use routing strategy or tier_map (backward compatible)
         else:
+            # Track budget for routing decisions
+            total_budget = 100.0  # Default budget in USD
+            budget_spent = 0.0
+
             for stage_name in self.stages:
-                tier = self.get_tier_for_stage(stage_name)
+                # Use routing strategy if available, otherwise fall back to tier_map
+                budget_remaining = total_budget - budget_spent
+                tier = self._get_tier_with_routing(
+                    stage_name,
+                    current_data if isinstance(current_data, dict) else {},
+                    budget_remaining,
+                )
                 stage_start = datetime.now()
 
                 # Check if stage should be skipped
@@ -1185,6 +1271,9 @@ class BaseWorkflow(ABC):
                 duration_ms = int((stage_end - stage_start).total_seconds() * 1000)
                 cost = self._calculate_cost(tier, input_tokens, output_tokens)
 
+                # Update budget spent for routing decisions
+                budget_spent += cost
+
                 stage = WorkflowStage(
                     name=stage_name,
                     tier=tier,
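
A standalone sketch of the budget bookkeeping now threaded through the stage loop: each stage's routing decision sees only what remains of the (currently hard-coded) $100 budget, so a cost-aware strategy can downgrade tiers as spend accumulates. Stage names and costs here are invented:

    stages = ["triage", "analyze", "report"]
    stage_costs = {"triage": 0.02, "analyze": 1.50, "report": 0.40}

    total_budget, budget_spent = 100.0, 0.0
    for stage in stages:
        budget_remaining = total_budget - budget_spent
        print(f"{stage}: ${budget_remaining:.2f} remaining before routing")
        budget_spent += stage_costs[stage]  # base.py adds the real stage cost here
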
@@ -1364,7 +1453,8 @@ class BaseWorkflow(ABC):
 
         # Log routing completion
         try:
-            self._telemetry_backend.log_task_routing(routing_record)
+            if self._telemetry_backend is not None:
+                self._telemetry_backend.log_task_routing(routing_record)
         except Exception as e:
             logger.debug(f"Failed to log task routing completion: {e}")
 
@@ -1543,119 +1633,7 @@ class BaseWorkflow(ABC):
             self._executor = self._create_default_executor()
         return self._executor
 
-    def _emit_call_telemetry(
-        self,
-        step_name: str,
-        task_type: str,
-        tier: str,
-        model_id: str,
-        input_tokens: int,
-        output_tokens: int,
-        cost: float,
-        latency_ms: int,
-        success: bool = True,
-        error_message: str | None = None,
-        fallback_used: bool = False,
-    ) -> None:
-        """Emit an LLMCallRecord to the telemetry backend.
-
-        Args:
-            step_name: Name of the workflow step
-            task_type: Task type used for routing
-            tier: Model tier used
-            model_id: Model ID used
-            input_tokens: Input token count
-            output_tokens: Output token count
-            cost: Estimated cost
-            latency_ms: Latency in milliseconds
-            success: Whether the call succeeded
-            error_message: Error message if failed
-            fallback_used: Whether fallback was used
-
-        """
-        record = LLMCallRecord(
-            call_id=str(uuid.uuid4()),
-            timestamp=datetime.now().isoformat(),
-            workflow_name=self.name,
-            step_name=step_name,
-            task_type=task_type,
-            provider=self._provider_str,
-            tier=tier,
-            model_id=model_id,
-            input_tokens=input_tokens,
-            output_tokens=output_tokens,
-            estimated_cost=cost,
-            latency_ms=latency_ms,
-            success=success,
-            error_message=error_message,
-            fallback_used=fallback_used,
-            metadata={"run_id": self._run_id},
-        )
-        try:
-            self._telemetry_backend.log_call(record)
-        except (AttributeError, ValueError, TypeError):
-            # Telemetry backend errors - log but don't crash workflow
-            logger.debug("Failed to log call telemetry (backend error)")
-        except OSError:
-            # File system errors - log but don't crash workflow
-            logger.debug("Failed to log call telemetry (file system error)")
-        except Exception:  # noqa: BLE001
-            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
-            logger.debug("Unexpected error logging call telemetry")
-
-    def _emit_workflow_telemetry(self, result: WorkflowResult) -> None:
-        """Emit a WorkflowRunRecord to the telemetry backend.
-
-        Args:
-            result: The workflow result to record
-
-        """
-        # Build stage records
-        stages = [
-            WorkflowStageRecord(
-                stage_name=s.name,
-                tier=s.tier.value,
-                model_id=self.get_model_for_tier(s.tier),
-                input_tokens=s.input_tokens,
-                output_tokens=s.output_tokens,
-                cost=s.cost,
-                latency_ms=s.duration_ms,
-                success=not s.skipped and result.error is None,
-                skipped=s.skipped,
-                skip_reason=s.skip_reason,
-            )
-            for s in result.stages
-        ]
-
-        record = WorkflowRunRecord(
-            run_id=self._run_id or str(uuid.uuid4()),
-            workflow_name=self.name,
-            started_at=result.started_at.isoformat(),
-            completed_at=result.completed_at.isoformat(),
-            stages=stages,
-            total_input_tokens=sum(s.input_tokens for s in result.stages if not s.skipped),
-            total_output_tokens=sum(s.output_tokens for s in result.stages if not s.skipped),
-            total_cost=result.cost_report.total_cost,
-            baseline_cost=result.cost_report.baseline_cost,
-            savings=result.cost_report.savings,
-            savings_percent=result.cost_report.savings_percent,
-            total_duration_ms=result.total_duration_ms,
-            success=result.success,
-            error=result.error,
-            providers_used=[self._provider_str],
-            tiers_used=list(result.cost_report.by_tier.keys()),
-        )
-        try:
-            self._telemetry_backend.log_workflow(record)
-        except (AttributeError, ValueError, TypeError):
-            # Telemetry backend errors - log but don't crash workflow
-            logger.debug("Failed to log workflow telemetry (backend error)")
-        except OSError:
-            # File system errors - log but don't crash workflow
-            logger.debug("Failed to log workflow telemetry (file system error)")
-        except Exception:  # noqa: BLE001
-            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
-            logger.debug("Unexpected error logging workflow telemetry")
+    # Note: _emit_call_telemetry and _emit_workflow_telemetry are inherited from TelemetryMixin
 
     async def run_step_with_executor(
         self,