empathy-framework 4.7.1-py3-none-any.whl → 4.8.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (90)
  1. {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/METADATA +65 -2
  2. {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/RECORD +73 -52
  3. {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/WHEEL +1 -1
  4. {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/entry_points.txt +2 -1
  5. {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/top_level.txt +0 -1
  6. empathy_os/__init__.py +2 -0
  7. empathy_os/cache/hash_only.py +6 -3
  8. empathy_os/cache/hybrid.py +6 -3
  9. empathy_os/cli/__init__.py +128 -238
  10. empathy_os/cli/__main__.py +5 -33
  11. empathy_os/cli/commands/__init__.py +1 -8
  12. empathy_os/cli/commands/help.py +331 -0
  13. empathy_os/cli/commands/info.py +140 -0
  14. empathy_os/cli/commands/inspect.py +437 -0
  15. empathy_os/cli/commands/metrics.py +92 -0
  16. empathy_os/cli/commands/orchestrate.py +184 -0
  17. empathy_os/cli/commands/patterns.py +207 -0
  18. empathy_os/cli/commands/provider.py +93 -81
  19. empathy_os/cli/commands/setup.py +96 -0
  20. empathy_os/cli/commands/status.py +235 -0
  21. empathy_os/cli/commands/sync.py +166 -0
  22. empathy_os/cli/commands/tier.py +121 -0
  23. empathy_os/cli/commands/workflow.py +574 -0
  24. empathy_os/cli/parsers/__init__.py +62 -0
  25. empathy_os/cli/parsers/help.py +41 -0
  26. empathy_os/cli/parsers/info.py +26 -0
  27. empathy_os/cli/parsers/inspect.py +66 -0
  28. empathy_os/cli/parsers/metrics.py +42 -0
  29. empathy_os/cli/parsers/orchestrate.py +61 -0
  30. empathy_os/cli/parsers/patterns.py +54 -0
  31. empathy_os/cli/parsers/provider.py +40 -0
  32. empathy_os/cli/parsers/setup.py +42 -0
  33. empathy_os/cli/parsers/status.py +47 -0
  34. empathy_os/cli/parsers/sync.py +31 -0
  35. empathy_os/cli/parsers/tier.py +33 -0
  36. empathy_os/cli/parsers/workflow.py +77 -0
  37. empathy_os/cli/utils/__init__.py +1 -0
  38. empathy_os/cli/utils/data.py +242 -0
  39. empathy_os/cli/utils/helpers.py +68 -0
  40. empathy_os/{cli.py → cli_legacy.py} +27 -27
  41. empathy_os/cli_minimal.py +662 -0
  42. empathy_os/cli_router.py +384 -0
  43. empathy_os/cli_unified.py +38 -2
  44. empathy_os/memory/__init__.py +19 -5
  45. empathy_os/memory/short_term.py +14 -404
  46. empathy_os/memory/types.py +437 -0
  47. empathy_os/memory/unified.py +61 -48
  48. empathy_os/models/fallback.py +1 -1
  49. empathy_os/models/provider_config.py +59 -344
  50. empathy_os/models/registry.py +31 -180
  51. empathy_os/monitoring/alerts.py +14 -20
  52. empathy_os/monitoring/alerts_cli.py +24 -7
  53. empathy_os/project_index/__init__.py +2 -0
  54. empathy_os/project_index/index.py +210 -5
  55. empathy_os/project_index/scanner.py +45 -14
  56. empathy_os/project_index/scanner_parallel.py +291 -0
  57. empathy_os/socratic/ab_testing.py +1 -1
  58. empathy_os/workflows/__init__.py +31 -2
  59. empathy_os/workflows/base.py +349 -325
  60. empathy_os/workflows/bug_predict.py +8 -0
  61. empathy_os/workflows/builder.py +273 -0
  62. empathy_os/workflows/caching.py +253 -0
  63. empathy_os/workflows/code_review_pipeline.py +1 -0
  64. empathy_os/workflows/history.py +510 -0
  65. empathy_os/workflows/output.py +410 -0
  66. empathy_os/workflows/perf_audit.py +125 -19
  67. empathy_os/workflows/progress.py +324 -22
  68. empathy_os/workflows/routing.py +168 -0
  69. empathy_os/workflows/secure_release.py +1 -0
  70. empathy_os/workflows/security_audit.py +190 -0
  71. empathy_os/workflows/security_audit_phase3.py +328 -0
  72. empathy_os/workflows/telemetry_mixin.py +269 -0
  73. empathy_os/dashboard/__init__.py +0 -15
  74. empathy_os/dashboard/server.py +0 -941
  75. patterns/README.md +0 -119
  76. patterns/__init__.py +0 -95
  77. patterns/behavior.py +0 -298
  78. patterns/code_review_memory.json +0 -441
  79. patterns/core.py +0 -97
  80. patterns/debugging.json +0 -3763
  81. patterns/empathy.py +0 -268
  82. patterns/health_check_memory.json +0 -505
  83. patterns/input.py +0 -161
  84. patterns/memory_graph.json +0 -8
  85. patterns/refactoring_memory.json +0 -1113
  86. patterns/registry.py +0 -663
  87. patterns/security_memory.json +0 -8
  88. patterns/structural.py +0 -415
  89. patterns/validation.py +0 -194
  90. {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/licenses/LICENSE +0 -0
empathy_os/workflows/base.py

@@ -17,6 +17,7 @@ from __future__ import annotations
 
 import json
 import logging
+import sys
 import time
 import uuid
 from abc import ABC, abstractmethod
@@ -27,6 +28,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
+    from .routing import TierRoutingStrategy
     from .tier_tracking import WorkflowTierTracker
 
 # Load .env file for API keys if python-dotenv is available
@@ -38,26 +40,31 @@ except ImportError:
     pass  # python-dotenv not installed, rely on environment variables
 
 # Import caching infrastructure
-from empathy_os.cache import BaseCache, auto_setup_cache, create_cache
+from empathy_os.cache import BaseCache
 from empathy_os.config import _validate_file_path
 from empathy_os.cost_tracker import MODEL_PRICING, CostTracker
 
 # Import unified types from empathy_os.models
 from empathy_os.models import (
     ExecutionContext,
-    LLMCallRecord,
     LLMExecutor,
     TaskRoutingRecord,
     TelemetryBackend,
-    WorkflowRunRecord,
-    WorkflowStageRecord,
-    get_telemetry_store,
 )
 from empathy_os.models import ModelProvider as UnifiedModelProvider
 from empathy_os.models import ModelTier as UnifiedModelTier
 
+# Import mixins (extracted for maintainability)
+from .caching import CachedResponse, CachingMixin
+
 # Import progress tracking
-from .progress import ProgressCallback, ProgressTracker
+from .progress import (
+    RICH_AVAILABLE,
+    ProgressCallback,
+    ProgressTracker,
+    RichProgressReporter,
+)
+from .telemetry_mixin import TelemetryMixin
 
 # Import telemetry tracking
 try:
@@ -78,15 +85,47 @@ logger = logging.getLogger(__name__)
 WORKFLOW_HISTORY_FILE = ".empathy/workflow_runs.json"
 
 
-# Local enums for backward compatibility
+# Local enums for backward compatibility - DEPRECATED
 # New code should use empathy_os.models.ModelTier/ModelProvider
 class ModelTier(Enum):
-    """Model tier for cost optimization."""
+    """DEPRECATED: Model tier for cost optimization.
+
+    This enum is deprecated and will be removed in v5.0.
+    Use empathy_os.models.ModelTier instead.
+
+    Migration:
+        # Old:
+        from empathy_os.workflows.base import ModelTier
+
+        # New:
+        from empathy_os.models import ModelTier
+
+    Why deprecated:
+    - Creates confusion with dual definitions
+    - empathy_os.models.ModelTier is the canonical location
+    - Simplifies imports and reduces duplication
+    """
 
     CHEAP = "cheap"  # Haiku/GPT-4o-mini - $0.25-1.25/M tokens
     CAPABLE = "capable"  # Sonnet/GPT-4o - $3-15/M tokens
     PREMIUM = "premium"  # Opus/o1 - $15-75/M tokens
 
+    def __init__(self, value: str):
+        """Initialize with deprecation warning."""
+        # Only warn once per process, not per instance
+        import warnings
+
+        # Use self.__class__ instead of ModelTier (class not yet defined during creation)
+        if not hasattr(self.__class__, "_deprecation_warned"):
+            warnings.warn(
+                "workflows.base.ModelTier is deprecated and will be removed in v5.0. "
+                "Use empathy_os.models.ModelTier instead. "
+                "Update imports: from empathy_os.models import ModelTier",
+                DeprecationWarning,
+                stacklevel=4,
+            )
+            self.__class__._deprecation_warned = True
+
     def to_unified(self) -> UnifiedModelTier:
         """Convert to unified ModelTier from empathy_os.models."""
         return UnifiedModelTier(self.value)
@@ -214,8 +253,52 @@ class WorkflowResult:
     transient: bool = False  # True if retry is reasonable (e.g., provider timeout)
 
 
+# Global singleton for workflow history store (lazy-initialized)
+_history_store: Any = None  # WorkflowHistoryStore | None
+
+
+def _get_history_store():
+    """Get or create workflow history store singleton.
+
+    Returns SQLite-based history store. Falls back to None if initialization fails.
+    """
+    global _history_store
+
+    if _history_store is None:
+        try:
+            from .history import WorkflowHistoryStore
+
+            _history_store = WorkflowHistoryStore()
+            logger.debug("Workflow history store initialized (SQLite)")
+        except (ImportError, OSError, PermissionError) as e:
+            # File system errors or missing dependencies
+            logger.warning(f"Failed to initialize SQLite history store: {e}")
+            _history_store = False  # Mark as failed to avoid repeated attempts
+
+    # Return store or None if initialization failed
+    return _history_store if _history_store is not False else None
+
+
 def _load_workflow_history(history_file: str = WORKFLOW_HISTORY_FILE) -> list[dict]:
-    """Load workflow run history from disk."""
+    """Load workflow run history from disk (legacy JSON support).
+
+    DEPRECATED: Use WorkflowHistoryStore for new code.
+    This function is maintained for backward compatibility.
+
+    Args:
+        history_file: Path to JSON history file
+
+    Returns:
+        List of workflow run dictionaries
+    """
+    import warnings
+
+    warnings.warn(
+        "_load_workflow_history is deprecated. Use WorkflowHistoryStore instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+
     path = Path(history_file)
     if not path.exists():
         return []
@@ -234,11 +317,42 @@ def _save_workflow_run(
     history_file: str = WORKFLOW_HISTORY_FILE,
     max_history: int = 100,
 ) -> None:
-    """Save a workflow run to history."""
+    """Save a workflow run to history.
+
+    Uses SQLite-based storage by default. Falls back to JSON if SQLite unavailable.
+
+    Args:
+        workflow_name: Name of the workflow
+        provider: Provider used (anthropic, openai, google)
+        result: WorkflowResult object
+        history_file: Legacy JSON path (ignored if SQLite available)
+        max_history: Legacy max history limit (ignored if SQLite available)
+    """
+    # Try SQLite first (new approach)
+    store = _get_history_store()
+    if store is not None:
+        try:
+            run_id = str(uuid.uuid4())
+            store.record_run(run_id, workflow_name, provider, result)
+            logger.debug(f"Workflow run saved to SQLite: {run_id}")
+            return
+        except (OSError, PermissionError, ValueError) as e:
+            # SQLite failed, fall back to JSON
+            logger.warning(f"Failed to save to SQLite, falling back to JSON: {e}")
+
+    # Fallback: Legacy JSON storage
+    logger.debug("Using legacy JSON storage for workflow history")
     path = Path(history_file)
     path.parent.mkdir(parents=True, exist_ok=True)
 
-    history = _load_workflow_history(history_file)
+    history = []
+    if path.exists():
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            history = list(data) if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            pass
 
     # Create run record
     run: dict = {
@@ -285,20 +399,48 @@ def _save_workflow_run(
 def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
     """Get workflow statistics for dashboard.
 
+    Uses SQLite-based storage by default. Falls back to JSON if unavailable.
+
+    Args:
+        history_file: Legacy JSON path (used only if SQLite unavailable)
+
     Returns:
         Dictionary with workflow stats including:
         - total_runs: Total workflow runs
+        - successful_runs: Number of successful runs
         - by_workflow: Per-workflow stats
         - by_provider: Per-provider stats
+        - by_tier: Cost breakdown by tier
         - recent_runs: Last 10 runs
+        - total_cost: Total cost across all runs
         - total_savings: Total cost savings
-
+        - avg_savings_percent: Average savings percentage
     """
-    history = _load_workflow_history(history_file)
+    # Try SQLite first (new approach)
+    store = _get_history_store()
+    if store is not None:
+        try:
+            return store.get_stats()
+        except (OSError, PermissionError, ValueError) as e:
+            # SQLite failed, fall back to JSON
+            logger.warning(f"Failed to get stats from SQLite, falling back to JSON: {e}")
+
+    # Fallback: Legacy JSON storage
+    logger.debug("Using legacy JSON storage for workflow stats")
+    history = []
+    path = Path(history_file)
+    if path.exists():
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            history = list(data) if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            pass
 
     if not history:
         return {
             "total_runs": 0,
+            "successful_runs": 0,
             "by_workflow": {},
             "by_provider": {},
             "by_tier": {"cheap": 0, "capable": 0, "premium": 0},
@@ -368,9 +510,11 @@ def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
     }
 
 
-class BaseWorkflow(ABC):
+class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
     """Base class for multi-model workflows.
 
+    Inherits from CachingMixin and TelemetryMixin (extracted for maintainability).
+
     Subclasses define stages and tier mappings:
 
         class MyWorkflow(BaseWorkflow):
@@ -405,6 +549,8 @@ class BaseWorkflow(ABC):
         enable_cache: bool = True,
         enable_tier_tracking: bool = True,
         enable_tier_fallback: bool = False,
+        routing_strategy: TierRoutingStrategy | None = None,
+        enable_rich_progress: bool = False,
     ):
         """Initialize workflow with optional cost tracker, provider, and config.
 
@@ -426,6 +572,15 @@ class BaseWorkflow(ABC):
             enable_tier_tracking: Whether to enable automatic tier tracking (default True).
             enable_tier_fallback: Whether to enable intelligent tier fallback
                 (CHEAP → CAPABLE → PREMIUM). Opt-in feature (default False).
+            routing_strategy: Optional TierRoutingStrategy for dynamic tier selection.
+                When provided, overrides static tier_map for stage tier decisions.
+                Strategies: CostOptimizedRouting, PerformanceOptimizedRouting,
+                BalancedRouting, HybridRouting.
+            enable_rich_progress: Whether to enable Rich-based live progress display
+                (default False). When enabled and output is a TTY, shows live
+                progress bars with spinners. Default is False because most users
+                run workflows from IDEs (VSCode, etc.) where TTY is not available.
+                The console reporter works reliably in all environments.
 
         """
         from .config import WorkflowConfig
@@ -436,11 +591,11 @@ class BaseWorkflow(ABC):
         # Progress tracking
         self._progress_callback = progress_callback
         self._progress_tracker: ProgressTracker | None = None
+        self._enable_rich_progress = enable_rich_progress
+        self._rich_reporter: RichProgressReporter | None = None
 
         # New: LLMExecutor support
         self._executor = executor
-        self._telemetry_backend = telemetry_backend or get_telemetry_store()
-        self._run_id: str | None = None  # Set at start of execute()
         self._api_key: str | None = None  # For default executor creation
 
         # Cache support
@@ -456,20 +611,11 @@ class BaseWorkflow(ABC):
         self._enable_tier_fallback = enable_tier_fallback
         self._tier_progression: list[tuple[str, str, bool]] = []  # (stage, tier, success)
 
-        # Telemetry tracking (singleton instance)
-        self._telemetry_tracker: UsageTracker | None = None
-        self._enable_telemetry = True  # Enable by default
-        if TELEMETRY_AVAILABLE and UsageTracker is not None:
-            try:
-                self._telemetry_tracker = UsageTracker.get_instance()
-            except (OSError, PermissionError) as e:
-                # File system errors - log but disable telemetry
-                logger.debug(f"Failed to initialize telemetry tracker (file system error): {e}")
-                self._enable_telemetry = False
-            except (AttributeError, TypeError, ValueError) as e:
-                # Configuration or initialization errors
-                logger.debug(f"Failed to initialize telemetry tracker (config error): {e}")
-                self._enable_telemetry = False
+        # Routing strategy support
+        self._routing_strategy: TierRoutingStrategy | None = routing_strategy
+
+        # Telemetry tracking (uses TelemetryMixin)
+        self._init_telemetry(telemetry_backend)
 
         # Load config if not provided
         self._config = config or WorkflowConfig.load()
@@ -494,9 +640,83 @@ class BaseWorkflow(ABC):
         self.provider = provider
 
     def get_tier_for_stage(self, stage_name: str) -> ModelTier:
-        """Get the model tier for a stage."""
+        """Get the model tier for a stage from static tier_map."""
        return self.tier_map.get(stage_name, ModelTier.CAPABLE)
 
+    def _get_tier_with_routing(
+        self,
+        stage_name: str,
+        input_data: dict[str, Any],
+        budget_remaining: float = 100.0,
+    ) -> ModelTier:
+        """Get tier for a stage using routing strategy if available.
+
+        If a routing strategy is configured, creates a RoutingContext and
+        delegates tier selection to the strategy. Otherwise falls back to
+        the static tier_map.
+
+        Args:
+            stage_name: Name of the stage
+            input_data: Current workflow data (used to estimate input size)
+            budget_remaining: Remaining budget in USD for this execution
+
+        Returns:
+            ModelTier to use for this stage
+        """
+        # Fall back to static tier_map if no routing strategy
+        if self._routing_strategy is None:
+            return self.get_tier_for_stage(stage_name)
+
+        from .routing import RoutingContext
+
+        # Estimate input size from data
+        input_size = self._estimate_input_tokens(input_data)
+
+        # Assess complexity
+        complexity = self._assess_complexity(input_data)
+
+        # Determine latency sensitivity based on stage position
+        # First stages are more latency-sensitive (user waiting)
+        stage_index = self.stages.index(stage_name) if stage_name in self.stages else 0
+        if stage_index == 0:
+            latency_sensitivity = "high"
+        elif stage_index < len(self.stages) // 2:
+            latency_sensitivity = "medium"
+        else:
+            latency_sensitivity = "low"
+
+        # Create routing context
+        context = RoutingContext(
+            task_type=f"{self.name}:{stage_name}",
+            input_size=input_size,
+            complexity=complexity,
+            budget_remaining=budget_remaining,
+            latency_sensitivity=latency_sensitivity,
+        )
+
+        # Delegate to routing strategy
+        return self._routing_strategy.route(context)
+
+    def _estimate_input_tokens(self, input_data: dict[str, Any]) -> int:
+        """Estimate input token count from data.
+
+        Simple heuristic: ~4 characters per token on average.
+
+        Args:
+            input_data: Workflow input data
+
+        Returns:
+            Estimated token count
+        """
+        import json
+
+        try:
+            # Serialize to estimate size
+            data_str = json.dumps(input_data, default=str)
+            return len(data_str) // 4
+        except (TypeError, ValueError):
+            return 1000  # Default estimate
+
     def get_model_for_tier(self, tier: ModelTier) -> str:
         """Get the model for a tier based on configured provider and config."""
         from .config import get_model
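The RoutingContext constructed above defines the full contract a strategy sees. A minimal sketch of a custom strategy, assuming TierRoutingStrategy only requires the route(context) hook that the delegation above calls; the class name and thresholds here are invented, and the built-in strategies named in the docstring are CostOptimizedRouting, PerformanceOptimizedRouting, BalancedRouting, and HybridRouting:

    # Sketch only: assumes TierRoutingStrategy is subclassable with a route() override.
    from empathy_os.models import ModelTier  # canonical tier enum per the deprecation note
    from empathy_os.workflows.routing import RoutingContext, TierRoutingStrategy

    class BudgetAwareRouting(TierRoutingStrategy):
        """Drop to the cheap tier when little budget remains (thresholds invented)."""

        def route(self, context: RoutingContext) -> ModelTier:
            if context.budget_remaining < 1.0:  # nearly out of budget (USD)
                return ModelTier.CHEAP
            if context.latency_sensitivity == "high":  # first stage, user waiting
                return ModelTier.CAPABLE
            # Large inputs get the premium tier; input_size is an estimated token count.
            return ModelTier.PREMIUM if context.input_size > 50_000 else ModelTier.CAPABLE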
@@ -507,43 +727,7 @@ class BaseWorkflow(ABC):
         model = get_model(provider_str, tier.value, self._config)
         return model
 
-    def _maybe_setup_cache(self) -> None:
-        """Set up cache with one-time user prompt if needed.
-
-        This is called lazily on first workflow execution to avoid
-        blocking workflow initialization.
-        """
-        if not self._enable_cache:
-            return
-
-        if self._cache_setup_attempted:
-            return
-
-        self._cache_setup_attempted = True
-
-        # If cache already provided, use it
-        if self._cache is not None:
-            return
-
-        # Otherwise, trigger auto-setup (which may prompt user)
-        try:
-            auto_setup_cache()
-            self._cache = create_cache()
-            logger.info(f"Cache initialized for workflow: {self.name}")
-        except ImportError as e:
-            # Hybrid cache dependencies not available, fall back to hash-only
-            logger.info(
-                f"Using hash-only cache (install empathy-framework[cache] for semantic caching): {e}"
-            )
-            self._cache = create_cache(cache_type="hash")
-        except (OSError, PermissionError) as e:
-            # File system errors - disable cache
-            logger.warning(f"Cache setup failed (file system error): {e}, continuing without cache")
-            self._enable_cache = False
-        except (ValueError, TypeError, AttributeError) as e:
-            # Configuration errors - disable cache
-            logger.warning(f"Cache setup failed (config error): {e}, continuing without cache")
-            self._enable_cache = False
+    # Note: _maybe_setup_cache is inherited from CachingMixin
 
     async def _call_llm(
         self,
@@ -582,54 +766,26 @@ class BaseWorkflow(ABC):
         model = self.get_model_for_tier(tier)
         cache_type = None
 
-        # Try cache lookup if enabled
-        if self._enable_cache and self._cache is not None:
-            try:
-                # Combine system + user message for cache key
-                full_prompt = f"{system}\n\n{user_message}" if system else user_message
-                cached_response = self._cache.get(self.name, stage, full_prompt, model)
-
-                if cached_response is not None:
-                    logger.debug(f"Cache hit for {self.name}:{stage}")
-                    # Determine cache type
-                    if hasattr(self._cache, "cache_type"):
-                        ct = self._cache.cache_type
-                        # Ensure it's a string (not a Mock object)
-                        cache_type = str(ct) if ct and isinstance(ct, str) else "hash"
-                    else:
-                        cache_type = "hash"  # Default assumption
-
-                    # Track telemetry for cache hit
-                    duration_ms = int((time.time() - start_time) * 1000)
-                    in_tokens = cached_response["input_tokens"]
-                    out_tokens = cached_response["output_tokens"]
-                    cost = self._calculate_cost(tier, in_tokens, out_tokens)
+        # Try cache lookup using CachingMixin
+        cached = self._try_cache_lookup(stage, system, user_message, model)
+        if cached is not None:
+            # Track telemetry for cache hit
+            duration_ms = int((time.time() - start_time) * 1000)
+            cost = self._calculate_cost(tier, cached.input_tokens, cached.output_tokens)
+            cache_type = self._get_cache_type()
 
-                    self._track_telemetry(
-                        stage=stage,
-                        tier=tier,
-                        model=model,
-                        cost=cost,
-                        tokens={"input": in_tokens, "output": out_tokens},
-                        cache_hit=True,
-                        cache_type=cache_type,
-                        duration_ms=duration_ms,
-                    )
+            self._track_telemetry(
+                stage=stage,
+                tier=tier,
+                model=model,
+                cost=cost,
+                tokens={"input": cached.input_tokens, "output": cached.output_tokens},
+                cache_hit=True,
+                cache_type=cache_type,
+                duration_ms=duration_ms,
+            )
 
-                    # Cached response is dict with content, input_tokens, output_tokens
-                    return (
-                        cached_response["content"],
-                        cached_response["input_tokens"],
-                        cached_response["output_tokens"],
-                    )
-            except (KeyError, TypeError, ValueError) as e:
-                # Malformed cache data - continue with LLM call
-                logger.debug(f"Cache lookup failed (malformed data): {e}, continuing with LLM call")
-            except (OSError, PermissionError) as e:
-                # File system errors - continue with LLM call
-                logger.debug(
-                    f"Cache lookup failed (file system error): {e}, continuing with LLM call"
-                )
+            return (cached.content, cached.input_tokens, cached.output_tokens)
 
         # Create a step config for this call
         step = WorkflowStepConfig(
@@ -662,23 +818,14 @@ class BaseWorkflow(ABC):
                 duration_ms=duration_ms,
             )
 
-            # Store in cache if enabled
-            if self._enable_cache and self._cache is not None:
-                try:
-                    full_prompt = f"{system}\n\n{user_message}" if system else user_message
-                    response_data = {
-                        "content": content,
-                        "input_tokens": in_tokens,
-                        "output_tokens": out_tokens,
-                    }
-                    self._cache.put(self.name, stage, full_prompt, model, response_data)
-                    logger.debug(f"Cached response for {self.name}:{stage}")
-                except (OSError, PermissionError) as e:
-                    # File system errors - log but continue
-                    logger.debug(f"Failed to cache response (file system error): {e}")
-                except (ValueError, TypeError, KeyError) as e:
-                    # Data serialization errors - log but continue
-                    logger.debug(f"Failed to cache response (serialization error): {e}")
+            # Store in cache using CachingMixin
+            self._store_in_cache(
+                stage,
+                system,
+                user_message,
+                model,
+                CachedResponse(content=content, input_tokens=in_tokens, output_tokens=out_tokens),
+            )
 
             return content, in_tokens, out_tokens
         except (ValueError, TypeError, KeyError) as e:
@@ -698,53 +845,7 @@ class BaseWorkflow(ABC):
             logger.exception(f"Unexpected error calling LLM: {e}")
             return f"Error calling LLM: {type(e).__name__}", 0, 0
 
-    def _track_telemetry(
-        self,
-        stage: str,
-        tier: ModelTier,
-        model: str,
-        cost: float,
-        tokens: dict[str, int],
-        cache_hit: bool,
-        cache_type: str | None,
-        duration_ms: int,
-    ) -> None:
-        """Track telemetry for an LLM call.
-
-        Args:
-            stage: Stage name
-            tier: Model tier used
-            model: Model ID used
-            cost: Cost in USD
-            tokens: Dictionary with "input" and "output" token counts
-            cache_hit: Whether this was a cache hit
-            cache_type: Cache type if cache hit
-            duration_ms: Duration in milliseconds
-
-        """
-        if not self._enable_telemetry or self._telemetry_tracker is None:
-            return
-
-        try:
-            provider_str = getattr(self, "_provider_str", "unknown")
-            self._telemetry_tracker.track_llm_call(
-                workflow=self.name,
-                stage=stage,
-                tier=tier.value.upper(),
-                model=model,
-                provider=provider_str,
-                cost=cost,
-                tokens=tokens,
-                cache_hit=cache_hit,
-                cache_type=cache_type,
-                duration_ms=duration_ms,
-            )
-        except (AttributeError, TypeError, ValueError) as e:
-            # INTENTIONAL: Telemetry tracking failures should never crash workflows
-            logger.debug(f"Failed to track telemetry (config/data error): {e}")
-        except (OSError, PermissionError) as e:
-            # File system errors - log but never crash workflow
-            logger.debug(f"Failed to track telemetry (file system error): {e}")
+    # Note: _track_telemetry is inherited from TelemetryMixin
 
     def _calculate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
         """Calculate cost for a stage."""
@@ -784,32 +885,20 @@ class BaseWorkflow(ABC):
         savings = baseline_cost - total_cost
         savings_percent = (savings / baseline_cost * 100) if baseline_cost > 0 else 0.0
 
-        # Calculate cache metrics if cache is enabled
-        cache_hits = 0
-        cache_misses = 0
-        cache_hit_rate = 0.0
+        # Calculate cache metrics using CachingMixin
+        cache_stats = self._get_cache_stats()
+        cache_hits = cache_stats["hits"]
+        cache_misses = cache_stats["misses"]
+        cache_hit_rate = cache_stats["hit_rate"]
         estimated_cost_without_cache = total_cost
         savings_from_cache = 0.0
 
-        if self._cache is not None:
-            try:
-                stats = self._cache.get_stats()
-                cache_hits = stats.hits
-                cache_misses = stats.misses
-                cache_hit_rate = stats.hit_rate
-
-                # Estimate cost without cache (assumes cache hits would have incurred full cost)
-                # This is a conservative estimate
-                if cache_hits > 0:
-                    # Average cost per non-cached call
-                    avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
-                    # Estimated additional cost if cache hits were actual API calls
-                    estimated_additional_cost = cache_hits * avg_cost_per_call
-                    estimated_cost_without_cache = total_cost + estimated_additional_cost
-                    savings_from_cache = estimated_additional_cost
-            except (AttributeError, TypeError):
-                # Cache doesn't support stats or error occurred
-                pass
+        # Estimate cost without cache (assumes cache hits would have incurred full cost)
+        if cache_hits > 0:
+            avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
+            estimated_additional_cost = cache_hits * avg_cost_per_call
+            estimated_cost_without_cache = total_cost + estimated_additional_cost
+            savings_from_cache = estimated_additional_cost
 
         return CostReport(
             total_cost=total_cost,
@@ -956,7 +1045,8 @@ class BaseWorkflow(ABC):
 
         # Log routing start
         try:
-            self._telemetry_backend.log_task_routing(routing_record)
+            if self._telemetry_backend is not None:
+                self._telemetry_backend.log_task_routing(routing_record)
         except Exception as e:
             logger.debug(f"Failed to log task routing: {e}")
 
@@ -979,15 +1069,39 @@ class BaseWorkflow(ABC):
         current_data = kwargs
         error = None
 
-        # Initialize progress tracker if callback provided
+        # Initialize progress tracker
+        # Always show progress by default (IDE-friendly console output)
+        # Rich live display only when explicitly enabled AND in TTY
+        from .progress import ConsoleProgressReporter
+
+        self._progress_tracker = ProgressTracker(
+            workflow_name=self.name,
+            workflow_id=self._run_id,
+            stage_names=self.stages,
+        )
+
+        # Add user's callback if provided
         if self._progress_callback:
-            self._progress_tracker = ProgressTracker(
-                workflow_name=self.name,
-                workflow_id=self._run_id,
-                stage_names=self.stages,
-            )
             self._progress_tracker.add_callback(self._progress_callback)
-            self._progress_tracker.start_workflow()
+
+        # Rich progress: only when explicitly enabled AND in a TTY
+        if self._enable_rich_progress and RICH_AVAILABLE and sys.stdout.isatty():
+            try:
+                self._rich_reporter = RichProgressReporter(self.name, self.stages)
+                self._progress_tracker.add_callback(self._rich_reporter.report)
+                self._rich_reporter.start()
+            except Exception as e:
+                # Fall back to console reporter
+                logger.debug(f"Rich progress unavailable: {e}")
+                self._rich_reporter = None
+                console_reporter = ConsoleProgressReporter(verbose=False)
+                self._progress_tracker.add_callback(console_reporter.report)
+        else:
+            # Default: use console reporter (works in IDEs, terminals, everywhere)
+            console_reporter = ConsoleProgressReporter(verbose=False)
+            self._progress_tracker.add_callback(console_reporter.report)
+
+        self._progress_tracker.start_workflow()
 
         try:
             # Tier fallback mode: try CHEAP → CAPABLE → PREMIUM with validation
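Since the tracker now always runs, a user-supplied progress_callback simply joins the reporters registered above. A hedged sketch (the diff shows only registration via add_callback, so the single-event signature and payload shape are assumptions, and MyWorkflow stands in for any BaseWorkflow subclass):

    # Sketch only: assumes callbacks receive one progress-event argument.
    def log_progress(event) -> None:
        # Payload shape is not shown in this diff; print whatever arrives.
        print(f"[progress] {event}")

    wf = MyWorkflow(progress_callback=log_progress)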
@@ -1144,10 +1258,20 @@ class BaseWorkflow(ABC):
                         self._progress_tracker.fail_stage(stage_name, error_msg)
                     raise ValueError(error_msg)
 
-            # Standard mode: use configured tier_map (backward compatible)
+            # Standard mode: use routing strategy or tier_map (backward compatible)
             else:
+                # Track budget for routing decisions
+                total_budget = 100.0  # Default budget in USD
+                budget_spent = 0.0
+
                 for stage_name in self.stages:
-                    tier = self.get_tier_for_stage(stage_name)
+                    # Use routing strategy if available, otherwise fall back to tier_map
+                    budget_remaining = total_budget - budget_spent
+                    tier = self._get_tier_with_routing(
+                        stage_name,
+                        current_data if isinstance(current_data, dict) else {},
+                        budget_remaining,
+                    )
                     stage_start = datetime.now()
 
                     # Check if stage should be skipped
@@ -1185,6 +1309,9 @@ class BaseWorkflow(ABC):
                     duration_ms = int((stage_end - stage_start).total_seconds() * 1000)
                     cost = self._calculate_cost(tier, input_tokens, output_tokens)
 
+                    # Update budget spent for routing decisions
+                    budget_spent += cost
+
                     stage = WorkflowStage(
                         name=stage_name,
                         tier=tier,
@@ -1304,6 +1431,14 @@ class BaseWorkflow(ABC):
         if self._progress_tracker and error is None:
             self._progress_tracker.complete_workflow()
 
+        # Stop Rich progress display if active
+        if self._rich_reporter:
+            try:
+                self._rich_reporter.stop()
+            except Exception:
+                pass  # Best effort cleanup
+            self._rich_reporter = None
+
         # Save to workflow history for dashboard
         try:
             _save_workflow_run(self.name, provider_str, result)
@@ -1364,7 +1499,8 @@ class BaseWorkflow(ABC):
 
         # Log routing completion
         try:
-            self._telemetry_backend.log_task_routing(routing_record)
+            if self._telemetry_backend is not None:
+                self._telemetry_backend.log_task_routing(routing_record)
         except Exception as e:
             logger.debug(f"Failed to log task routing completion: {e}")
 
@@ -1543,119 +1679,7 @@ class BaseWorkflow(ABC):
             self._executor = self._create_default_executor()
         return self._executor
 
-    def _emit_call_telemetry(
-        self,
-        step_name: str,
-        task_type: str,
-        tier: str,
-        model_id: str,
-        input_tokens: int,
-        output_tokens: int,
-        cost: float,
-        latency_ms: int,
-        success: bool = True,
-        error_message: str | None = None,
-        fallback_used: bool = False,
-    ) -> None:
-        """Emit an LLMCallRecord to the telemetry backend.
-
-        Args:
-            step_name: Name of the workflow step
-            task_type: Task type used for routing
-            tier: Model tier used
-            model_id: Model ID used
-            input_tokens: Input token count
-            output_tokens: Output token count
-            cost: Estimated cost
-            latency_ms: Latency in milliseconds
-            success: Whether the call succeeded
-            error_message: Error message if failed
-            fallback_used: Whether fallback was used
-
-        """
-        record = LLMCallRecord(
-            call_id=str(uuid.uuid4()),
-            timestamp=datetime.now().isoformat(),
-            workflow_name=self.name,
-            step_name=step_name,
-            task_type=task_type,
-            provider=self._provider_str,
-            tier=tier,
-            model_id=model_id,
-            input_tokens=input_tokens,
-            output_tokens=output_tokens,
-            estimated_cost=cost,
-            latency_ms=latency_ms,
-            success=success,
-            error_message=error_message,
-            fallback_used=fallback_used,
-            metadata={"run_id": self._run_id},
-        )
-        try:
-            self._telemetry_backend.log_call(record)
-        except (AttributeError, ValueError, TypeError):
-            # Telemetry backend errors - log but don't crash workflow
-            logger.debug("Failed to log call telemetry (backend error)")
-        except OSError:
-            # File system errors - log but don't crash workflow
-            logger.debug("Failed to log call telemetry (file system error)")
-        except Exception:  # noqa: BLE001
-            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
-            logger.debug("Unexpected error logging call telemetry")
-
-    def _emit_workflow_telemetry(self, result: WorkflowResult) -> None:
-        """Emit a WorkflowRunRecord to the telemetry backend.
-
-        Args:
-            result: The workflow result to record
-
-        """
-        # Build stage records
-        stages = [
-            WorkflowStageRecord(
-                stage_name=s.name,
-                tier=s.tier.value,
-                model_id=self.get_model_for_tier(s.tier),
-                input_tokens=s.input_tokens,
-                output_tokens=s.output_tokens,
-                cost=s.cost,
-                latency_ms=s.duration_ms,
-                success=not s.skipped and result.error is None,
-                skipped=s.skipped,
-                skip_reason=s.skip_reason,
-            )
-            for s in result.stages
-        ]
-
-        record = WorkflowRunRecord(
-            run_id=self._run_id or str(uuid.uuid4()),
-            workflow_name=self.name,
-            started_at=result.started_at.isoformat(),
-            completed_at=result.completed_at.isoformat(),
-            stages=stages,
-            total_input_tokens=sum(s.input_tokens for s in result.stages if not s.skipped),
-            total_output_tokens=sum(s.output_tokens for s in result.stages if not s.skipped),
-            total_cost=result.cost_report.total_cost,
-            baseline_cost=result.cost_report.baseline_cost,
-            savings=result.cost_report.savings,
-            savings_percent=result.cost_report.savings_percent,
-            total_duration_ms=result.total_duration_ms,
-            success=result.success,
-            error=result.error,
-            providers_used=[self._provider_str],
-            tiers_used=list(result.cost_report.by_tier.keys()),
-        )
-        try:
-            self._telemetry_backend.log_workflow(record)
-        except (AttributeError, ValueError, TypeError):
-            # Telemetry backend errors - log but don't crash workflow
-            logger.debug("Failed to log workflow telemetry (backend error)")
-        except OSError:
-            # File system errors - log but don't crash workflow
-            logger.debug("Failed to log workflow telemetry (file system error)")
-        except Exception:  # noqa: BLE001
-            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
-            logger.debug("Unexpected error logging workflow telemetry")
+    # Note: _emit_call_telemetry and _emit_workflow_telemetry are inherited from TelemetryMixin
 
     async def run_step_with_executor(
         self,
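Putting the new constructor surface together, a minimal usage sketch, assuming a subclass declares name/stages as the BaseWorkflow docstring describes and that CostOptimizedRouting (named in the routing_strategy docstring) is importable from empathy_os.workflows.routing; the subclass, its stage names, and any abstract stage hooks a real subclass must implement are elided as illustrative:

    # Sketch only: the workflow subclass and its stage names are invented.
    from empathy_os.workflows.base import BaseWorkflow
    from empathy_os.workflows.routing import CostOptimizedRouting

    class ReleaseAudit(BaseWorkflow):
        name = "release_audit"
        stages = ["scan", "analyze", "report"]

    wf = ReleaseAudit(
        routing_strategy=CostOptimizedRouting(),  # dynamic per-stage tier selection
        enable_rich_progress=True,  # Rich live display only when stdout is a TTY
    )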