empathy-framework 4.9.1__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.0.dist-info}/METADATA +1 -1
- {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.0.dist-info}/RECORD +47 -26
- empathy_os/__init__.py +1 -1
- empathy_os/cache/hash_only.py +6 -3
- empathy_os/cache/hybrid.py +6 -3
- empathy_os/cli_legacy.py +27 -1
- empathy_os/cli_minimal.py +512 -15
- empathy_os/cli_router.py +145 -113
- empathy_os/cli_unified.py +25 -0
- empathy_os/dashboard/__init__.py +42 -0
- empathy_os/dashboard/app.py +512 -0
- empathy_os/dashboard/simple_server.py +403 -0
- empathy_os/dashboard/standalone_server.py +536 -0
- empathy_os/memory/__init__.py +19 -5
- empathy_os/memory/short_term.py +4 -70
- empathy_os/memory/types.py +2 -2
- empathy_os/models/__init__.py +3 -0
- empathy_os/models/adaptive_routing.py +437 -0
- empathy_os/models/registry.py +4 -4
- empathy_os/socratic/ab_testing.py +1 -1
- empathy_os/telemetry/__init__.py +29 -1
- empathy_os/telemetry/agent_coordination.py +478 -0
- empathy_os/telemetry/agent_tracking.py +350 -0
- empathy_os/telemetry/approval_gates.py +563 -0
- empathy_os/telemetry/event_streaming.py +405 -0
- empathy_os/telemetry/feedback_loop.py +557 -0
- empathy_os/vscode_bridge 2.py +173 -0
- empathy_os/workflows/__init__.py +4 -4
- empathy_os/workflows/base.py +495 -43
- empathy_os/workflows/history.py +3 -5
- empathy_os/workflows/output.py +410 -0
- empathy_os/workflows/progress.py +324 -22
- empathy_os/workflows/progressive/README 2.md +454 -0
- empathy_os/workflows/progressive/__init__ 2.py +92 -0
- empathy_os/workflows/progressive/cli 2.py +242 -0
- empathy_os/workflows/progressive/core 2.py +488 -0
- empathy_os/workflows/progressive/orchestrator 2.py +701 -0
- empathy_os/workflows/progressive/reports 2.py +528 -0
- empathy_os/workflows/progressive/telemetry 2.py +280 -0
- empathy_os/workflows/progressive/test_gen 2.py +514 -0
- empathy_os/workflows/progressive/workflow 2.py +628 -0
- empathy_os/workflows/routing.py +5 -0
- empathy_os/workflows/security_audit.py +189 -0
- {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.0.dist-info}/WHEEL +0 -0
- {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.0.dist-info}/entry_points.txt +0 -0
- {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.0.dist-info}/licenses/LICENSE +0 -0
- {empathy_framework-4.9.1.dist-info → empathy_framework-5.0.0.dist-info}/top_level.txt +0 -0
empathy_os/workflows/base.py
CHANGED
|
@@ -17,6 +17,7 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import json
|
|
19
19
|
import logging
|
|
20
|
+
import sys
|
|
20
21
|
import time
|
|
21
22
|
import uuid
|
|
22
23
|
from abc import ABC, abstractmethod
|
|
@@ -57,7 +58,12 @@ from empathy_os.models import ModelTier as UnifiedModelTier
|
|
|
57
58
|
from .caching import CachedResponse, CachingMixin
|
|
58
59
|
|
|
59
60
|
# Import progress tracking
|
|
60
|
-
from .progress import
|
|
61
|
+
from .progress import (
|
|
62
|
+
RICH_AVAILABLE,
|
|
63
|
+
ProgressCallback,
|
|
64
|
+
ProgressTracker,
|
|
65
|
+
RichProgressReporter,
|
|
66
|
+
)
|
|
61
67
|
from .telemetry_mixin import TelemetryMixin
|
|
62
68
|
|
|
63
69
|
# Import telemetry tracking
|
|
@@ -544,6 +550,11 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
544
550
|
enable_tier_tracking: bool = True,
|
|
545
551
|
enable_tier_fallback: bool = False,
|
|
546
552
|
routing_strategy: TierRoutingStrategy | None = None,
|
|
553
|
+
enable_rich_progress: bool = False,
|
|
554
|
+
enable_adaptive_routing: bool = False,
|
|
555
|
+
enable_heartbeat_tracking: bool = False,
|
|
556
|
+
enable_coordination: bool = False,
|
|
557
|
+
agent_id: str | None = None,
|
|
547
558
|
):
|
|
548
559
|
"""Initialize workflow with optional cost tracker, provider, and config.
|
|
549
560
|
|
|
@@ -569,6 +580,27 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
569
580
|
When provided, overrides static tier_map for stage tier decisions.
|
|
570
581
|
Strategies: CostOptimizedRouting, PerformanceOptimizedRouting,
|
|
571
582
|
BalancedRouting, HybridRouting.
|
|
583
|
+
enable_rich_progress: Whether to enable Rich-based live progress display
|
|
584
|
+
(default False). When enabled and output is a TTY, shows live
|
|
585
|
+
progress bars with spinners. Default is False because most users
|
|
586
|
+
run workflows from IDEs (VSCode, etc.) where TTY is not available.
|
|
587
|
+
The console reporter works reliably in all environments.
|
|
588
|
+
enable_adaptive_routing: Whether to enable adaptive model routing based
|
|
589
|
+
on telemetry history (default False). When enabled, uses historical
|
|
590
|
+
performance data to select the optimal Anthropic model for each stage,
|
|
591
|
+
automatically upgrading tiers when failure rates exceed 20%.
|
|
592
|
+
Opt-in feature for cost optimization and automatic quality improvement.
|
|
593
|
+
enable_heartbeat_tracking: Whether to enable agent heartbeat tracking
|
|
594
|
+
(default False). When enabled, publishes TTL-based heartbeat updates
|
|
595
|
+
to Redis for agent liveness monitoring. Requires Redis backend.
|
|
596
|
+
Pattern 1 from Agent Coordination Architecture.
|
|
597
|
+
enable_coordination: Whether to enable inter-agent coordination signals
|
|
598
|
+
(default False). When enabled, workflow can send and receive TTL-based
|
|
599
|
+
ephemeral signals for agent-to-agent communication. Requires Redis backend.
|
|
600
|
+
Pattern 2 from Agent Coordination Architecture.
|
|
601
|
+
agent_id: Optional agent ID for heartbeat tracking and coordination.
|
|
602
|
+
If None, auto-generates ID from workflow name and run ID.
|
|
603
|
+
Used as identifier in Redis keys (heartbeat:{agent_id}, signal:{agent_id}:...).
|
|
572
604
|
|
|
573
605
|
"""
|
|
574
606
|
from .config import WorkflowConfig
|
|
@@ -579,6 +611,8 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
579
611
|
# Progress tracking
|
|
580
612
|
self._progress_callback = progress_callback
|
|
581
613
|
self._progress_tracker: ProgressTracker | None = None
|
|
614
|
+
self._enable_rich_progress = enable_rich_progress
|
|
615
|
+
self._rich_reporter: RichProgressReporter | None = None
|
|
582
616
|
|
|
583
617
|
# New: LLMExecutor support
|
|
584
618
|
self._executor = executor
|
|
@@ -600,6 +634,17 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
600
634
|
# Routing strategy support
|
|
601
635
|
self._routing_strategy: TierRoutingStrategy | None = routing_strategy
|
|
602
636
|
|
|
637
|
+
# Adaptive routing support (Pattern 3 from AGENT_COORDINATION_ARCHITECTURE)
|
|
638
|
+
self._enable_adaptive_routing = enable_adaptive_routing
|
|
639
|
+
self._adaptive_router = None # Lazy initialization on first use
|
|
640
|
+
|
|
641
|
+
# Agent tracking and coordination (Pattern 1 & 2 from AGENT_COORDINATION_ARCHITECTURE)
|
|
642
|
+
self._enable_heartbeat_tracking = enable_heartbeat_tracking
|
|
643
|
+
self._enable_coordination = enable_coordination
|
|
644
|
+
self._agent_id = agent_id # Will be set during execute() if None
|
|
645
|
+
self._heartbeat_coordinator = None # Lazy initialization on first use
|
|
646
|
+
self._coordination_signals = None # Lazy initialization on first use
|
|
647
|
+
|
|
603
648
|
# Telemetry tracking (uses TelemetryMixin)
|
|
604
649
|
self._init_telemetry(telemetry_backend)
|
|
605
650
|
|
|
@@ -629,17 +674,314 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
629
674
|
"""Get the model tier for a stage from static tier_map."""
|
|
630
675
|
return self.tier_map.get(stage_name, ModelTier.CAPABLE)
|
|
631
676
|
|
|
677
|
+
def _get_adaptive_router(self):
|
|
678
|
+
"""Get or create AdaptiveModelRouter instance (lazy initialization).
|
|
679
|
+
|
|
680
|
+
Returns:
|
|
681
|
+
AdaptiveModelRouter instance if telemetry is available, None otherwise
|
|
682
|
+
"""
|
|
683
|
+
if not self._enable_adaptive_routing:
|
|
684
|
+
return None
|
|
685
|
+
|
|
686
|
+
if self._adaptive_router is None:
|
|
687
|
+
# Lazy import to avoid circular dependencies
|
|
688
|
+
try:
|
|
689
|
+
from empathy_os.models import AdaptiveModelRouter
|
|
690
|
+
|
|
691
|
+
if TELEMETRY_AVAILABLE and UsageTracker is not None:
|
|
692
|
+
self._adaptive_router = AdaptiveModelRouter(
|
|
693
|
+
telemetry=UsageTracker.get_instance()
|
|
694
|
+
)
|
|
695
|
+
logger.debug(
|
|
696
|
+
"adaptive_routing_initialized",
|
|
697
|
+
workflow=self.name,
|
|
698
|
+
message="Adaptive routing enabled for cost optimization"
|
|
699
|
+
)
|
|
700
|
+
else:
|
|
701
|
+
logger.warning(
|
|
702
|
+
"adaptive_routing_unavailable",
|
|
703
|
+
workflow=self.name,
|
|
704
|
+
message="Telemetry not available, adaptive routing disabled"
|
|
705
|
+
)
|
|
706
|
+
self._enable_adaptive_routing = False
|
|
707
|
+
except ImportError as e:
|
|
708
|
+
logger.warning(
|
|
709
|
+
"adaptive_routing_import_error",
|
|
710
|
+
workflow=self.name,
|
|
711
|
+
error=str(e),
|
|
712
|
+
message="Failed to import AdaptiveModelRouter"
|
|
713
|
+
)
|
|
714
|
+
self._enable_adaptive_routing = False
|
|
715
|
+
|
|
716
|
+
return self._adaptive_router
|
|
717
|
+
|
|
718
|
+
def _get_heartbeat_coordinator(self):
|
|
719
|
+
"""Get or create HeartbeatCoordinator instance (lazy initialization).
|
|
720
|
+
|
|
721
|
+
Returns:
|
|
722
|
+
HeartbeatCoordinator instance if heartbeat tracking is enabled, None otherwise
|
|
723
|
+
"""
|
|
724
|
+
if not self._enable_heartbeat_tracking:
|
|
725
|
+
return None
|
|
726
|
+
|
|
727
|
+
if self._heartbeat_coordinator is None:
|
|
728
|
+
try:
|
|
729
|
+
from empathy_os.telemetry import HeartbeatCoordinator
|
|
730
|
+
|
|
731
|
+
self._heartbeat_coordinator = HeartbeatCoordinator()
|
|
732
|
+
logger.debug(
|
|
733
|
+
"heartbeat_tracking_initialized",
|
|
734
|
+
workflow=self.name,
|
|
735
|
+
agent_id=self._agent_id,
|
|
736
|
+
message="Heartbeat tracking enabled for agent liveness monitoring"
|
|
737
|
+
)
|
|
738
|
+
except ImportError as e:
|
|
739
|
+
logger.warning(
|
|
740
|
+
"heartbeat_tracking_import_error",
|
|
741
|
+
workflow=self.name,
|
|
742
|
+
error=str(e),
|
|
743
|
+
message="Failed to import HeartbeatCoordinator"
|
|
744
|
+
)
|
|
745
|
+
self._enable_heartbeat_tracking = False
|
|
746
|
+
except Exception as e:
|
|
747
|
+
logger.warning(
|
|
748
|
+
"heartbeat_tracking_init_error",
|
|
749
|
+
workflow=self.name,
|
|
750
|
+
error=str(e),
|
|
751
|
+
message="Failed to initialize HeartbeatCoordinator (Redis unavailable?)"
|
|
752
|
+
)
|
|
753
|
+
self._enable_heartbeat_tracking = False
|
|
754
|
+
|
|
755
|
+
return self._heartbeat_coordinator
|
|
756
|
+
|
|
757
|
+
def _get_coordination_signals(self):
|
|
758
|
+
"""Get or create CoordinationSignals instance (lazy initialization).
|
|
759
|
+
|
|
760
|
+
Returns:
|
|
761
|
+
CoordinationSignals instance if coordination is enabled, None otherwise
|
|
762
|
+
"""
|
|
763
|
+
if not self._enable_coordination:
|
|
764
|
+
return None
|
|
765
|
+
|
|
766
|
+
if self._coordination_signals is None:
|
|
767
|
+
try:
|
|
768
|
+
from empathy_os.telemetry import CoordinationSignals
|
|
769
|
+
|
|
770
|
+
self._coordination_signals = CoordinationSignals(agent_id=self._agent_id)
|
|
771
|
+
logger.debug(
|
|
772
|
+
"coordination_initialized",
|
|
773
|
+
workflow=self.name,
|
|
774
|
+
agent_id=self._agent_id,
|
|
775
|
+
message="Coordination signals enabled for inter-agent communication"
|
|
776
|
+
)
|
|
777
|
+
except ImportError as e:
|
|
778
|
+
logger.warning(
|
|
779
|
+
"coordination_import_error",
|
|
780
|
+
workflow=self.name,
|
|
781
|
+
error=str(e),
|
|
782
|
+
message="Failed to import CoordinationSignals"
|
|
783
|
+
)
|
|
784
|
+
self._enable_coordination = False
|
|
785
|
+
except Exception as e:
|
|
786
|
+
logger.warning(
|
|
787
|
+
"coordination_init_error",
|
|
788
|
+
workflow=self.name,
|
|
789
|
+
error=str(e),
|
|
790
|
+
message="Failed to initialize CoordinationSignals (Redis unavailable?)"
|
|
791
|
+
)
|
|
792
|
+
self._enable_coordination = False
|
|
793
|
+
|
|
794
|
+
return self._coordination_signals
|
|
795
|
+
|
|
796
|
+
def _check_adaptive_tier_upgrade(self, stage_name: str, current_tier: ModelTier) -> ModelTier:
|
|
797
|
+
"""Check if adaptive routing recommends a tier upgrade.
|
|
798
|
+
|
|
799
|
+
Uses historical telemetry to detect if the current tier has a high
|
|
800
|
+
failure rate (>20%) and automatically upgrades to the next tier.
|
|
801
|
+
|
|
802
|
+
Args:
|
|
803
|
+
stage_name: Name of the stage
|
|
804
|
+
current_tier: Currently selected tier
|
|
805
|
+
|
|
806
|
+
Returns:
|
|
807
|
+
Upgraded tier if recommended, otherwise current_tier
|
|
808
|
+
"""
|
|
809
|
+
router = self._get_adaptive_router()
|
|
810
|
+
if router is None:
|
|
811
|
+
return current_tier
|
|
812
|
+
|
|
813
|
+
# Check if tier upgrade is recommended
|
|
814
|
+
should_upgrade, reason = router.recommend_tier_upgrade(
|
|
815
|
+
workflow=self.name,
|
|
816
|
+
stage=stage_name
|
|
817
|
+
)
|
|
818
|
+
|
|
819
|
+
if should_upgrade:
|
|
820
|
+
# Upgrade to next tier: CHEAP → CAPABLE → PREMIUM
|
|
821
|
+
if current_tier == ModelTier.CHEAP:
|
|
822
|
+
new_tier = ModelTier.CAPABLE
|
|
823
|
+
elif current_tier == ModelTier.CAPABLE:
|
|
824
|
+
new_tier = ModelTier.PREMIUM
|
|
825
|
+
else:
|
|
826
|
+
new_tier = current_tier # Already at highest tier
|
|
827
|
+
|
|
828
|
+
logger.warning(
|
|
829
|
+
"adaptive_routing_tier_upgrade",
|
|
830
|
+
workflow=self.name,
|
|
831
|
+
stage=stage_name,
|
|
832
|
+
old_tier=current_tier.value,
|
|
833
|
+
new_tier=new_tier.value,
|
|
834
|
+
reason=reason
|
|
835
|
+
)
|
|
836
|
+
|
|
837
|
+
return new_tier
|
|
838
|
+
|
|
839
|
+
return current_tier
|
|
840
|
+
|
|
841
|
+
def send_signal(
|
|
842
|
+
self,
|
|
843
|
+
signal_type: str,
|
|
844
|
+
target_agent: str | None = None,
|
|
845
|
+
payload: dict[str, Any] | None = None,
|
|
846
|
+
ttl_seconds: int | None = None,
|
|
847
|
+
) -> str:
|
|
848
|
+
"""Send a coordination signal to another agent (Pattern 2).
|
|
849
|
+
|
|
850
|
+
Args:
|
|
851
|
+
signal_type: Type of signal (e.g., "task_complete", "checkpoint", "error")
|
|
852
|
+
target_agent: Target agent ID (None for broadcast to all agents)
|
|
853
|
+
payload: Optional signal payload data
|
|
854
|
+
ttl_seconds: Optional TTL override (default 60 seconds)
|
|
855
|
+
|
|
856
|
+
Returns:
|
|
857
|
+
Signal ID if coordination is enabled, empty string otherwise
|
|
858
|
+
|
|
859
|
+
Example:
|
|
860
|
+
>>> # Signal completion to orchestrator
|
|
861
|
+
>>> workflow.send_signal(
|
|
862
|
+
... signal_type="task_complete",
|
|
863
|
+
... target_agent="orchestrator",
|
|
864
|
+
... payload={"result": "success", "data": {...}}
|
|
865
|
+
... )
|
|
866
|
+
|
|
867
|
+
>>> # Broadcast abort to all agents
|
|
868
|
+
>>> workflow.send_signal(
|
|
869
|
+
... signal_type="abort",
|
|
870
|
+
... target_agent=None, # Broadcast
|
|
871
|
+
... payload={"reason": "user_cancelled"}
|
|
872
|
+
... )
|
|
873
|
+
"""
|
|
874
|
+
coordinator = self._get_coordination_signals()
|
|
875
|
+
if coordinator is None:
|
|
876
|
+
return ""
|
|
877
|
+
|
|
878
|
+
try:
|
|
879
|
+
return coordinator.signal(
|
|
880
|
+
signal_type=signal_type,
|
|
881
|
+
source_agent=self._agent_id,
|
|
882
|
+
target_agent=target_agent,
|
|
883
|
+
payload=payload or {},
|
|
884
|
+
ttl_seconds=ttl_seconds,
|
|
885
|
+
)
|
|
886
|
+
except Exception as e:
|
|
887
|
+
logger.warning(f"Failed to send coordination signal: {e}")
|
|
888
|
+
return ""
|
|
889
|
+
|
|
890
|
+
def wait_for_signal(
|
|
891
|
+
self,
|
|
892
|
+
signal_type: str,
|
|
893
|
+
source_agent: str | None = None,
|
|
894
|
+
timeout: float = 30.0,
|
|
895
|
+
poll_interval: float = 0.5,
|
|
896
|
+
) -> Any:
|
|
897
|
+
"""Wait for a coordination signal from another agent (Pattern 2).
|
|
898
|
+
|
|
899
|
+
Blocking call that polls for signals with timeout.
|
|
900
|
+
|
|
901
|
+
Args:
|
|
902
|
+
signal_type: Type of signal to wait for
|
|
903
|
+
source_agent: Optional source agent filter
|
|
904
|
+
timeout: Maximum wait time in seconds (default 30.0)
|
|
905
|
+
poll_interval: Poll interval in seconds (default 0.5)
|
|
906
|
+
|
|
907
|
+
Returns:
|
|
908
|
+
CoordinationSignal if received, None if timeout or coordination disabled
|
|
909
|
+
|
|
910
|
+
Example:
|
|
911
|
+
>>> # Wait for orchestrator approval
|
|
912
|
+
>>> signal = workflow.wait_for_signal(
|
|
913
|
+
... signal_type="approval",
|
|
914
|
+
... source_agent="orchestrator",
|
|
915
|
+
... timeout=60.0
|
|
916
|
+
... )
|
|
917
|
+
>>> if signal:
|
|
918
|
+
... proceed_with_deployment(signal.payload)
|
|
919
|
+
"""
|
|
920
|
+
coordinator = self._get_coordination_signals()
|
|
921
|
+
if coordinator is None:
|
|
922
|
+
return None
|
|
923
|
+
|
|
924
|
+
try:
|
|
925
|
+
return coordinator.wait_for_signal(
|
|
926
|
+
signal_type=signal_type,
|
|
927
|
+
source_agent=source_agent,
|
|
928
|
+
timeout=timeout,
|
|
929
|
+
poll_interval=poll_interval,
|
|
930
|
+
)
|
|
931
|
+
except Exception as e:
|
|
932
|
+
logger.warning(f"Failed to wait for coordination signal: {e}")
|
|
933
|
+
return None
|
|
934
|
+
|
|
935
|
+
def check_signal(
|
|
936
|
+
self,
|
|
937
|
+
signal_type: str,
|
|
938
|
+
source_agent: str | None = None,
|
|
939
|
+
consume: bool = True,
|
|
940
|
+
) -> Any:
|
|
941
|
+
"""Check for a coordination signal without blocking (Pattern 2).
|
|
942
|
+
|
|
943
|
+
Non-blocking check for pending signals.
|
|
944
|
+
|
|
945
|
+
Args:
|
|
946
|
+
signal_type: Type of signal to check for
|
|
947
|
+
source_agent: Optional source agent filter
|
|
948
|
+
consume: If True, remove signal after reading (default True)
|
|
949
|
+
|
|
950
|
+
Returns:
|
|
951
|
+
CoordinationSignal if available, None otherwise
|
|
952
|
+
|
|
953
|
+
Example:
|
|
954
|
+
>>> # Non-blocking check for abort signal
|
|
955
|
+
>>> signal = workflow.check_signal(signal_type="abort")
|
|
956
|
+
>>> if signal:
|
|
957
|
+
... raise WorkflowAbortedException(signal.payload["reason"])
|
|
958
|
+
"""
|
|
959
|
+
coordinator = self._get_coordination_signals()
|
|
960
|
+
if coordinator is None:
|
|
961
|
+
return None
|
|
962
|
+
|
|
963
|
+
try:
|
|
964
|
+
return coordinator.check_signal(
|
|
965
|
+
signal_type=signal_type,
|
|
966
|
+
source_agent=source_agent,
|
|
967
|
+
consume=consume,
|
|
968
|
+
)
|
|
969
|
+
except Exception as e:
|
|
970
|
+
logger.warning(f"Failed to check coordination signal: {e}")
|
|
971
|
+
return None
|
|
972
|
+
|
|
632
973
|
def _get_tier_with_routing(
|
|
633
974
|
self,
|
|
634
975
|
stage_name: str,
|
|
635
976
|
input_data: dict[str, Any],
|
|
636
977
|
budget_remaining: float = 100.0,
|
|
637
978
|
) -> ModelTier:
|
|
638
|
-
"""Get tier for a stage using routing strategy if available.
|
|
979
|
+
"""Get tier for a stage using routing strategy or adaptive routing if available.
|
|
639
980
|
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
981
|
+
Priority order:
|
|
982
|
+
1. If routing_strategy configured, uses that for tier selection
|
|
983
|
+
2. Otherwise uses static tier_map
|
|
984
|
+
3. If adaptive routing enabled, checks for tier upgrade recommendations
|
|
643
985
|
|
|
644
986
|
Args:
|
|
645
987
|
stage_name: Name of the stage
|
|
@@ -647,41 +989,50 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
647
989
|
budget_remaining: Remaining budget in USD for this execution
|
|
648
990
|
|
|
649
991
|
Returns:
|
|
650
|
-
ModelTier to use for this stage
|
|
992
|
+
ModelTier to use for this stage (potentially upgraded by adaptive routing)
|
|
651
993
|
"""
|
|
652
|
-
#
|
|
653
|
-
if self._routing_strategy is None:
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
994
|
+
# Get base tier from routing strategy or static map
|
|
995
|
+
if self._routing_strategy is not None:
|
|
996
|
+
from .routing import RoutingContext
|
|
997
|
+
|
|
998
|
+
# Estimate input size from data
|
|
999
|
+
input_size = self._estimate_input_tokens(input_data)
|
|
1000
|
+
|
|
1001
|
+
# Assess complexity
|
|
1002
|
+
complexity = self._assess_complexity(input_data)
|
|
1003
|
+
|
|
1004
|
+
# Determine latency sensitivity based on stage position
|
|
1005
|
+
# First stages are more latency-sensitive (user waiting)
|
|
1006
|
+
stage_index = self.stages.index(stage_name) if stage_name in self.stages else 0
|
|
1007
|
+
if stage_index == 0:
|
|
1008
|
+
latency_sensitivity = "high"
|
|
1009
|
+
elif stage_index < len(self.stages) // 2:
|
|
1010
|
+
latency_sensitivity = "medium"
|
|
1011
|
+
else:
|
|
1012
|
+
latency_sensitivity = "low"
|
|
1013
|
+
|
|
1014
|
+
# Create routing context
|
|
1015
|
+
context = RoutingContext(
|
|
1016
|
+
task_type=f"{self.name}:{stage_name}",
|
|
1017
|
+
input_size=input_size,
|
|
1018
|
+
complexity=complexity,
|
|
1019
|
+
budget_remaining=budget_remaining,
|
|
1020
|
+
latency_sensitivity=latency_sensitivity,
|
|
1021
|
+
)
|
|
663
1022
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
stage_index = self.stages.index(stage_name) if stage_name in self.stages else 0
|
|
667
|
-
if stage_index == 0:
|
|
668
|
-
latency_sensitivity = "high"
|
|
669
|
-
elif stage_index < len(self.stages) // 2:
|
|
670
|
-
latency_sensitivity = "medium"
|
|
1023
|
+
# Delegate to routing strategy
|
|
1024
|
+
base_tier = self._routing_strategy.route(context)
|
|
671
1025
|
else:
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
# Create routing context
|
|
675
|
-
context = RoutingContext(
|
|
676
|
-
task_type=f"{self.name}:{stage_name}",
|
|
677
|
-
input_size=input_size,
|
|
678
|
-
complexity=complexity,
|
|
679
|
-
budget_remaining=budget_remaining,
|
|
680
|
-
latency_sensitivity=latency_sensitivity,
|
|
681
|
-
)
|
|
1026
|
+
# Use static tier_map
|
|
1027
|
+
base_tier = self.get_tier_for_stage(stage_name)
|
|
682
1028
|
|
|
683
|
-
#
|
|
684
|
-
|
|
1029
|
+
# Check if adaptive routing recommends a tier upgrade
|
|
1030
|
+
# This uses telemetry history to detect high failure rates
|
|
1031
|
+
if self._enable_adaptive_routing:
|
|
1032
|
+
final_tier = self._check_adaptive_tier_upgrade(stage_name, base_tier)
|
|
1033
|
+
return final_tier
|
|
1034
|
+
|
|
1035
|
+
return base_tier
|
|
685
1036
|
|
|
686
1037
|
def _estimate_input_tokens(self, input_data: dict[str, Any]) -> int:
|
|
687
1038
|
"""Estimate input token count from data.
|
|
@@ -1050,20 +1401,72 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
1050
1401
|
logger.debug(f"Tier tracking disabled: {e}")
|
|
1051
1402
|
self._enable_tier_tracking = False
|
|
1052
1403
|
|
|
1404
|
+
# Initialize agent ID for heartbeat/coordination (Pattern 1 & 2)
|
|
1405
|
+
if self._agent_id is None:
|
|
1406
|
+
# Auto-generate agent ID from workflow name and run ID
|
|
1407
|
+
self._agent_id = f"{self.name}-{self._run_id[:8]}"
|
|
1408
|
+
|
|
1409
|
+
# Start heartbeat tracking (Pattern 1)
|
|
1410
|
+
heartbeat_coordinator = self._get_heartbeat_coordinator()
|
|
1411
|
+
if heartbeat_coordinator:
|
|
1412
|
+
try:
|
|
1413
|
+
heartbeat_coordinator.start_heartbeat(
|
|
1414
|
+
agent_id=self._agent_id,
|
|
1415
|
+
metadata={
|
|
1416
|
+
"workflow": self.name,
|
|
1417
|
+
"run_id": self._run_id,
|
|
1418
|
+
"provider": getattr(self, "_provider_str", "unknown"),
|
|
1419
|
+
"stages": len(self.stages),
|
|
1420
|
+
}
|
|
1421
|
+
)
|
|
1422
|
+
logger.debug(
|
|
1423
|
+
"heartbeat_started",
|
|
1424
|
+
workflow=self.name,
|
|
1425
|
+
agent_id=self._agent_id,
|
|
1426
|
+
message="Agent heartbeat tracking started"
|
|
1427
|
+
)
|
|
1428
|
+
except Exception as e:
|
|
1429
|
+
logger.warning(f"Failed to start heartbeat tracking: {e}")
|
|
1430
|
+
self._enable_heartbeat_tracking = False
|
|
1431
|
+
|
|
1053
1432
|
started_at = datetime.now()
|
|
1054
1433
|
self._stages_run = []
|
|
1055
1434
|
current_data = kwargs
|
|
1056
1435
|
error = None
|
|
1057
1436
|
|
|
1058
|
-
# Initialize progress tracker
|
|
1437
|
+
# Initialize progress tracker
|
|
1438
|
+
# Always show progress by default (IDE-friendly console output)
|
|
1439
|
+
# Rich live display only when explicitly enabled AND in TTY
|
|
1440
|
+
from .progress import ConsoleProgressReporter
|
|
1441
|
+
|
|
1442
|
+
self._progress_tracker = ProgressTracker(
|
|
1443
|
+
workflow_name=self.name,
|
|
1444
|
+
workflow_id=self._run_id,
|
|
1445
|
+
stage_names=self.stages,
|
|
1446
|
+
)
|
|
1447
|
+
|
|
1448
|
+
# Add user's callback if provided
|
|
1059
1449
|
if self._progress_callback:
|
|
1060
|
-
self._progress_tracker = ProgressTracker(
|
|
1061
|
-
workflow_name=self.name,
|
|
1062
|
-
workflow_id=self._run_id,
|
|
1063
|
-
stage_names=self.stages,
|
|
1064
|
-
)
|
|
1065
1450
|
self._progress_tracker.add_callback(self._progress_callback)
|
|
1066
|
-
|
|
1451
|
+
|
|
1452
|
+
# Rich progress: only when explicitly enabled AND in a TTY
|
|
1453
|
+
if self._enable_rich_progress and RICH_AVAILABLE and sys.stdout.isatty():
|
|
1454
|
+
try:
|
|
1455
|
+
self._rich_reporter = RichProgressReporter(self.name, self.stages)
|
|
1456
|
+
self._progress_tracker.add_callback(self._rich_reporter.report)
|
|
1457
|
+
self._rich_reporter.start()
|
|
1458
|
+
except Exception as e:
|
|
1459
|
+
# Fall back to console reporter
|
|
1460
|
+
logger.debug(f"Rich progress unavailable: {e}")
|
|
1461
|
+
self._rich_reporter = None
|
|
1462
|
+
console_reporter = ConsoleProgressReporter(verbose=False)
|
|
1463
|
+
self._progress_tracker.add_callback(console_reporter.report)
|
|
1464
|
+
else:
|
|
1465
|
+
# Default: use console reporter (works in IDEs, terminals, everywhere)
|
|
1466
|
+
console_reporter = ConsoleProgressReporter(verbose=False)
|
|
1467
|
+
self._progress_tracker.add_callback(console_reporter.report)
|
|
1468
|
+
|
|
1469
|
+
self._progress_tracker.start_workflow()
|
|
1067
1470
|
|
|
1068
1471
|
try:
|
|
1069
1472
|
# Tier fallback mode: try CHEAP → CAPABLE → PREMIUM with validation
|
|
@@ -1111,6 +1514,19 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
1111
1514
|
stage_name, tier.value, f"{prev_tier}_failed"
|
|
1112
1515
|
)
|
|
1113
1516
|
|
|
1517
|
+
# Update heartbeat at stage start (Pattern 1)
|
|
1518
|
+
if heartbeat_coordinator:
|
|
1519
|
+
try:
|
|
1520
|
+
stage_index = self.stages.index(stage_name)
|
|
1521
|
+
progress = stage_index / len(self.stages)
|
|
1522
|
+
heartbeat_coordinator.beat(
|
|
1523
|
+
status="running",
|
|
1524
|
+
progress=progress,
|
|
1525
|
+
current_task=f"Running stage: {stage_name} ({tier.value})"
|
|
1526
|
+
)
|
|
1527
|
+
except Exception as e:
|
|
1528
|
+
logger.debug(f"Heartbeat update failed: {e}")
|
|
1529
|
+
|
|
1114
1530
|
try:
|
|
1115
1531
|
# Run the stage at current tier
|
|
1116
1532
|
output, input_tokens, output_tokens = await self.run_stage(
|
|
@@ -1154,6 +1570,19 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
1154
1570
|
tokens_out=output_tokens,
|
|
1155
1571
|
)
|
|
1156
1572
|
|
|
1573
|
+
# Update heartbeat after stage completion (Pattern 1)
|
|
1574
|
+
if heartbeat_coordinator:
|
|
1575
|
+
try:
|
|
1576
|
+
stage_index = self.stages.index(stage_name) + 1
|
|
1577
|
+
progress = stage_index / len(self.stages)
|
|
1578
|
+
heartbeat_coordinator.beat(
|
|
1579
|
+
status="running",
|
|
1580
|
+
progress=progress,
|
|
1581
|
+
current_task=f"Completed stage: {stage_name}"
|
|
1582
|
+
)
|
|
1583
|
+
except Exception as e:
|
|
1584
|
+
logger.debug(f"Heartbeat update failed: {e}")
|
|
1585
|
+
|
|
1157
1586
|
# Log to cost tracker
|
|
1158
1587
|
self.cost_tracker.log_request(
|
|
1159
1588
|
model=model_id,
|
|
@@ -1393,6 +1822,14 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
1393
1822
|
if self._progress_tracker and error is None:
|
|
1394
1823
|
self._progress_tracker.complete_workflow()
|
|
1395
1824
|
|
|
1825
|
+
# Stop Rich progress display if active
|
|
1826
|
+
if self._rich_reporter:
|
|
1827
|
+
try:
|
|
1828
|
+
self._rich_reporter.stop()
|
|
1829
|
+
except Exception:
|
|
1830
|
+
pass # Best effort cleanup
|
|
1831
|
+
self._rich_reporter = None
|
|
1832
|
+
|
|
1396
1833
|
# Save to workflow history for dashboard
|
|
1397
1834
|
try:
|
|
1398
1835
|
_save_workflow_run(self.name, provider_str, result)
|
|
@@ -1409,6 +1846,21 @@ class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
|
|
|
1409
1846
|
# Emit workflow telemetry to backend
|
|
1410
1847
|
self._emit_workflow_telemetry(result)
|
|
1411
1848
|
|
|
1849
|
+
# Stop heartbeat tracking (Pattern 1)
|
|
1850
|
+
if heartbeat_coordinator:
|
|
1851
|
+
try:
|
|
1852
|
+
final_status = "completed" if result.success else "failed"
|
|
1853
|
+
heartbeat_coordinator.stop_heartbeat(final_status=final_status)
|
|
1854
|
+
logger.debug(
|
|
1855
|
+
"heartbeat_stopped",
|
|
1856
|
+
workflow=self.name,
|
|
1857
|
+
agent_id=self._agent_id,
|
|
1858
|
+
status=final_status,
|
|
1859
|
+
message="Agent heartbeat tracking stopped"
|
|
1860
|
+
)
|
|
1861
|
+
except Exception as e:
|
|
1862
|
+
logger.warning(f"Failed to stop heartbeat tracking: {e}")
|
|
1863
|
+
|
|
1412
1864
|
# Auto-save tier progression
|
|
1413
1865
|
if self._enable_tier_tracking and self._tier_tracker:
|
|
1414
1866
|
try:
|
empathy_os/workflows/history.py
CHANGED
|
@@ -459,10 +459,6 @@ class WorkflowHistoryStore:
|
|
|
459
459
|
Returns:
|
|
460
460
|
Number of runs deleted
|
|
461
461
|
"""
|
|
462
|
-
cutoff = datetime.now().replace(
|
|
463
|
-
hour=0, minute=0, second=0, microsecond=0
|
|
464
|
-
).isoformat()
|
|
465
|
-
|
|
466
462
|
cursor = self.conn.cursor()
|
|
467
463
|
|
|
468
464
|
# Get run IDs to delete
|
|
@@ -480,12 +476,14 @@ class WorkflowHistoryStore:
|
|
|
480
476
|
return 0
|
|
481
477
|
|
|
482
478
|
# Delete stages for these runs
|
|
479
|
+
# Security Note: f-string builds placeholder list only ("?, ?, ?")
|
|
480
|
+
# Actual data (run_ids) passed as parameters - SQL injection safe
|
|
483
481
|
placeholders = ",".join("?" * len(run_ids))
|
|
484
482
|
cursor.execute(
|
|
485
483
|
f"DELETE FROM workflow_stages WHERE run_id IN ({placeholders})", run_ids
|
|
486
484
|
)
|
|
487
485
|
|
|
488
|
-
# Delete runs
|
|
486
|
+
# Delete runs (same safe parameterization pattern)
|
|
489
487
|
cursor.execute(
|
|
490
488
|
f"DELETE FROM workflow_runs WHERE run_id IN ({placeholders})", run_ids
|
|
491
489
|
)
|