empathy-framework 4.7.1__py3-none-any.whl → 4.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/METADATA +65 -2
- {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/RECORD +73 -52
- {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/WHEEL +1 -1
- {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/entry_points.txt +2 -1
- {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/top_level.txt +0 -1
- empathy_os/__init__.py +2 -0
- empathy_os/cache/hash_only.py +6 -3
- empathy_os/cache/hybrid.py +6 -3
- empathy_os/cli/__init__.py +128 -238
- empathy_os/cli/__main__.py +5 -33
- empathy_os/cli/commands/__init__.py +1 -8
- empathy_os/cli/commands/help.py +331 -0
- empathy_os/cli/commands/info.py +140 -0
- empathy_os/cli/commands/inspect.py +437 -0
- empathy_os/cli/commands/metrics.py +92 -0
- empathy_os/cli/commands/orchestrate.py +184 -0
- empathy_os/cli/commands/patterns.py +207 -0
- empathy_os/cli/commands/provider.py +93 -81
- empathy_os/cli/commands/setup.py +96 -0
- empathy_os/cli/commands/status.py +235 -0
- empathy_os/cli/commands/sync.py +166 -0
- empathy_os/cli/commands/tier.py +121 -0
- empathy_os/cli/commands/workflow.py +574 -0
- empathy_os/cli/parsers/__init__.py +62 -0
- empathy_os/cli/parsers/help.py +41 -0
- empathy_os/cli/parsers/info.py +26 -0
- empathy_os/cli/parsers/inspect.py +66 -0
- empathy_os/cli/parsers/metrics.py +42 -0
- empathy_os/cli/parsers/orchestrate.py +61 -0
- empathy_os/cli/parsers/patterns.py +54 -0
- empathy_os/cli/parsers/provider.py +40 -0
- empathy_os/cli/parsers/setup.py +42 -0
- empathy_os/cli/parsers/status.py +47 -0
- empathy_os/cli/parsers/sync.py +31 -0
- empathy_os/cli/parsers/tier.py +33 -0
- empathy_os/cli/parsers/workflow.py +77 -0
- empathy_os/cli/utils/__init__.py +1 -0
- empathy_os/cli/utils/data.py +242 -0
- empathy_os/cli/utils/helpers.py +68 -0
- empathy_os/{cli.py → cli_legacy.py} +27 -27
- empathy_os/cli_minimal.py +662 -0
- empathy_os/cli_router.py +384 -0
- empathy_os/cli_unified.py +38 -2
- empathy_os/memory/__init__.py +19 -5
- empathy_os/memory/short_term.py +14 -404
- empathy_os/memory/types.py +437 -0
- empathy_os/memory/unified.py +61 -48
- empathy_os/models/fallback.py +1 -1
- empathy_os/models/provider_config.py +59 -344
- empathy_os/models/registry.py +31 -180
- empathy_os/monitoring/alerts.py +14 -20
- empathy_os/monitoring/alerts_cli.py +24 -7
- empathy_os/project_index/__init__.py +2 -0
- empathy_os/project_index/index.py +210 -5
- empathy_os/project_index/scanner.py +45 -14
- empathy_os/project_index/scanner_parallel.py +291 -0
- empathy_os/socratic/ab_testing.py +1 -1
- empathy_os/workflows/__init__.py +31 -2
- empathy_os/workflows/base.py +349 -325
- empathy_os/workflows/bug_predict.py +8 -0
- empathy_os/workflows/builder.py +273 -0
- empathy_os/workflows/caching.py +253 -0
- empathy_os/workflows/code_review_pipeline.py +1 -0
- empathy_os/workflows/history.py +510 -0
- empathy_os/workflows/output.py +410 -0
- empathy_os/workflows/perf_audit.py +125 -19
- empathy_os/workflows/progress.py +324 -22
- empathy_os/workflows/routing.py +168 -0
- empathy_os/workflows/secure_release.py +1 -0
- empathy_os/workflows/security_audit.py +190 -0
- empathy_os/workflows/security_audit_phase3.py +328 -0
- empathy_os/workflows/telemetry_mixin.py +269 -0
- empathy_os/dashboard/__init__.py +0 -15
- empathy_os/dashboard/server.py +0 -941
- patterns/README.md +0 -119
- patterns/__init__.py +0 -95
- patterns/behavior.py +0 -298
- patterns/code_review_memory.json +0 -441
- patterns/core.py +0 -97
- patterns/debugging.json +0 -3763
- patterns/empathy.py +0 -268
- patterns/health_check_memory.json +0 -505
- patterns/input.py +0 -161
- patterns/memory_graph.json +0 -8
- patterns/refactoring_memory.json +0 -1113
- patterns/registry.py +0 -663
- patterns/security_memory.json +0 -8
- patterns/structural.py +0 -415
- patterns/validation.py +0 -194
- {empathy_framework-4.7.1.dist-info → empathy_framework-4.8.0.dist-info}/licenses/LICENSE +0 -0
empathy_os/workflows/base.py
CHANGED
@@ -17,6 +17,7 @@ from __future__ import annotations
 
 import json
 import logging
+import sys
 import time
 import uuid
 from abc import ABC, abstractmethod
@@ -27,6 +28,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
+    from .routing import TierRoutingStrategy
     from .tier_tracking import WorkflowTierTracker
 
 # Load .env file for API keys if python-dotenv is available
@@ -38,26 +40,31 @@ except ImportError:
     pass  # python-dotenv not installed, rely on environment variables
 
 # Import caching infrastructure
-from empathy_os.cache import BaseCache
+from empathy_os.cache import BaseCache
 from empathy_os.config import _validate_file_path
 from empathy_os.cost_tracker import MODEL_PRICING, CostTracker
 
 # Import unified types from empathy_os.models
 from empathy_os.models import (
     ExecutionContext,
-    LLMCallRecord,
     LLMExecutor,
     TaskRoutingRecord,
     TelemetryBackend,
-    WorkflowRunRecord,
-    WorkflowStageRecord,
-    get_telemetry_store,
 )
 from empathy_os.models import ModelProvider as UnifiedModelProvider
 from empathy_os.models import ModelTier as UnifiedModelTier
 
+# Import mixins (extracted for maintainability)
+from .caching import CachedResponse, CachingMixin
+
 # Import progress tracking
-from .progress import
+from .progress import (
+    RICH_AVAILABLE,
+    ProgressCallback,
+    ProgressTracker,
+    RichProgressReporter,
+)
+from .telemetry_mixin import TelemetryMixin
 
 # Import telemetry tracking
 try:
@@ -78,15 +85,47 @@ logger = logging.getLogger(__name__)
 WORKFLOW_HISTORY_FILE = ".empathy/workflow_runs.json"
 
 
-# Local enums for backward compatibility
+# Local enums for backward compatibility - DEPRECATED
 # New code should use empathy_os.models.ModelTier/ModelProvider
 class ModelTier(Enum):
-    """Model tier for cost optimization.
+    """DEPRECATED: Model tier for cost optimization.
+
+    This enum is deprecated and will be removed in v5.0.
+    Use empathy_os.models.ModelTier instead.
+
+    Migration:
+        # Old:
+        from empathy_os.workflows.base import ModelTier
+
+        # New:
+        from empathy_os.models import ModelTier
+
+    Why deprecated:
+    - Creates confusion with dual definitions
+    - empathy_os.models.ModelTier is the canonical location
+    - Simplifies imports and reduces duplication
+    """
 
     CHEAP = "cheap"  # Haiku/GPT-4o-mini - $0.25-1.25/M tokens
     CAPABLE = "capable"  # Sonnet/GPT-4o - $3-15/M tokens
     PREMIUM = "premium"  # Opus/o1 - $15-75/M tokens
 
+    def __init__(self, value: str):
+        """Initialize with deprecation warning."""
+        # Only warn once per process, not per instance
+        import warnings
+
+        # Use self.__class__ instead of ModelTier (class not yet defined during creation)
+        if not hasattr(self.__class__, "_deprecation_warned"):
+            warnings.warn(
+                "workflows.base.ModelTier is deprecated and will be removed in v5.0. "
+                "Use empathy_os.models.ModelTier instead. "
+                "Update imports: from empathy_os.models import ModelTier",
+                DeprecationWarning,
+                stacklevel=4,
+            )
+            self.__class__._deprecation_warned = True
+
     def to_unified(self) -> UnifiedModelTier:
         """Convert to unified ModelTier from empathy_os.models."""
         return UnifiedModelTier(self.value)
@@ -214,8 +253,52 @@ class WorkflowResult:
     transient: bool = False  # True if retry is reasonable (e.g., provider timeout)
 
 
+# Global singleton for workflow history store (lazy-initialized)
+_history_store: Any = None  # WorkflowHistoryStore | None
+
+
+def _get_history_store():
+    """Get or create workflow history store singleton.
+
+    Returns SQLite-based history store. Falls back to None if initialization fails.
+    """
+    global _history_store
+
+    if _history_store is None:
+        try:
+            from .history import WorkflowHistoryStore
+
+            _history_store = WorkflowHistoryStore()
+            logger.debug("Workflow history store initialized (SQLite)")
+        except (ImportError, OSError, PermissionError) as e:
+            # File system errors or missing dependencies
+            logger.warning(f"Failed to initialize SQLite history store: {e}")
+            _history_store = False  # Mark as failed to avoid repeated attempts
+
+    # Return store or None if initialization failed
+    return _history_store if _history_store is not False else None
+
+
 def _load_workflow_history(history_file: str = WORKFLOW_HISTORY_FILE) -> list[dict]:
-    """Load workflow run history from disk.
+    """Load workflow run history from disk (legacy JSON support).
+
+    DEPRECATED: Use WorkflowHistoryStore for new code.
+    This function is maintained for backward compatibility.
+
+    Args:
+        history_file: Path to JSON history file
+
+    Returns:
+        List of workflow run dictionaries
+    """
+    import warnings
+
+    warnings.warn(
+        "_load_workflow_history is deprecated. Use WorkflowHistoryStore instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+
     path = Path(history_file)
     if not path.exists():
         return []
@@ -234,11 +317,42 @@ def _save_workflow_run(
     history_file: str = WORKFLOW_HISTORY_FILE,
     max_history: int = 100,
 ) -> None:
-    """Save a workflow run to history.
+    """Save a workflow run to history.
+
+    Uses SQLite-based storage by default. Falls back to JSON if SQLite unavailable.
+
+    Args:
+        workflow_name: Name of the workflow
+        provider: Provider used (anthropic, openai, google)
+        result: WorkflowResult object
+        history_file: Legacy JSON path (ignored if SQLite available)
+        max_history: Legacy max history limit (ignored if SQLite available)
+    """
+    # Try SQLite first (new approach)
+    store = _get_history_store()
+    if store is not None:
+        try:
+            run_id = str(uuid.uuid4())
+            store.record_run(run_id, workflow_name, provider, result)
+            logger.debug(f"Workflow run saved to SQLite: {run_id}")
+            return
+        except (OSError, PermissionError, ValueError) as e:
+            # SQLite failed, fall back to JSON
+            logger.warning(f"Failed to save to SQLite, falling back to JSON: {e}")
+
+    # Fallback: Legacy JSON storage
+    logger.debug("Using legacy JSON storage for workflow history")
     path = Path(history_file)
     path.parent.mkdir(parents=True, exist_ok=True)
 
-    history =
+    history = []
+    if path.exists():
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            history = list(data) if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            pass
 
     # Create run record
     run: dict = {
@@ -285,20 +399,48 @@ def _save_workflow_run(
 def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
     """Get workflow statistics for dashboard.
 
+    Uses SQLite-based storage by default. Falls back to JSON if unavailable.
+
+    Args:
+        history_file: Legacy JSON path (used only if SQLite unavailable)
+
     Returns:
         Dictionary with workflow stats including:
        - total_runs: Total workflow runs
+        - successful_runs: Number of successful runs
        - by_workflow: Per-workflow stats
        - by_provider: Per-provider stats
+        - by_tier: Cost breakdown by tier
        - recent_runs: Last 10 runs
+        - total_cost: Total cost across all runs
        - total_savings: Total cost savings
-
+        - avg_savings_percent: Average savings percentage
     """
-
+    # Try SQLite first (new approach)
+    store = _get_history_store()
+    if store is not None:
+        try:
+            return store.get_stats()
+        except (OSError, PermissionError, ValueError) as e:
+            # SQLite failed, fall back to JSON
+            logger.warning(f"Failed to get stats from SQLite, falling back to JSON: {e}")
+
+    # Fallback: Legacy JSON storage
+    logger.debug("Using legacy JSON storage for workflow stats")
+    history = []
+    path = Path(history_file)
+    if path.exists():
+        try:
+            with open(path) as f:
+                data = json.load(f)
+            history = list(data) if isinstance(data, list) else []
+        except (json.JSONDecodeError, OSError):
+            pass
 
     if not history:
         return {
             "total_runs": 0,
+            "successful_runs": 0,
             "by_workflow": {},
             "by_provider": {},
             "by_tier": {"cheap": 0, "capable": 0, "premium": 0},
@@ -368,9 +510,11 @@ def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
     }
 
 
-class BaseWorkflow(ABC):
+class BaseWorkflow(CachingMixin, TelemetryMixin, ABC):
     """Base class for multi-model workflows.
 
+    Inherits from CachingMixin and TelemetryMixin (extracted for maintainability).
+
     Subclasses define stages and tier mappings:
 
         class MyWorkflow(BaseWorkflow):
@@ -405,6 +549,8 @@ class BaseWorkflow(ABC):
         enable_cache: bool = True,
         enable_tier_tracking: bool = True,
         enable_tier_fallback: bool = False,
+        routing_strategy: TierRoutingStrategy | None = None,
+        enable_rich_progress: bool = False,
     ):
         """Initialize workflow with optional cost tracker, provider, and config.
 
@@ -426,6 +572,15 @@ class BaseWorkflow(ABC):
             enable_tier_tracking: Whether to enable automatic tier tracking (default True).
             enable_tier_fallback: Whether to enable intelligent tier fallback
                 (CHEAP → CAPABLE → PREMIUM). Opt-in feature (default False).
+            routing_strategy: Optional TierRoutingStrategy for dynamic tier selection.
+                When provided, overrides static tier_map for stage tier decisions.
+                Strategies: CostOptimizedRouting, PerformanceOptimizedRouting,
+                BalancedRouting, HybridRouting.
+            enable_rich_progress: Whether to enable Rich-based live progress display
+                (default False). When enabled and output is a TTY, shows live
+                progress bars with spinners. Default is False because most users
+                run workflows from IDEs (VSCode, etc.) where TTY is not available.
+                The console reporter works reliably in all environments.
 
         """
         from .config import WorkflowConfig
@@ -436,11 +591,11 @@ class BaseWorkflow(ABC):
         # Progress tracking
         self._progress_callback = progress_callback
         self._progress_tracker: ProgressTracker | None = None
+        self._enable_rich_progress = enable_rich_progress
+        self._rich_reporter: RichProgressReporter | None = None
 
         # New: LLMExecutor support
         self._executor = executor
-        self._telemetry_backend = telemetry_backend or get_telemetry_store()
-        self._run_id: str | None = None  # Set at start of execute()
         self._api_key: str | None = None  # For default executor creation
 
         # Cache support
@@ -456,20 +611,11 @@ class BaseWorkflow(ABC):
         self._enable_tier_fallback = enable_tier_fallback
         self._tier_progression: list[tuple[str, str, bool]] = []  # (stage, tier, success)
 
-        #
-        self.
-
-
-
-            self._telemetry_tracker = UsageTracker.get_instance()
-        except (OSError, PermissionError) as e:
-            # File system errors - log but disable telemetry
-            logger.debug(f"Failed to initialize telemetry tracker (file system error): {e}")
-            self._enable_telemetry = False
-        except (AttributeError, TypeError, ValueError) as e:
-            # Configuration or initialization errors
-            logger.debug(f"Failed to initialize telemetry tracker (config error): {e}")
-            self._enable_telemetry = False
+        # Routing strategy support
+        self._routing_strategy: TierRoutingStrategy | None = routing_strategy
+
+        # Telemetry tracking (uses TelemetryMixin)
+        self._init_telemetry(telemetry_backend)
 
         # Load config if not provided
         self._config = config or WorkflowConfig.load()
@@ -494,9 +640,83 @@ class BaseWorkflow(ABC):
         self.provider = provider
 
     def get_tier_for_stage(self, stage_name: str) -> ModelTier:
-        """Get the model tier for a stage."""
+        """Get the model tier for a stage from static tier_map."""
         return self.tier_map.get(stage_name, ModelTier.CAPABLE)
 
+    def _get_tier_with_routing(
+        self,
+        stage_name: str,
+        input_data: dict[str, Any],
+        budget_remaining: float = 100.0,
+    ) -> ModelTier:
+        """Get tier for a stage using routing strategy if available.
+
+        If a routing strategy is configured, creates a RoutingContext and
+        delegates tier selection to the strategy. Otherwise falls back to
+        the static tier_map.
+
+        Args:
+            stage_name: Name of the stage
+            input_data: Current workflow data (used to estimate input size)
+            budget_remaining: Remaining budget in USD for this execution
+
+        Returns:
+            ModelTier to use for this stage
+        """
+        # Fall back to static tier_map if no routing strategy
+        if self._routing_strategy is None:
+            return self.get_tier_for_stage(stage_name)
+
+        from .routing import RoutingContext
+
+        # Estimate input size from data
+        input_size = self._estimate_input_tokens(input_data)
+
+        # Assess complexity
+        complexity = self._assess_complexity(input_data)
+
+        # Determine latency sensitivity based on stage position
+        # First stages are more latency-sensitive (user waiting)
+        stage_index = self.stages.index(stage_name) if stage_name in self.stages else 0
+        if stage_index == 0:
+            latency_sensitivity = "high"
+        elif stage_index < len(self.stages) // 2:
+            latency_sensitivity = "medium"
+        else:
+            latency_sensitivity = "low"
+
+        # Create routing context
+        context = RoutingContext(
+            task_type=f"{self.name}:{stage_name}",
+            input_size=input_size,
+            complexity=complexity,
+            budget_remaining=budget_remaining,
+            latency_sensitivity=latency_sensitivity,
+        )
+
+        # Delegate to routing strategy
+        return self._routing_strategy.route(context)
+
+    def _estimate_input_tokens(self, input_data: dict[str, Any]) -> int:
+        """Estimate input token count from data.
+
+        Simple heuristic: ~4 characters per token on average.
+
+        Args:
+            input_data: Workflow input data
+
+        Returns:
+            Estimated token count
+        """
+        import json
+
+        try:
+            # Serialize to estimate size
+            data_str = json.dumps(input_data, default=str)
+            return len(data_str) // 4
+        except (TypeError, ValueError):
+            return 1000  # Default estimate
+
     def get_model_for_tier(self, tier: ModelTier) -> str:
         """Get the model for a tier based on configured provider and config."""
         from .config import get_model
@@ -507,43 +727,7 @@ class BaseWorkflow(ABC):
         model = get_model(provider_str, tier.value, self._config)
         return model
 
-
-        """Set up cache with one-time user prompt if needed.
-
-        This is called lazily on first workflow execution to avoid
-        blocking workflow initialization.
-        """
-        if not self._enable_cache:
-            return
-
-        if self._cache_setup_attempted:
-            return
-
-        self._cache_setup_attempted = True
-
-        # If cache already provided, use it
-        if self._cache is not None:
-            return
-
-        # Otherwise, trigger auto-setup (which may prompt user)
-        try:
-            auto_setup_cache()
-            self._cache = create_cache()
-            logger.info(f"Cache initialized for workflow: {self.name}")
-        except ImportError as e:
-            # Hybrid cache dependencies not available, fall back to hash-only
-            logger.info(
-                f"Using hash-only cache (install empathy-framework[cache] for semantic caching): {e}"
-            )
-            self._cache = create_cache(cache_type="hash")
-        except (OSError, PermissionError) as e:
-            # File system errors - disable cache
-            logger.warning(f"Cache setup failed (file system error): {e}, continuing without cache")
-            self._enable_cache = False
-        except (ValueError, TypeError, AttributeError) as e:
-            # Configuration errors - disable cache
-            logger.warning(f"Cache setup failed (config error): {e}, continuing without cache")
-            self._enable_cache = False
+    # Note: _maybe_setup_cache is inherited from CachingMixin
 
     async def _call_llm(
         self,
@@ -582,54 +766,26 @@ class BaseWorkflow(ABC):
         model = self.get_model_for_tier(tier)
         cache_type = None
 
-        # Try cache lookup
-
-
-
-
-
-
-            if cached_response is not None:
-                logger.debug(f"Cache hit for {self.name}:{stage}")
-                # Determine cache type
-                if hasattr(self._cache, "cache_type"):
-                    ct = self._cache.cache_type
-                    # Ensure it's a string (not a Mock object)
-                    cache_type = str(ct) if ct and isinstance(ct, str) else "hash"
-                else:
-                    cache_type = "hash"  # Default assumption
-
-                # Track telemetry for cache hit
-                duration_ms = int((time.time() - start_time) * 1000)
-                in_tokens = cached_response["input_tokens"]
-                out_tokens = cached_response["output_tokens"]
-                cost = self._calculate_cost(tier, in_tokens, out_tokens)
+        # Try cache lookup using CachingMixin
+        cached = self._try_cache_lookup(stage, system, user_message, model)
+        if cached is not None:
+            # Track telemetry for cache hit
+            duration_ms = int((time.time() - start_time) * 1000)
+            cost = self._calculate_cost(tier, cached.input_tokens, cached.output_tokens)
+            cache_type = self._get_cache_type()
 
-
-
-
-
-
-
-
-
-
-
+            self._track_telemetry(
+                stage=stage,
+                tier=tier,
+                model=model,
+                cost=cost,
+                tokens={"input": cached.input_tokens, "output": cached.output_tokens},
+                cache_hit=True,
+                cache_type=cache_type,
+                duration_ms=duration_ms,
+            )
 
-
-                return (
-                    cached_response["content"],
-                    cached_response["input_tokens"],
-                    cached_response["output_tokens"],
-                )
-            except (KeyError, TypeError, ValueError) as e:
-                # Malformed cache data - continue with LLM call
-                logger.debug(f"Cache lookup failed (malformed data): {e}, continuing with LLM call")
-            except (OSError, PermissionError) as e:
-                # File system errors - continue with LLM call
-                logger.debug(
-                    f"Cache lookup failed (file system error): {e}, continuing with LLM call"
-                )
+            return (cached.content, cached.input_tokens, cached.output_tokens)
 
         # Create a step config for this call
         step = WorkflowStepConfig(
@@ -662,23 +818,14 @@ class BaseWorkflow(ABC):
             duration_ms=duration_ms,
         )
 
-            # Store in cache
-
-
-
-
-
-
-
-                }
-                self._cache.put(self.name, stage, full_prompt, model, response_data)
-                logger.debug(f"Cached response for {self.name}:{stage}")
-        except (OSError, PermissionError) as e:
-            # File system errors - log but continue
-            logger.debug(f"Failed to cache response (file system error): {e}")
-        except (ValueError, TypeError, KeyError) as e:
-            # Data serialization errors - log but continue
-            logger.debug(f"Failed to cache response (serialization error): {e}")
+        # Store in cache using CachingMixin
+        self._store_in_cache(
+            stage,
+            system,
+            user_message,
+            model,
+            CachedResponse(content=content, input_tokens=in_tokens, output_tokens=out_tokens),
+        )
 
             return content, in_tokens, out_tokens
         except (ValueError, TypeError, KeyError) as e:
@@ -698,53 +845,7 @@ class BaseWorkflow(ABC):
         logger.exception(f"Unexpected error calling LLM: {e}")
         return f"Error calling LLM: {type(e).__name__}", 0, 0
 
-
-        self,
-        stage: str,
-        tier: ModelTier,
-        model: str,
-        cost: float,
-        tokens: dict[str, int],
-        cache_hit: bool,
-        cache_type: str | None,
-        duration_ms: int,
-    ) -> None:
-        """Track telemetry for an LLM call.
-
-        Args:
-            stage: Stage name
-            tier: Model tier used
-            model: Model ID used
-            cost: Cost in USD
-            tokens: Dictionary with "input" and "output" token counts
-            cache_hit: Whether this was a cache hit
-            cache_type: Cache type if cache hit
-            duration_ms: Duration in milliseconds
-
-        """
-        if not self._enable_telemetry or self._telemetry_tracker is None:
-            return
-
-        try:
-            provider_str = getattr(self, "_provider_str", "unknown")
-            self._telemetry_tracker.track_llm_call(
-                workflow=self.name,
-                stage=stage,
-                tier=tier.value.upper(),
-                model=model,
-                provider=provider_str,
-                cost=cost,
-                tokens=tokens,
-                cache_hit=cache_hit,
-                cache_type=cache_type,
-                duration_ms=duration_ms,
-            )
-        except (AttributeError, TypeError, ValueError) as e:
-            # INTENTIONAL: Telemetry tracking failures should never crash workflows
-            logger.debug(f"Failed to track telemetry (config/data error): {e}")
-        except (OSError, PermissionError) as e:
-            # File system errors - log but never crash workflow
-            logger.debug(f"Failed to track telemetry (file system error): {e}")
+    # Note: _track_telemetry is inherited from TelemetryMixin
 
     def _calculate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
         """Calculate cost for a stage."""
@@ -784,32 +885,20 @@ class BaseWorkflow(ABC):
         savings = baseline_cost - total_cost
         savings_percent = (savings / baseline_cost * 100) if baseline_cost > 0 else 0.0
 
-        # Calculate cache metrics
-
-
-
+        # Calculate cache metrics using CachingMixin
+        cache_stats = self._get_cache_stats()
+        cache_hits = cache_stats["hits"]
+        cache_misses = cache_stats["misses"]
+        cache_hit_rate = cache_stats["hit_rate"]
         estimated_cost_without_cache = total_cost
         savings_from_cache = 0.0
 
-
-
-
-
-
-
-
-            # Estimate cost without cache (assumes cache hits would have incurred full cost)
-            # This is a conservative estimate
-            if cache_hits > 0:
-                # Average cost per non-cached call
-                avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
-                # Estimated additional cost if cache hits were actual API calls
-                estimated_additional_cost = cache_hits * avg_cost_per_call
-                estimated_cost_without_cache = total_cost + estimated_additional_cost
-                savings_from_cache = estimated_additional_cost
-        except (AttributeError, TypeError):
-            # Cache doesn't support stats or error occurred
-            pass
+        # Estimate cost without cache (assumes cache hits would have incurred full cost)
+        if cache_hits > 0:
+            avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
+            estimated_additional_cost = cache_hits * avg_cost_per_call
+            estimated_cost_without_cache = total_cost + estimated_additional_cost
+            savings_from_cache = estimated_additional_cost
 
         return CostReport(
             total_cost=total_cost,
@@ -956,7 +1045,8 @@ class BaseWorkflow(ABC):
 
         # Log routing start
         try:
-            self._telemetry_backend
+            if self._telemetry_backend is not None:
+                self._telemetry_backend.log_task_routing(routing_record)
         except Exception as e:
             logger.debug(f"Failed to log task routing: {e}")
 
@@ -979,15 +1069,39 @@ class BaseWorkflow(ABC):
         current_data = kwargs
         error = None
 
-        # Initialize progress tracker
+        # Initialize progress tracker
+        # Always show progress by default (IDE-friendly console output)
+        # Rich live display only when explicitly enabled AND in TTY
+        from .progress import ConsoleProgressReporter
+
+        self._progress_tracker = ProgressTracker(
+            workflow_name=self.name,
+            workflow_id=self._run_id,
+            stage_names=self.stages,
+        )
+
+        # Add user's callback if provided
         if self._progress_callback:
-            self._progress_tracker = ProgressTracker(
-                workflow_name=self.name,
-                workflow_id=self._run_id,
-                stage_names=self.stages,
-            )
             self._progress_tracker.add_callback(self._progress_callback)
-
+
+        # Rich progress: only when explicitly enabled AND in a TTY
+        if self._enable_rich_progress and RICH_AVAILABLE and sys.stdout.isatty():
+            try:
+                self._rich_reporter = RichProgressReporter(self.name, self.stages)
+                self._progress_tracker.add_callback(self._rich_reporter.report)
+                self._rich_reporter.start()
+            except Exception as e:
+                # Fall back to console reporter
+                logger.debug(f"Rich progress unavailable: {e}")
+                self._rich_reporter = None
+                console_reporter = ConsoleProgressReporter(verbose=False)
+                self._progress_tracker.add_callback(console_reporter.report)
+        else:
+            # Default: use console reporter (works in IDEs, terminals, everywhere)
+            console_reporter = ConsoleProgressReporter(verbose=False)
+            self._progress_tracker.add_callback(console_reporter.report)
+
+        self._progress_tracker.start_workflow()
 
         try:
             # Tier fallback mode: try CHEAP → CAPABLE → PREMIUM with validation
@@ -1144,10 +1258,20 @@ class BaseWorkflow(ABC):
                    self._progress_tracker.fail_stage(stage_name, error_msg)
                 raise ValueError(error_msg)
 
-        # Standard mode: use
+        # Standard mode: use routing strategy or tier_map (backward compatible)
         else:
+            # Track budget for routing decisions
+            total_budget = 100.0  # Default budget in USD
+            budget_spent = 0.0
+
             for stage_name in self.stages:
-
+                # Use routing strategy if available, otherwise fall back to tier_map
+                budget_remaining = total_budget - budget_spent
+                tier = self._get_tier_with_routing(
+                    stage_name,
+                    current_data if isinstance(current_data, dict) else {},
+                    budget_remaining,
+                )
                 stage_start = datetime.now()
 
                 # Check if stage should be skipped
@@ -1185,6 +1309,9 @@ class BaseWorkflow(ABC):
                 duration_ms = int((stage_end - stage_start).total_seconds() * 1000)
                 cost = self._calculate_cost(tier, input_tokens, output_tokens)
 
+                # Update budget spent for routing decisions
+                budget_spent += cost
+
                 stage = WorkflowStage(
                     name=stage_name,
                     tier=tier,
@@ -1304,6 +1431,14 @@ class BaseWorkflow(ABC):
         if self._progress_tracker and error is None:
             self._progress_tracker.complete_workflow()
 
+        # Stop Rich progress display if active
+        if self._rich_reporter:
+            try:
+                self._rich_reporter.stop()
+            except Exception:
+                pass  # Best effort cleanup
+            self._rich_reporter = None
+
         # Save to workflow history for dashboard
         try:
            _save_workflow_run(self.name, provider_str, result)
@@ -1364,7 +1499,8 @@ class BaseWorkflow(ABC):
 
         # Log routing completion
         try:
-            self._telemetry_backend
+            if self._telemetry_backend is not None:
+                self._telemetry_backend.log_task_routing(routing_record)
         except Exception as e:
             logger.debug(f"Failed to log task routing completion: {e}")
 
@@ -1543,119 +1679,7 @@ class BaseWorkflow(ABC):
             self._executor = self._create_default_executor()
         return self._executor
 
-
-        self,
-        step_name: str,
-        task_type: str,
-        tier: str,
-        model_id: str,
-        input_tokens: int,
-        output_tokens: int,
-        cost: float,
-        latency_ms: int,
-        success: bool = True,
-        error_message: str | None = None,
-        fallback_used: bool = False,
-    ) -> None:
-        """Emit an LLMCallRecord to the telemetry backend.
-
-        Args:
-            step_name: Name of the workflow step
-            task_type: Task type used for routing
-            tier: Model tier used
-            model_id: Model ID used
-            input_tokens: Input token count
-            output_tokens: Output token count
-            cost: Estimated cost
-            latency_ms: Latency in milliseconds
-            success: Whether the call succeeded
-            error_message: Error message if failed
-            fallback_used: Whether fallback was used
-
-        """
-        record = LLMCallRecord(
-            call_id=str(uuid.uuid4()),
-            timestamp=datetime.now().isoformat(),
-            workflow_name=self.name,
-            step_name=step_name,
-            task_type=task_type,
-            provider=self._provider_str,
-            tier=tier,
-            model_id=model_id,
-            input_tokens=input_tokens,
-            output_tokens=output_tokens,
-            estimated_cost=cost,
-            latency_ms=latency_ms,
-            success=success,
-            error_message=error_message,
-            fallback_used=fallback_used,
-            metadata={"run_id": self._run_id},
-        )
-        try:
-            self._telemetry_backend.log_call(record)
-        except (AttributeError, ValueError, TypeError):
-            # Telemetry backend errors - log but don't crash workflow
-            logger.debug("Failed to log call telemetry (backend error)")
-        except OSError:
-            # File system errors - log but don't crash workflow
-            logger.debug("Failed to log call telemetry (file system error)")
-        except Exception:  # noqa: BLE001
-            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
-            logger.debug("Unexpected error logging call telemetry")
-
-    def _emit_workflow_telemetry(self, result: WorkflowResult) -> None:
-        """Emit a WorkflowRunRecord to the telemetry backend.
-
-        Args:
-            result: The workflow result to record
-
-        """
-        # Build stage records
-        stages = [
-            WorkflowStageRecord(
-                stage_name=s.name,
-                tier=s.tier.value,
-                model_id=self.get_model_for_tier(s.tier),
-                input_tokens=s.input_tokens,
-                output_tokens=s.output_tokens,
-                cost=s.cost,
-                latency_ms=s.duration_ms,
-                success=not s.skipped and result.error is None,
-                skipped=s.skipped,
-                skip_reason=s.skip_reason,
-            )
-            for s in result.stages
-        ]
-
-        record = WorkflowRunRecord(
-            run_id=self._run_id or str(uuid.uuid4()),
-            workflow_name=self.name,
-            started_at=result.started_at.isoformat(),
-            completed_at=result.completed_at.isoformat(),
-            stages=stages,
-            total_input_tokens=sum(s.input_tokens for s in result.stages if not s.skipped),
-            total_output_tokens=sum(s.output_tokens for s in result.stages if not s.skipped),
-            total_cost=result.cost_report.total_cost,
-            baseline_cost=result.cost_report.baseline_cost,
-            savings=result.cost_report.savings,
-            savings_percent=result.cost_report.savings_percent,
-            total_duration_ms=result.total_duration_ms,
-            success=result.success,
-            error=result.error,
-            providers_used=[self._provider_str],
-            tiers_used=list(result.cost_report.by_tier.keys()),
-        )
-        try:
-            self._telemetry_backend.log_workflow(record)
-        except (AttributeError, ValueError, TypeError):
-            # Telemetry backend errors - log but don't crash workflow
-            logger.debug("Failed to log workflow telemetry (backend error)")
-        except OSError:
-            # File system errors - log but don't crash workflow
-            logger.debug("Failed to log workflow telemetry (file system error)")
-        except Exception:  # noqa: BLE001
-            # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
-            logger.debug("Unexpected error logging workflow telemetry")
+    # Note: _emit_call_telemetry and _emit_workflow_telemetry are inherited from TelemetryMixin
 
     async def run_step_with_executor(
         self,