empathy-framework 3.7.0__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. coach_wizards/code_reviewer_README.md +60 -0
  2. coach_wizards/code_reviewer_wizard.py +180 -0
  3. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/METADATA +148 -11
  4. empathy_framework-3.8.0.dist-info/RECORD +333 -0
  5. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/top_level.txt +5 -1
  6. empathy_healthcare_plugin/monitors/__init__.py +9 -0
  7. empathy_healthcare_plugin/monitors/clinical_protocol_monitor.py +315 -0
  8. empathy_healthcare_plugin/monitors/monitoring/__init__.py +44 -0
  9. empathy_healthcare_plugin/monitors/monitoring/protocol_checker.py +300 -0
  10. empathy_healthcare_plugin/monitors/monitoring/protocol_loader.py +214 -0
  11. empathy_healthcare_plugin/monitors/monitoring/sensor_parsers.py +306 -0
  12. empathy_healthcare_plugin/monitors/monitoring/trajectory_analyzer.py +389 -0
  13. empathy_llm_toolkit/agent_factory/__init__.py +53 -0
  14. empathy_llm_toolkit/agent_factory/adapters/__init__.py +85 -0
  15. empathy_llm_toolkit/agent_factory/adapters/autogen_adapter.py +312 -0
  16. empathy_llm_toolkit/agent_factory/adapters/crewai_adapter.py +454 -0
  17. empathy_llm_toolkit/agent_factory/adapters/haystack_adapter.py +298 -0
  18. empathy_llm_toolkit/agent_factory/adapters/langchain_adapter.py +362 -0
  19. empathy_llm_toolkit/agent_factory/adapters/langgraph_adapter.py +333 -0
  20. empathy_llm_toolkit/agent_factory/adapters/native.py +228 -0
  21. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +426 -0
  22. empathy_llm_toolkit/agent_factory/base.py +305 -0
  23. empathy_llm_toolkit/agent_factory/crews/__init__.py +67 -0
  24. empathy_llm_toolkit/agent_factory/crews/code_review.py +1113 -0
  25. empathy_llm_toolkit/agent_factory/crews/health_check.py +1246 -0
  26. empathy_llm_toolkit/agent_factory/crews/refactoring.py +1128 -0
  27. empathy_llm_toolkit/agent_factory/crews/security_audit.py +1018 -0
  28. empathy_llm_toolkit/agent_factory/decorators.py +286 -0
  29. empathy_llm_toolkit/agent_factory/factory.py +558 -0
  30. empathy_llm_toolkit/agent_factory/framework.py +192 -0
  31. empathy_llm_toolkit/agent_factory/memory_integration.py +324 -0
  32. empathy_llm_toolkit/agent_factory/resilient.py +320 -0
  33. empathy_llm_toolkit/cli/__init__.py +8 -0
  34. empathy_llm_toolkit/cli/sync_claude.py +487 -0
  35. empathy_llm_toolkit/code_health.py +150 -3
  36. empathy_llm_toolkit/config/__init__.py +29 -0
  37. empathy_llm_toolkit/config/unified.py +295 -0
  38. empathy_llm_toolkit/routing/__init__.py +32 -0
  39. empathy_llm_toolkit/routing/model_router.py +362 -0
  40. empathy_llm_toolkit/security/IMPLEMENTATION_SUMMARY.md +413 -0
  41. empathy_llm_toolkit/security/PHASE2_COMPLETE.md +384 -0
  42. empathy_llm_toolkit/security/PHASE2_SECRETS_DETECTOR_COMPLETE.md +271 -0
  43. empathy_llm_toolkit/security/QUICK_REFERENCE.md +316 -0
  44. empathy_llm_toolkit/security/README.md +262 -0
  45. empathy_llm_toolkit/security/__init__.py +62 -0
  46. empathy_llm_toolkit/security/audit_logger.py +929 -0
  47. empathy_llm_toolkit/security/audit_logger_example.py +152 -0
  48. empathy_llm_toolkit/security/pii_scrubber.py +640 -0
  49. empathy_llm_toolkit/security/secrets_detector.py +678 -0
  50. empathy_llm_toolkit/security/secrets_detector_example.py +304 -0
  51. empathy_llm_toolkit/security/secure_memdocs.py +1192 -0
  52. empathy_llm_toolkit/security/secure_memdocs_example.py +278 -0
  53. empathy_llm_toolkit/wizards/__init__.py +38 -0
  54. empathy_llm_toolkit/wizards/base_wizard.py +364 -0
  55. empathy_llm_toolkit/wizards/customer_support_wizard.py +190 -0
  56. empathy_llm_toolkit/wizards/healthcare_wizard.py +362 -0
  57. empathy_llm_toolkit/wizards/patient_assessment_README.md +64 -0
  58. empathy_llm_toolkit/wizards/patient_assessment_wizard.py +193 -0
  59. empathy_llm_toolkit/wizards/technology_wizard.py +194 -0
  60. empathy_os/__init__.py +52 -52
  61. empathy_os/adaptive/__init__.py +13 -0
  62. empathy_os/adaptive/task_complexity.py +127 -0
  63. empathy_os/cache/__init__.py +117 -0
  64. empathy_os/cache/base.py +166 -0
  65. empathy_os/cache/dependency_manager.py +253 -0
  66. empathy_os/cache/hash_only.py +248 -0
  67. empathy_os/cache/hybrid.py +390 -0
  68. empathy_os/cache/storage.py +282 -0
  69. empathy_os/cli.py +118 -8
  70. empathy_os/cli_unified.py +121 -1
  71. empathy_os/config/__init__.py +63 -0
  72. empathy_os/config/xml_config.py +239 -0
  73. empathy_os/config.py +2 -1
  74. empathy_os/dashboard/__init__.py +15 -0
  75. empathy_os/dashboard/server.py +743 -0
  76. empathy_os/memory/__init__.py +195 -0
  77. empathy_os/memory/claude_memory.py +466 -0
  78. empathy_os/memory/config.py +224 -0
  79. empathy_os/memory/control_panel.py +1298 -0
  80. empathy_os/memory/edges.py +179 -0
  81. empathy_os/memory/graph.py +567 -0
  82. empathy_os/memory/long_term.py +1194 -0
  83. empathy_os/memory/nodes.py +179 -0
  84. empathy_os/memory/redis_bootstrap.py +540 -0
  85. empathy_os/memory/security/__init__.py +31 -0
  86. empathy_os/memory/security/audit_logger.py +930 -0
  87. empathy_os/memory/security/pii_scrubber.py +640 -0
  88. empathy_os/memory/security/secrets_detector.py +678 -0
  89. empathy_os/memory/short_term.py +2119 -0
  90. empathy_os/memory/storage/__init__.py +15 -0
  91. empathy_os/memory/summary_index.py +583 -0
  92. empathy_os/memory/unified.py +619 -0
  93. empathy_os/metrics/__init__.py +12 -0
  94. empathy_os/metrics/prompt_metrics.py +190 -0
  95. empathy_os/models/__init__.py +136 -0
  96. empathy_os/models/__main__.py +13 -0
  97. empathy_os/models/cli.py +655 -0
  98. empathy_os/models/empathy_executor.py +354 -0
  99. empathy_os/models/executor.py +252 -0
  100. empathy_os/models/fallback.py +671 -0
  101. empathy_os/models/provider_config.py +563 -0
  102. empathy_os/models/registry.py +382 -0
  103. empathy_os/models/tasks.py +302 -0
  104. empathy_os/models/telemetry.py +548 -0
  105. empathy_os/models/token_estimator.py +378 -0
  106. empathy_os/models/validation.py +274 -0
  107. empathy_os/monitoring/__init__.py +52 -0
  108. empathy_os/monitoring/alerts.py +23 -0
  109. empathy_os/monitoring/alerts_cli.py +268 -0
  110. empathy_os/monitoring/multi_backend.py +271 -0
  111. empathy_os/monitoring/otel_backend.py +363 -0
  112. empathy_os/optimization/__init__.py +19 -0
  113. empathy_os/optimization/context_optimizer.py +272 -0
  114. empathy_os/plugins/__init__.py +28 -0
  115. empathy_os/plugins/base.py +361 -0
  116. empathy_os/plugins/registry.py +268 -0
  117. empathy_os/project_index/__init__.py +30 -0
  118. empathy_os/project_index/cli.py +335 -0
  119. empathy_os/project_index/crew_integration.py +430 -0
  120. empathy_os/project_index/index.py +425 -0
  121. empathy_os/project_index/models.py +501 -0
  122. empathy_os/project_index/reports.py +473 -0
  123. empathy_os/project_index/scanner.py +538 -0
  124. empathy_os/prompts/__init__.py +61 -0
  125. empathy_os/prompts/config.py +77 -0
  126. empathy_os/prompts/context.py +177 -0
  127. empathy_os/prompts/parser.py +285 -0
  128. empathy_os/prompts/registry.py +313 -0
  129. empathy_os/prompts/templates.py +208 -0
  130. empathy_os/resilience/__init__.py +56 -0
  131. empathy_os/resilience/circuit_breaker.py +256 -0
  132. empathy_os/resilience/fallback.py +179 -0
  133. empathy_os/resilience/health.py +300 -0
  134. empathy_os/resilience/retry.py +209 -0
  135. empathy_os/resilience/timeout.py +135 -0
  136. empathy_os/routing/__init__.py +43 -0
  137. empathy_os/routing/chain_executor.py +433 -0
  138. empathy_os/routing/classifier.py +217 -0
  139. empathy_os/routing/smart_router.py +234 -0
  140. empathy_os/routing/wizard_registry.py +307 -0
  141. empathy_os/trust/__init__.py +28 -0
  142. empathy_os/trust/circuit_breaker.py +579 -0
  143. empathy_os/validation/__init__.py +19 -0
  144. empathy_os/validation/xml_validator.py +281 -0
  145. empathy_os/wizard_factory_cli.py +170 -0
  146. empathy_os/workflows/__init__.py +360 -0
  147. empathy_os/workflows/base.py +1660 -0
  148. empathy_os/workflows/bug_predict.py +962 -0
  149. empathy_os/workflows/code_review.py +960 -0
  150. empathy_os/workflows/code_review_adapters.py +310 -0
  151. empathy_os/workflows/code_review_pipeline.py +720 -0
  152. empathy_os/workflows/config.py +600 -0
  153. empathy_os/workflows/dependency_check.py +648 -0
  154. empathy_os/workflows/document_gen.py +1069 -0
  155. empathy_os/workflows/documentation_orchestrator.py +1205 -0
  156. empathy_os/workflows/health_check.py +679 -0
  157. empathy_os/workflows/keyboard_shortcuts/__init__.py +39 -0
  158. empathy_os/workflows/keyboard_shortcuts/generators.py +386 -0
  159. empathy_os/workflows/keyboard_shortcuts/parsers.py +414 -0
  160. empathy_os/workflows/keyboard_shortcuts/prompts.py +295 -0
  161. empathy_os/workflows/keyboard_shortcuts/schema.py +193 -0
  162. empathy_os/workflows/keyboard_shortcuts/workflow.py +505 -0
  163. empathy_os/workflows/manage_documentation.py +804 -0
  164. empathy_os/workflows/new_sample_workflow1.py +146 -0
  165. empathy_os/workflows/new_sample_workflow1_README.md +150 -0
  166. empathy_os/workflows/perf_audit.py +687 -0
  167. empathy_os/workflows/pr_review.py +748 -0
  168. empathy_os/workflows/progress.py +445 -0
  169. empathy_os/workflows/progress_server.py +322 -0
  170. empathy_os/workflows/refactor_plan.py +693 -0
  171. empathy_os/workflows/release_prep.py +808 -0
  172. empathy_os/workflows/research_synthesis.py +404 -0
  173. empathy_os/workflows/secure_release.py +585 -0
  174. empathy_os/workflows/security_adapters.py +297 -0
  175. empathy_os/workflows/security_audit.py +1046 -0
  176. empathy_os/workflows/step_config.py +234 -0
  177. empathy_os/workflows/test5.py +125 -0
  178. empathy_os/workflows/test5_README.md +158 -0
  179. empathy_os/workflows/test_gen.py +1855 -0
  180. empathy_os/workflows/test_lifecycle.py +526 -0
  181. empathy_os/workflows/test_maintenance.py +626 -0
  182. empathy_os/workflows/test_maintenance_cli.py +590 -0
  183. empathy_os/workflows/test_maintenance_crew.py +821 -0
  184. empathy_os/workflows/xml_enhanced_crew.py +285 -0
  185. empathy_software_plugin/cli/__init__.py +120 -0
  186. empathy_software_plugin/cli/inspect.py +362 -0
  187. empathy_software_plugin/cli.py +3 -1
  188. empathy_software_plugin/wizards/__init__.py +42 -0
  189. empathy_software_plugin/wizards/advanced_debugging_wizard.py +392 -0
  190. empathy_software_plugin/wizards/agent_orchestration_wizard.py +511 -0
  191. empathy_software_plugin/wizards/ai_collaboration_wizard.py +503 -0
  192. empathy_software_plugin/wizards/ai_context_wizard.py +441 -0
  193. empathy_software_plugin/wizards/ai_documentation_wizard.py +503 -0
  194. empathy_software_plugin/wizards/base_wizard.py +288 -0
  195. empathy_software_plugin/wizards/book_chapter_wizard.py +519 -0
  196. empathy_software_plugin/wizards/code_review_wizard.py +606 -0
  197. empathy_software_plugin/wizards/debugging/__init__.py +50 -0
  198. empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +414 -0
  199. empathy_software_plugin/wizards/debugging/config_loaders.py +442 -0
  200. empathy_software_plugin/wizards/debugging/fix_applier.py +469 -0
  201. empathy_software_plugin/wizards/debugging/language_patterns.py +383 -0
  202. empathy_software_plugin/wizards/debugging/linter_parsers.py +470 -0
  203. empathy_software_plugin/wizards/debugging/verification.py +369 -0
  204. empathy_software_plugin/wizards/enhanced_testing_wizard.py +537 -0
  205. empathy_software_plugin/wizards/memory_enhanced_debugging_wizard.py +816 -0
  206. empathy_software_plugin/wizards/multi_model_wizard.py +501 -0
  207. empathy_software_plugin/wizards/pattern_extraction_wizard.py +422 -0
  208. empathy_software_plugin/wizards/pattern_retriever_wizard.py +400 -0
  209. empathy_software_plugin/wizards/performance/__init__.py +9 -0
  210. empathy_software_plugin/wizards/performance/bottleneck_detector.py +221 -0
  211. empathy_software_plugin/wizards/performance/profiler_parsers.py +278 -0
  212. empathy_software_plugin/wizards/performance/trajectory_analyzer.py +429 -0
  213. empathy_software_plugin/wizards/performance_profiling_wizard.py +305 -0
  214. empathy_software_plugin/wizards/prompt_engineering_wizard.py +425 -0
  215. empathy_software_plugin/wizards/rag_pattern_wizard.py +461 -0
  216. empathy_software_plugin/wizards/security/__init__.py +32 -0
  217. empathy_software_plugin/wizards/security/exploit_analyzer.py +290 -0
  218. empathy_software_plugin/wizards/security/owasp_patterns.py +241 -0
  219. empathy_software_plugin/wizards/security/vulnerability_scanner.py +604 -0
  220. empathy_software_plugin/wizards/security_analysis_wizard.py +322 -0
  221. empathy_software_plugin/wizards/security_learning_wizard.py +740 -0
  222. empathy_software_plugin/wizards/tech_debt_wizard.py +726 -0
  223. empathy_software_plugin/wizards/testing/__init__.py +27 -0
  224. empathy_software_plugin/wizards/testing/coverage_analyzer.py +459 -0
  225. empathy_software_plugin/wizards/testing/quality_analyzer.py +531 -0
  226. empathy_software_plugin/wizards/testing/test_suggester.py +533 -0
  227. empathy_software_plugin/wizards/testing_wizard.py +274 -0
  228. hot_reload/README.md +473 -0
  229. hot_reload/__init__.py +62 -0
  230. hot_reload/config.py +84 -0
  231. hot_reload/integration.py +228 -0
  232. hot_reload/reloader.py +298 -0
  233. hot_reload/watcher.py +179 -0
  234. hot_reload/websocket.py +176 -0
  235. scaffolding/README.md +589 -0
  236. scaffolding/__init__.py +35 -0
  237. scaffolding/__main__.py +14 -0
  238. scaffolding/cli.py +240 -0
  239. test_generator/__init__.py +38 -0
  240. test_generator/__main__.py +14 -0
  241. test_generator/cli.py +226 -0
  242. test_generator/generator.py +325 -0
  243. test_generator/risk_analyzer.py +216 -0
  244. workflow_patterns/__init__.py +33 -0
  245. workflow_patterns/behavior.py +249 -0
  246. workflow_patterns/core.py +76 -0
  247. workflow_patterns/output.py +99 -0
  248. workflow_patterns/registry.py +255 -0
  249. workflow_patterns/structural.py +288 -0
  250. workflow_scaffolding/__init__.py +11 -0
  251. workflow_scaffolding/__main__.py +12 -0
  252. workflow_scaffolding/cli.py +206 -0
  253. workflow_scaffolding/generator.py +265 -0
  254. agents/code_inspection/patterns/inspection/recurring_B112.json +0 -18
  255. agents/code_inspection/patterns/inspection/recurring_F541.json +0 -16
  256. agents/code_inspection/patterns/inspection/recurring_FORMAT.json +0 -25
  257. agents/code_inspection/patterns/inspection/recurring_bug_20250822_def456.json +0 -16
  258. agents/code_inspection/patterns/inspection/recurring_bug_20250915_abc123.json +0 -16
  259. agents/code_inspection/patterns/inspection/recurring_bug_20251212_3c5b9951.json +0 -16
  260. agents/code_inspection/patterns/inspection/recurring_bug_20251212_97c0f72f.json +0 -16
  261. agents/code_inspection/patterns/inspection/recurring_bug_20251212_a0871d53.json +0 -16
  262. agents/code_inspection/patterns/inspection/recurring_bug_20251212_a9b6ec41.json +0 -16
  263. agents/code_inspection/patterns/inspection/recurring_bug_null_001.json +0 -16
  264. agents/code_inspection/patterns/inspection/recurring_builtin.json +0 -16
  265. agents/compliance_anticipation_agent.py +0 -1422
  266. agents/compliance_db.py +0 -339
  267. agents/epic_integration_wizard.py +0 -530
  268. agents/notifications.py +0 -291
  269. agents/trust_building_behaviors.py +0 -872
  270. empathy_framework-3.7.0.dist-info/RECORD +0 -105
  271. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/WHEEL +0 -0
  272. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/entry_points.txt +0 -0
  273. {empathy_framework-3.7.0.dist-info → empathy_framework-3.8.0.dist-info}/licenses/LICENSE +0 -0
  274. /empathy_os/{monitoring.py → agent_monitoring.py} +0 -0
empathy_os/workflows/base.py
@@ -0,0 +1,1660 @@
+ """Base Workflow Class for Multi-Model Pipelines
+
+ Provides a framework for creating cost-optimized workflows that
+ route tasks to the appropriate model tier.
+
+ Integration with empathy_os.models:
+ - Uses unified ModelTier/ModelProvider from empathy_os.models
+ - Supports LLMExecutor for abstracted LLM calls
+ - Supports TelemetryBackend for telemetry storage
+ - WorkflowStepConfig for declarative step definitions
+
+ Copyright 2025 Smart-AI-Memory
+ Licensed under Fair Source License 0.9
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ import uuid
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ # Load .env file for API keys if python-dotenv is available
+ try:
+     from dotenv import load_dotenv
+
+     load_dotenv()
+ except ImportError:
+     pass  # python-dotenv not installed, rely on environment variables
+
+ # Import caching infrastructure
+ from empathy_os.cache import BaseCache, auto_setup_cache, create_cache
+ from empathy_os.cost_tracker import MODEL_PRICING, CostTracker
+
+ # Import unified types from empathy_os.models
+ from empathy_os.models import (
+     ExecutionContext,
+     LLMCallRecord,
+     LLMExecutor,
+     TelemetryBackend,
+     WorkflowRunRecord,
+     WorkflowStageRecord,
+     get_telemetry_store,
+ )
+ from empathy_os.models import ModelProvider as UnifiedModelProvider
+ from empathy_os.models import ModelTier as UnifiedModelTier
+
+ # Import progress tracking
+ from .progress import ProgressCallback, ProgressTracker
+
+ if TYPE_CHECKING:
+     from .config import WorkflowConfig
+     from .step_config import WorkflowStepConfig
+
+ logger = logging.getLogger(__name__)
+
+ # Default path for workflow run history
+ WORKFLOW_HISTORY_FILE = ".empathy/workflow_runs.json"
+
+
+ # Local enums for backward compatibility
+ # New code should use empathy_os.models.ModelTier/ModelProvider
+ class ModelTier(Enum):
+     """Model tier for cost optimization."""
+
+     CHEAP = "cheap"  # Haiku/GPT-4o-mini - $0.25-1.25/M tokens
+     CAPABLE = "capable"  # Sonnet/GPT-4o - $3-15/M tokens
+     PREMIUM = "premium"  # Opus/o1 - $15-75/M tokens
+
+     def to_unified(self) -> UnifiedModelTier:
+         """Convert to unified ModelTier from empathy_os.models."""
+         return UnifiedModelTier(self.value)
+
+
+ class ModelProvider(Enum):
+     """Supported model providers."""
+
+     ANTHROPIC = "anthropic"
+     OPENAI = "openai"
+     GOOGLE = "google"  # Google Gemini models
+     OLLAMA = "ollama"
+     HYBRID = "hybrid"  # Mix of best models from different providers
+     CUSTOM = "custom"  # User-defined custom models
+
+     def to_unified(self) -> UnifiedModelProvider:
+         """Convert to unified ModelProvider from empathy_os.models."""
+         return UnifiedModelProvider(self.value)
+
+
+ # Import unified MODEL_REGISTRY as single source of truth
+ # This import is placed here intentionally to avoid circular imports
+ from empathy_os.models import MODEL_REGISTRY  # noqa: E402
+
+
+ def _build_provider_models() -> dict[ModelProvider, dict[ModelTier, str]]:
+     """Build PROVIDER_MODELS from MODEL_REGISTRY.
+
+     This ensures PROVIDER_MODELS stays in sync with the single source of truth.
+     """
+     result: dict[ModelProvider, dict[ModelTier, str]] = {}
+
+     # Map string provider names to ModelProvider enum
+     provider_map = {
+         "anthropic": ModelProvider.ANTHROPIC,
+         "openai": ModelProvider.OPENAI,
+         "google": ModelProvider.GOOGLE,
+         "ollama": ModelProvider.OLLAMA,
+         "hybrid": ModelProvider.HYBRID,
+     }
+
+     # Map string tier names to ModelTier enum
+     tier_map = {
+         "cheap": ModelTier.CHEAP,
+         "capable": ModelTier.CAPABLE,
+         "premium": ModelTier.PREMIUM,
+     }
+
+     for provider_str, tiers in MODEL_REGISTRY.items():
+         if provider_str not in provider_map:
+             continue  # Skip custom providers
+         provider_enum = provider_map[provider_str]
+         result[provider_enum] = {}
+         for tier_str, model_info in tiers.items():
+             if tier_str in tier_map:
+                 result[provider_enum][tier_map[tier_str]] = model_info.id
+
+     return result
+
+
+ # Model mappings by provider and tier (derived from MODEL_REGISTRY)
+ PROVIDER_MODELS: dict[ModelProvider, dict[ModelTier, str]] = _build_provider_models()
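# Illustrative sketch (annotation, not part of the published file): looking up
# the model id for a provider/tier pair via the derived mapping. The concrete
# id depends on MODEL_REGISTRY at runtime, so the comment value is an assumption.
cheap_model = PROVIDER_MODELS[ModelProvider.ANTHROPIC][ModelTier.CHEAP]
print(cheap_model)  # e.g. a Haiku-class model id, per the tier comments above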
+
+
+ @dataclass
+ class WorkflowStage:
+     """Represents a single stage in a workflow."""
+
+     name: str
+     tier: ModelTier
+     description: str
+     input_tokens: int = 0
+     output_tokens: int = 0
+     cost: float = 0.0
+     result: Any = None
+     duration_ms: int = 0
+     skipped: bool = False
+     skip_reason: str | None = None
+
+
+ @dataclass
+ class CostReport:
+     """Cost breakdown for a workflow execution."""
+
+     total_cost: float
+     baseline_cost: float  # If all stages used premium
+     savings: float
+     savings_percent: float
+     by_stage: dict[str, float] = field(default_factory=dict)
+     by_tier: dict[str, float] = field(default_factory=dict)
+     # Cache metrics
+     cache_hits: int = 0
+     cache_misses: int = 0
+     cache_hit_rate: float = 0.0
+     estimated_cost_without_cache: float = 0.0
+     savings_from_cache: float = 0.0
+
+
+ @dataclass
+ class WorkflowResult:
+     """Result of a workflow execution."""
+
+     success: bool
+     stages: list[WorkflowStage]
+     final_output: Any
+     cost_report: CostReport
+     started_at: datetime
+     completed_at: datetime
+     total_duration_ms: int
+     provider: str = "unknown"
+     error: str | None = None
+     # Structured error taxonomy for reliability
+     error_type: str | None = None  # "config" | "runtime" | "provider" | "timeout" | "validation"
+     transient: bool = False  # True if retry is reasonable (e.g., provider timeout)
+
+
+ def _load_workflow_history(history_file: str = WORKFLOW_HISTORY_FILE) -> list[dict]:
+     """Load workflow run history from disk."""
+     path = Path(history_file)
+     if not path.exists():
+         return []
+     try:
+         with open(path) as f:
+             data = json.load(f)
+         return list(data) if isinstance(data, list) else []
+     except (json.JSONDecodeError, OSError):
+         return []
+
+
+ def _save_workflow_run(
+     workflow_name: str,
+     provider: str,
+     result: WorkflowResult,
+     history_file: str = WORKFLOW_HISTORY_FILE,
+     max_history: int = 100,
+ ) -> None:
+     """Save a workflow run to history."""
+     path = Path(history_file)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     history = _load_workflow_history(history_file)
+
+     # Create run record
+     run: dict = {
+         "workflow": workflow_name,
+         "provider": provider,
+         "success": result.success,
+         "started_at": result.started_at.isoformat(),
+         "completed_at": result.completed_at.isoformat(),
+         "duration_ms": result.total_duration_ms,
+         "cost": result.cost_report.total_cost,
+         "baseline_cost": result.cost_report.baseline_cost,
+         "savings": result.cost_report.savings,
+         "savings_percent": result.cost_report.savings_percent,
+         "stages": [
+             {
+                 "name": s.name,
+                 "tier": s.tier.value,
+                 "skipped": s.skipped,
+                 "cost": s.cost,
+                 "duration_ms": s.duration_ms,
+             }
+             for s in result.stages
+         ],
+         "error": result.error,
+     }
+
+     # Extract XML-parsed fields from final_output if present
+     if isinstance(result.final_output, dict):
+         if result.final_output.get("xml_parsed"):
+             run["xml_parsed"] = True
+             run["summary"] = result.final_output.get("summary")
+             run["findings"] = result.final_output.get("findings", [])
+             run["checklist"] = result.final_output.get("checklist", [])
+
+     # Add to history and trim
+     history.append(run)
+     history = history[-max_history:]
+
+     with open(path, "w") as f:
+         json.dump(history, f, indent=2)
+
+
+ def get_workflow_stats(history_file: str = WORKFLOW_HISTORY_FILE) -> dict:
+     """Get workflow statistics for dashboard.
+
+     Returns:
+         Dictionary with workflow stats including:
+         - total_runs: Total workflow runs
+         - by_workflow: Per-workflow stats
+         - by_provider: Per-provider stats
+         - recent_runs: Last 10 runs
+         - total_savings: Total cost savings
+
+     """
+     history = _load_workflow_history(history_file)
+
+     if not history:
+         return {
+             "total_runs": 0,
+             "by_workflow": {},
+             "by_provider": {},
+             "by_tier": {"cheap": 0, "capable": 0, "premium": 0},
+             "recent_runs": [],
+             "total_cost": 0.0,
+             "total_savings": 0.0,
+             "avg_savings_percent": 0.0,
+         }
+
+     # Aggregate stats
+     by_workflow: dict[str, dict] = {}
+     by_provider: dict[str, dict] = {}
+     by_tier: dict[str, float] = {"cheap": 0.0, "capable": 0.0, "premium": 0.0}
+     total_cost = 0.0
+     total_savings = 0.0
+     successful_runs = 0
+
+     for run in history:
+         wf_name = run.get("workflow", "unknown")
+         provider = run.get("provider", "unknown")
+         cost = run.get("cost", 0.0)
+         savings = run.get("savings", 0.0)
+
+         # By workflow
+         if wf_name not in by_workflow:
+             by_workflow[wf_name] = {"runs": 0, "cost": 0.0, "savings": 0.0, "success": 0}
+         by_workflow[wf_name]["runs"] += 1
+         by_workflow[wf_name]["cost"] += cost
+         by_workflow[wf_name]["savings"] += savings
+         if run.get("success"):
+             by_workflow[wf_name]["success"] += 1
+
+         # By provider
+         if provider not in by_provider:
+             by_provider[provider] = {"runs": 0, "cost": 0.0}
+         by_provider[provider]["runs"] += 1
+         by_provider[provider]["cost"] += cost
+
+         # By tier (from stages)
+         for stage in run.get("stages", []):
+             if not stage.get("skipped"):
+                 tier = stage.get("tier", "capable")
+                 by_tier[tier] = by_tier.get(tier, 0.0) + stage.get("cost", 0.0)
+
+         total_cost += cost
+         total_savings += savings
+         if run.get("success"):
+             successful_runs += 1
+
+     # Calculate average savings percent
+     avg_savings_percent = 0.0
+     if history:
+         savings_percents = [r.get("savings_percent", 0) for r in history if r.get("success")]
+         if savings_percents:
+             avg_savings_percent = sum(savings_percents) / len(savings_percents)
+
+     return {
+         "total_runs": len(history),
+         "successful_runs": successful_runs,
+         "by_workflow": by_workflow,
+         "by_provider": by_provider,
+         "by_tier": by_tier,
+         "recent_runs": history[-10:][::-1],  # Last 10, most recent first
+         "total_cost": total_cost,
+         "total_savings": total_savings,
+         "avg_savings_percent": avg_savings_percent,
+     }
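# Illustrative sketch (annotation, not part of the published file): consuming
# the stats dict, assuming a .empathy/workflow_runs.json written by prior runs.
stats = get_workflow_stats()
print(f"{stats['total_runs']} runs, ${stats['total_cost']:.4f} spent, "
      f"avg savings {stats['avg_savings_percent']:.1f}%")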
+
+
+ class BaseWorkflow(ABC):
+     """Base class for multi-model workflows.
+
+     Subclasses define stages and tier mappings:
+
+         class MyWorkflow(BaseWorkflow):
+             name = "my-workflow"
+             description = "Does something useful"
+             stages = ["stage1", "stage2", "stage3"]
+             tier_map = {
+                 "stage1": ModelTier.CHEAP,
+                 "stage2": ModelTier.CAPABLE,
+                 "stage3": ModelTier.PREMIUM,
+             }
+
+             async def run_stage(self, stage_name, tier, input_data):
+                 # Implement stage logic
+                 return output_data
+     """
+
+     name: str = "base-workflow"
+     description: str = "Base workflow template"
+     stages: list[str] = []
+     tier_map: dict[str, ModelTier] = {}
+
+     def __init__(
+         self,
+         cost_tracker: CostTracker | None = None,
+         provider: ModelProvider | str | None = None,
+         config: WorkflowConfig | None = None,
+         executor: LLMExecutor | None = None,
+         telemetry_backend: TelemetryBackend | None = None,
+         progress_callback: ProgressCallback | None = None,
+         cache: BaseCache | None = None,
+         enable_cache: bool = True,
+     ):
+         """Initialize workflow with optional cost tracker, provider, and config.
+
+         Args:
+             cost_tracker: CostTracker instance for logging costs
+             provider: Model provider (anthropic, openai, ollama) or ModelProvider enum.
+                 If None, uses config or defaults to anthropic.
+             config: WorkflowConfig for model customization. If None, loads from
+                 .empathy/workflows.yaml or uses defaults.
+             executor: LLMExecutor for abstracted LLM calls (optional).
+                 If provided, enables unified execution with telemetry.
+             telemetry_backend: TelemetryBackend for storing telemetry records.
+                 Defaults to TelemetryStore (JSONL file backend).
+             progress_callback: Callback for real-time progress updates.
+                 If provided, enables live progress tracking during execution.
+             cache: Optional cache instance. If None and enable_cache=True,
+                 auto-creates cache with one-time setup prompt.
+             enable_cache: Whether to enable caching (default True).
+
+         """
+         from .config import WorkflowConfig
+
+         self.cost_tracker = cost_tracker or CostTracker()
+         self._stages_run: list[WorkflowStage] = []
+
+         # Progress tracking
+         self._progress_callback = progress_callback
+         self._progress_tracker: ProgressTracker | None = None
+
+         # New: LLMExecutor support
+         self._executor = executor
+         self._telemetry_backend = telemetry_backend or get_telemetry_store()
+         self._run_id: str | None = None  # Set at start of execute()
+         self._api_key: str | None = None  # For default executor creation
+
+         # Cache support
+         self._cache: BaseCache | None = cache
+         self._enable_cache = enable_cache
+         self._cache_setup_attempted = False
+
+         # Load config if not provided
+         self._config = config or WorkflowConfig.load()
+
+         # Determine provider (priority: arg > config > default)
+         if provider is None:
+             provider = self._config.get_provider_for_workflow(self.name)
+
+         # Handle string provider input
+         if isinstance(provider, str):
+             provider_str = provider.lower()
+             try:
+                 provider = ModelProvider(provider_str)
+                 self._provider_str = provider_str
+             except ValueError:
+                 # Custom provider, keep as string
+                 self._provider_str = provider_str
+                 provider = ModelProvider.CUSTOM
+         else:
+             self._provider_str = provider.value
+
+         self.provider = provider
+
+     def get_tier_for_stage(self, stage_name: str) -> ModelTier:
+         """Get the model tier for a stage."""
+         return self.tier_map.get(stage_name, ModelTier.CAPABLE)
+
+     def get_model_for_tier(self, tier: ModelTier) -> str:
+         """Get the model for a tier based on configured provider and config."""
+         from .config import get_model
+
+         provider_str = getattr(self, "_provider_str", self.provider.value)
+
+         # Use config-aware model lookup
+         model = get_model(provider_str, tier.value, self._config)
+         return model
+
+     def _maybe_setup_cache(self) -> None:
+         """Set up cache with one-time user prompt if needed.
+
+         This is called lazily on first workflow execution to avoid
+         blocking workflow initialization.
+         """
+         if not self._enable_cache:
+             return
+
+         if self._cache_setup_attempted:
+             return
+
+         self._cache_setup_attempted = True
+
+         # If cache already provided, use it
+         if self._cache is not None:
+             return
+
+         # Otherwise, trigger auto-setup (which may prompt user)
+         try:
+             auto_setup_cache()
+             self._cache = create_cache()
+             logger.info(f"Cache initialized for workflow: {self.name}")
+         except ImportError:
+             # Hybrid cache dependencies not available, fall back to hash-only
+             logger.info(
+                 "Using hash-only cache (install empathy-framework[cache] for semantic caching)"
+             )
+             self._cache = create_cache(cache_type="hash")
+         except Exception:
+             # Graceful degradation - disable cache if setup fails
+             logger.warning("Cache setup failed, continuing without cache")
+             self._enable_cache = False
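# Illustrative sketch (annotation, not part of the published file): injecting a
# pre-built cache at construction skips the one-time auto-setup prompt entirely.
# cache_type="hash" mirrors the fallback path above; MyWorkflow is the
# hypothetical subclass from the class docstring.
wf = MyWorkflow(provider="anthropic", cache=create_cache(cache_type="hash"))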
+
+     async def _call_llm(
+         self,
+         tier: ModelTier,
+         system: str,
+         user_message: str,
+         max_tokens: int = 4096,
+         stage_name: str | None = None,
+     ) -> tuple[str, int, int]:
+         """Provider-agnostic LLM call using the configured provider.
+
+         This method uses run_step_with_executor internally to make LLM calls
+         that respect the configured provider (anthropic, openai, google, etc.).
+
+         Supports automatic caching to reduce API costs and latency.
+
+         Args:
+             tier: Model tier to use (CHEAP, CAPABLE, PREMIUM)
+             system: System prompt
+             user_message: User message/prompt
+             max_tokens: Maximum tokens in response
+             stage_name: Optional stage name for cache key (defaults to tier)
+
+         Returns:
+             Tuple of (response_content, input_tokens, output_tokens)
+
+         """
+         from .step_config import WorkflowStepConfig
+
+         # Determine stage name for cache key
+         stage = stage_name or f"llm_call_{tier.value}"
+         model = self.get_model_for_tier(tier)
+
+         # Try cache lookup if enabled
+         if self._enable_cache and self._cache is not None:
+             try:
+                 # Combine system + user message for cache key
+                 full_prompt = f"{system}\n\n{user_message}" if system else user_message
+                 cached_response = self._cache.get(self.name, stage, full_prompt, model)
+
+                 if cached_response is not None:
+                     logger.debug(f"Cache hit for {self.name}:{stage}")
+                     # Cached response is dict with content, input_tokens, output_tokens
+                     return (
+                         cached_response["content"],
+                         cached_response["input_tokens"],
+                         cached_response["output_tokens"],
+                     )
+             except Exception:
+                 # Cache lookup failed - continue with LLM call
+                 logger.debug("Cache lookup failed, continuing with LLM call")
+
+         # Create a step config for this call
+         step = WorkflowStepConfig(
+             name=stage,
+             task_type="general",
+             tier_hint=tier.value,
+             description="LLM call",
+             max_tokens=max_tokens,
+         )
+
+         try:
+             content, in_tokens, out_tokens, _cost = await self.run_step_with_executor(
+                 step=step,
+                 prompt=user_message,
+                 system=system,
+             )
+
+             # Store in cache if enabled
+             if self._enable_cache and self._cache is not None:
+                 try:
+                     full_prompt = f"{system}\n\n{user_message}" if system else user_message
+                     response_data = {
+                         "content": content,
+                         "input_tokens": in_tokens,
+                         "output_tokens": out_tokens,
+                     }
+                     self._cache.put(self.name, stage, full_prompt, model, response_data)
+                     logger.debug(f"Cached response for {self.name}:{stage}")
+                 except Exception:
+                     # Cache storage failed - log but continue
+                     logger.debug("Failed to cache response")
+
+             return content, in_tokens, out_tokens
+         except (ValueError, TypeError, KeyError) as e:
+             # Invalid input or configuration errors
+             return f"Error calling LLM (invalid input): {e}", 0, 0
+         except (TimeoutError, RuntimeError) as e:
+             # Timeout or API errors
+             return f"Error calling LLM (timeout/API error): {e}", 0, 0
+         except Exception:
+             # INTENTIONAL: Graceful degradation - return error message rather than crashing workflow
+             logger.exception("Unexpected error calling LLM")
+             return "Error calling LLM: unexpected error", 0, 0
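# Illustrative sketch (annotation, not part of the published file): a
# subclass's run_stage delegating to _call_llm. The system prompt text is
# made up; the return shape matches the abstract run_stage contract.
async def run_stage(self, stage_name, tier, input_data):
    content, tokens_in, tokens_out = await self._call_llm(
        tier=tier,
        system="You are a concise code reviewer.",
        user_message=str(input_data),
        stage_name=stage_name,  # stable per-stage cache key
    )
    return {"result": content}, tokens_in, tokens_out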
+
+     def _calculate_cost(self, tier: ModelTier, input_tokens: int, output_tokens: int) -> float:
+         """Calculate cost for a stage."""
+         tier_name = tier.value
+         pricing = MODEL_PRICING.get(tier_name, MODEL_PRICING["capable"])
+         input_cost = (input_tokens / 1_000_000) * pricing["input"]
+         output_cost = (output_tokens / 1_000_000) * pricing["output"]
+         return input_cost + output_cost
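# Worked example (annotation, not part of the published file), assuming a
# cheap-tier price of $0.25/M input and $1.25/M output tokens; the actual
# numbers live in MODEL_PRICING:
#   2_000 input tokens  -> (2_000 / 1_000_000) * 0.25 = $0.0005
#   500 output tokens   -> (500 / 1_000_000) * 1.25   = $0.000625
#   stage cost          -> $0.001125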
+
+     def _calculate_baseline_cost(self, input_tokens: int, output_tokens: int) -> float:
+         """Calculate what the cost would be using premium tier."""
+         pricing = MODEL_PRICING["premium"]
+         input_cost = (input_tokens / 1_000_000) * pricing["input"]
+         output_cost = (output_tokens / 1_000_000) * pricing["output"]
+         return input_cost + output_cost
+
+     def _generate_cost_report(self) -> CostReport:
+         """Generate cost report from completed stages."""
+         total_cost = 0.0
+         baseline_cost = 0.0
+         by_stage: dict[str, float] = {}
+         by_tier: dict[str, float] = {}
+
+         for stage in self._stages_run:
+             if stage.skipped:
+                 continue
+
+             total_cost += stage.cost
+             by_stage[stage.name] = stage.cost
+
+             tier_name = stage.tier.value
+             by_tier[tier_name] = by_tier.get(tier_name, 0.0) + stage.cost
+
+             # Calculate what this would cost at premium tier
+             baseline_cost += self._calculate_baseline_cost(stage.input_tokens, stage.output_tokens)
+
+         savings = baseline_cost - total_cost
+         savings_percent = (savings / baseline_cost * 100) if baseline_cost > 0 else 0.0
+
+         # Calculate cache metrics if cache is enabled
+         cache_hits = 0
+         cache_misses = 0
+         cache_hit_rate = 0.0
+         estimated_cost_without_cache = total_cost
+         savings_from_cache = 0.0
+
+         if self._cache is not None:
+             try:
+                 stats = self._cache.get_stats()
+                 cache_hits = stats.hits
+                 cache_misses = stats.misses
+                 cache_hit_rate = stats.hit_rate
+
+                 # Estimate cost without cache (assumes cache hits would have incurred full cost)
+                 # This is a conservative estimate
+                 if cache_hits > 0:
+                     # Average cost per non-cached call
+                     avg_cost_per_call = total_cost / cache_misses if cache_misses > 0 else 0.0
+                     # Estimated additional cost if cache hits were actual API calls
+                     estimated_additional_cost = cache_hits * avg_cost_per_call
+                     estimated_cost_without_cache = total_cost + estimated_additional_cost
+                     savings_from_cache = estimated_additional_cost
+             except (AttributeError, TypeError):
+                 # Cache doesn't support stats or error occurred
+                 pass
+
+         return CostReport(
+             total_cost=total_cost,
+             baseline_cost=baseline_cost,
+             savings=savings,
+             savings_percent=savings_percent,
+             by_stage=by_stage,
+             by_tier=by_tier,
+             cache_hits=cache_hits,
+             cache_misses=cache_misses,
+             cache_hit_rate=cache_hit_rate,
+             estimated_cost_without_cache=estimated_cost_without_cache,
+             savings_from_cache=savings_from_cache,
+         )
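# Worked example (annotation, not part of the published file): the cache-savings
# estimate above, with total_cost = $0.02 across 4 cache misses and 6 hits:
#   avg_cost_per_call            = 0.02 / 4    = $0.005
#   estimated_additional_cost    = 6 * 0.005   = $0.03
#   estimated_cost_without_cache = 0.02 + 0.03 = $0.05
#   savings_from_cache           = $0.03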
+
+     @abstractmethod
+     async def run_stage(
+         self,
+         stage_name: str,
+         tier: ModelTier,
+         input_data: Any,
+     ) -> tuple[Any, int, int]:
+         """Execute a single workflow stage.
+
+         Args:
+             stage_name: Name of the stage to run
+             tier: Model tier to use
+             input_data: Input for this stage
+
+         Returns:
+             Tuple of (output_data, input_tokens, output_tokens)
+
+         """
+
+     def should_skip_stage(self, stage_name: str, input_data: Any) -> tuple[bool, str | None]:
+         """Determine if a stage should be skipped.
+
+         Override in subclasses for conditional stage execution.
+
+         Args:
+             stage_name: Name of the stage
+             input_data: Current workflow data
+
+         Returns:
+             Tuple of (should_skip, reason)
+
+         """
+         return False, None
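# Illustrative sketch (annotation, not part of the published file): a
# conditional-skip override. The stage name and input key are hypothetical.
def should_skip_stage(self, stage_name, input_data):
    if stage_name == "deep_audit" and not input_data.get("findings"):
        return True, "no findings to audit"
    return False, None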
+
+     async def execute(self, **kwargs: Any) -> WorkflowResult:
+         """Execute the full workflow.
+
+         Args:
+             **kwargs: Initial input data for the workflow
+
+         Returns:
+             WorkflowResult with stages, output, and cost report
+
+         """
+         # Set up cache (one-time setup with user prompt if needed)
+         self._maybe_setup_cache()
+
+         # Set run ID for telemetry correlation
+         self._run_id = str(uuid.uuid4())
+
+         started_at = datetime.now()
+         self._stages_run = []
+         current_data = kwargs
+         error = None
+
+         # Initialize progress tracker if callback provided
+         if self._progress_callback:
+             self._progress_tracker = ProgressTracker(
+                 workflow_name=self.name,
+                 workflow_id=self._run_id,
+                 stage_names=self.stages,
+             )
+             self._progress_tracker.add_callback(self._progress_callback)
+             self._progress_tracker.start_workflow()
+
+         try:
+             for stage_name in self.stages:
+                 tier = self.get_tier_for_stage(stage_name)
+                 stage_start = datetime.now()
+
+                 # Check if stage should be skipped
+                 should_skip, skip_reason = self.should_skip_stage(stage_name, current_data)
+
+                 if should_skip:
+                     stage = WorkflowStage(
+                         name=stage_name,
+                         tier=tier,
+                         description=f"Stage: {stage_name}",
+                         skipped=True,
+                         skip_reason=skip_reason,
+                     )
+                     self._stages_run.append(stage)
+
+                     # Report skip to progress tracker
+                     if self._progress_tracker:
+                         self._progress_tracker.skip_stage(stage_name, skip_reason or "")
+
+                     continue
+
+                 # Report stage start to progress tracker
+                 model_id = self.get_model_for_tier(tier)
+                 if self._progress_tracker:
+                     self._progress_tracker.start_stage(stage_name, tier.value, model_id)
+
+                 # Run the stage
+                 output, input_tokens, output_tokens = await self.run_stage(
+                     stage_name,
+                     tier,
+                     current_data,
+                 )
+
+                 stage_end = datetime.now()
+                 duration_ms = int((stage_end - stage_start).total_seconds() * 1000)
+                 cost = self._calculate_cost(tier, input_tokens, output_tokens)
+
+                 stage = WorkflowStage(
+                     name=stage_name,
+                     tier=tier,
+                     description=f"Stage: {stage_name}",
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                     cost=cost,
+                     result=output,
+                     duration_ms=duration_ms,
+                 )
+                 self._stages_run.append(stage)
+
+                 # Report stage completion to progress tracker
+                 if self._progress_tracker:
+                     self._progress_tracker.complete_stage(
+                         stage_name,
+                         cost=cost,
+                         tokens_in=input_tokens,
+                         tokens_out=output_tokens,
+                     )
+
+                 # Log to cost tracker
+                 self.cost_tracker.log_request(
+                     model=model_id,
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                     task_type=f"workflow:{self.name}:{stage_name}",
+                 )
+
+                 # Pass output to next stage
+                 current_data = output if isinstance(output, dict) else {"result": output}
+
+         except (ValueError, TypeError, KeyError) as e:
+             # Data validation or configuration errors
+             error = f"Workflow execution error (data/config): {e}"
+             logger.error(error)
+             if self._progress_tracker:
+                 self._progress_tracker.fail_workflow(error)
+         except (TimeoutError, RuntimeError) as e:
+             # Timeout or API errors
+             error = f"Workflow execution error (timeout/API): {e}"
+             logger.error(error)
+             if self._progress_tracker:
+                 self._progress_tracker.fail_workflow(error)
+         except Exception:
+             # INTENTIONAL: Workflow orchestration - catch all errors to report failure gracefully
+             logger.exception("Unexpected error in workflow execution")
+             error = "Workflow execution failed: unexpected error"
+             if self._progress_tracker:
+                 self._progress_tracker.fail_workflow(error)
+
+         completed_at = datetime.now()
+         total_duration_ms = int((completed_at - started_at).total_seconds() * 1000)
+
+         # Get final output from last non-skipped stage
+         final_output = None
+         for stage in reversed(self._stages_run):
+             if not stage.skipped and stage.result is not None:
+                 final_output = stage.result
+                 break
+
+         # Classify error type and transient status
+         error_type = None
+         transient = False
+         if error:
+             error_lower = error.lower()
+             if "timeout" in error_lower or "timed out" in error_lower:
+                 error_type = "timeout"
+                 transient = True
+             elif "config" in error_lower or "configuration" in error_lower:
+                 error_type = "config"
+                 transient = False
+             elif "api" in error_lower or "rate limit" in error_lower or "quota" in error_lower:
+                 error_type = "provider"
+                 transient = True
+             elif "validation" in error_lower or "invalid" in error_lower:
+                 error_type = "validation"
+                 transient = False
+             else:
+                 error_type = "runtime"
+                 transient = False
+
+         provider_str = getattr(self, "_provider_str", "unknown")
+         result = WorkflowResult(
+             success=error is None,
+             stages=self._stages_run,
+             final_output=final_output,
+             cost_report=self._generate_cost_report(),
+             started_at=started_at,
+             completed_at=completed_at,
+             total_duration_ms=total_duration_ms,
+             provider=provider_str,
+             error=error,
+             error_type=error_type,
+             transient=transient,
+         )
+
+         # Report workflow completion to progress tracker
+         if self._progress_tracker and error is None:
+             self._progress_tracker.complete_workflow()
+
+         # Save to workflow history for dashboard
+         try:
+             _save_workflow_run(self.name, provider_str, result)
+         except (OSError, PermissionError):
+             # File system errors saving history - log but don't crash workflow
+             logger.warning("Failed to save workflow history (file system error)")
+         except (ValueError, TypeError, KeyError):
+             # Data serialization errors - log but don't crash workflow
+             logger.warning("Failed to save workflow history (serialization error)")
+         except Exception:
+             # INTENTIONAL: History save is optional diagnostics - never crash workflow
+             logger.exception("Unexpected error saving workflow history")
+
+         # Emit workflow telemetry to backend
+         self._emit_workflow_telemetry(result)
+
+         return result
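# Illustrative sketch (annotation, not part of the published file): a minimal
# end-to-end run, reusing the MyWorkflow shape from the class docstring. The
# input kwarg and single retry on a transient failure are illustrative.
import asyncio

async def main():
    wf = MyWorkflow(provider="anthropic")
    result = await wf.execute(code="def f(): pass")
    if not result.success and result.transient:
        result = await wf.execute(code="def f(): pass")  # one retry is reasonable
    print(result.cost_report.total_cost, result.cost_report.savings_percent)

asyncio.run(main())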
+
+     def describe(self) -> str:
+         """Get a human-readable description of the workflow."""
+         lines = [
+             f"Workflow: {self.name}",
+             f"Description: {self.description}",
+             "",
+             "Stages:",
+         ]
+
+         for stage_name in self.stages:
+             tier = self.get_tier_for_stage(stage_name)
+             model = self.get_model_for_tier(tier)
+             lines.append(f"  {stage_name}: {tier.value} ({model})")
+
+         return "\n".join(lines)
+
+     # =========================================================================
+     # New infrastructure methods (Phase 4)
+     # =========================================================================
+
+     def _create_execution_context(
+         self,
+         step_name: str,
+         task_type: str,
+         user_id: str | None = None,
+         session_id: str | None = None,
+     ) -> ExecutionContext:
+         """Create an ExecutionContext for a step execution.
+
+         Args:
+             step_name: Name of the workflow step
+             task_type: Task type for routing
+             user_id: Optional user ID
+             session_id: Optional session ID
+
+         Returns:
+             ExecutionContext populated with workflow info
+
+         """
+         return ExecutionContext(
+             workflow_name=self.name,
+             step_name=step_name,
+             user_id=user_id,
+             session_id=session_id,
+             metadata={
+                 "task_type": task_type,
+                 "run_id": self._run_id,
+                 "provider": self._provider_str,
+             },
+         )
+
+     def _create_default_executor(self) -> LLMExecutor:
+         """Create a default EmpathyLLMExecutor wrapped in ResilientExecutor.
+
+         This method is called lazily when run_step_with_executor is used
+         without a pre-configured executor. The executor is wrapped with
+         resilience features (retry, fallback, circuit breaker).
+
+         Returns:
+             LLMExecutor instance (ResilientExecutor wrapping EmpathyLLMExecutor)
+
+         """
+         from empathy_os.models.empathy_executor import EmpathyLLMExecutor
+         from empathy_os.models.fallback import ResilientExecutor
+
+         # Create the base executor
+         base_executor = EmpathyLLMExecutor(
+             provider=self._provider_str,
+             api_key=self._api_key,
+             telemetry_store=self._telemetry_backend,
+         )
+         # Wrap with resilience layer (retry, fallback, circuit breaker)
+         return ResilientExecutor(executor=base_executor)
+
+     def _get_executor(self) -> LLMExecutor:
+         """Get or create the LLM executor.
+
+         Returns the configured executor or creates a default one.
+
+         Returns:
+             LLMExecutor instance
+
+         """
+         if self._executor is None:
+             self._executor = self._create_default_executor()
+         return self._executor
+
+     def _emit_call_telemetry(
+         self,
+         step_name: str,
+         task_type: str,
+         tier: str,
+         model_id: str,
+         input_tokens: int,
+         output_tokens: int,
+         cost: float,
+         latency_ms: int,
+         success: bool = True,
+         error_message: str | None = None,
+         fallback_used: bool = False,
+     ) -> None:
+         """Emit an LLMCallRecord to the telemetry backend.
+
+         Args:
+             step_name: Name of the workflow step
+             task_type: Task type used for routing
+             tier: Model tier used
+             model_id: Model ID used
+             input_tokens: Input token count
+             output_tokens: Output token count
+             cost: Estimated cost
+             latency_ms: Latency in milliseconds
+             success: Whether the call succeeded
+             error_message: Error message if failed
+             fallback_used: Whether fallback was used
+
+         """
+         record = LLMCallRecord(
+             call_id=str(uuid.uuid4()),
+             timestamp=datetime.now().isoformat(),
+             workflow_name=self.name,
+             step_name=step_name,
+             task_type=task_type,
+             provider=self._provider_str,
+             tier=tier,
+             model_id=model_id,
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             estimated_cost=cost,
+             latency_ms=latency_ms,
+             success=success,
+             error_message=error_message,
+             fallback_used=fallback_used,
+             metadata={"run_id": self._run_id},
+         )
+         try:
+             self._telemetry_backend.log_call(record)
+         except (AttributeError, ValueError, TypeError):
+             # Telemetry backend errors - log but don't crash workflow
+             logger.debug("Failed to log call telemetry (backend error)")
+         except OSError:
+             # File system errors - log but don't crash workflow
+             logger.debug("Failed to log call telemetry (file system error)")
+         except Exception:  # noqa: BLE001
+             # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
+             logger.debug("Unexpected error logging call telemetry")
+
+     def _emit_workflow_telemetry(self, result: WorkflowResult) -> None:
+         """Emit a WorkflowRunRecord to the telemetry backend.
+
+         Args:
+             result: The workflow result to record
+
+         """
+         # Build stage records
+         stages = [
+             WorkflowStageRecord(
+                 stage_name=s.name,
+                 tier=s.tier.value,
+                 model_id=self.get_model_for_tier(s.tier),
+                 input_tokens=s.input_tokens,
+                 output_tokens=s.output_tokens,
+                 cost=s.cost,
+                 latency_ms=s.duration_ms,
+                 success=not s.skipped and result.error is None,
+                 skipped=s.skipped,
+                 skip_reason=s.skip_reason,
+             )
+             for s in result.stages
+         ]
+
+         record = WorkflowRunRecord(
+             run_id=self._run_id or str(uuid.uuid4()),
+             workflow_name=self.name,
+             started_at=result.started_at.isoformat(),
+             completed_at=result.completed_at.isoformat(),
+             stages=stages,
+             total_input_tokens=sum(s.input_tokens for s in result.stages if not s.skipped),
+             total_output_tokens=sum(s.output_tokens for s in result.stages if not s.skipped),
+             total_cost=result.cost_report.total_cost,
+             baseline_cost=result.cost_report.baseline_cost,
+             savings=result.cost_report.savings,
+             savings_percent=result.cost_report.savings_percent,
+             total_duration_ms=result.total_duration_ms,
+             success=result.success,
+             error=result.error,
+             providers_used=[self._provider_str],
+             tiers_used=list(result.cost_report.by_tier.keys()),
+         )
+         try:
+             self._telemetry_backend.log_workflow(record)
+         except (AttributeError, ValueError, TypeError):
+             # Telemetry backend errors - log but don't crash workflow
+             logger.debug("Failed to log workflow telemetry (backend error)")
+         except OSError:
+             # File system errors - log but don't crash workflow
+             logger.debug("Failed to log workflow telemetry (file system error)")
+         except Exception:  # noqa: BLE001
+             # INTENTIONAL: Telemetry is optional diagnostics - never crash workflow
+             logger.debug("Unexpected error logging workflow telemetry")
+
+     async def run_step_with_executor(
+         self,
+         step: WorkflowStepConfig,
+         prompt: str,
+         system: str | None = None,
+         **kwargs: Any,
+     ) -> tuple[str, int, int, float]:
+         """Run a workflow step using the LLMExecutor.
+
+         This method provides a unified interface for executing steps with
+         automatic routing, telemetry, and cost tracking. If no executor
+         was provided at construction, a default EmpathyLLMExecutor is created.
+
+         Args:
+             step: WorkflowStepConfig defining the step
+             prompt: The prompt to send
+             system: Optional system prompt
+             **kwargs: Additional arguments passed to executor
+
+         Returns:
+             Tuple of (content, input_tokens, output_tokens, cost)
+
+         """
+         executor = self._get_executor()
+
+         context = self._create_execution_context(
+             step_name=step.name,
+             task_type=step.task_type,
+         )
+
+         start_time = datetime.now()
+         response = await executor.run(
+             task_type=step.task_type,
+             prompt=prompt,
+             system=system,
+             context=context,
+             **kwargs,
+         )
+         end_time = datetime.now()
+         latency_ms = int((end_time - start_time).total_seconds() * 1000)
+
+         # Emit telemetry
+         self._emit_call_telemetry(
+             step_name=step.name,
+             task_type=step.task_type,
+             tier=response.tier,
+             model_id=response.model_id,
+             input_tokens=response.tokens_input,
+             output_tokens=response.tokens_output,
+             cost=response.cost_estimate,
+             latency_ms=latency_ms,
+             success=True,
+         )
+
+         return (
+             response.content,
+             response.tokens_input,
+             response.tokens_output,
+             response.cost_estimate,
+         )
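# Illustrative sketch (annotation, not part of the published file): driving a
# step through the executor path from inside an async method of a subclass.
# The "summarize" task_type, the field values, and diff_text are hypothetical.
step = WorkflowStepConfig(
    name="summarize_diff",
    task_type="summarize",
    tier_hint="cheap",
    description="Summarize the change set",
    max_tokens=1024,
)
content, tokens_in, tokens_out, cost = await self.run_step_with_executor(
    step=step, prompt=diff_text, system="Summarize the changes briefly."
)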
1147
+
1148
+ # =========================================================================
1149
+ # XML Prompt Integration (Phase 4)
1150
+ # =========================================================================
1151
+
1152
+ def _get_xml_config(self) -> dict[str, Any]:
1153
+ """Get XML prompt configuration for this workflow.
1154
+
1155
+ Returns:
1156
+ Dictionary with XML configuration settings.
1157
+
1158
+ """
1159
+ if self._config is None:
1160
+ return {}
1161
+ return self._config.get_xml_config_for_workflow(self.name)
1162
+
1163
+ def _is_xml_enabled(self) -> bool:
1164
+ """Check if XML prompts are enabled for this workflow."""
1165
+ config = self._get_xml_config()
1166
+ return bool(config.get("enabled", False))
1167
+
+    def _render_xml_prompt(
+        self,
+        role: str,
+        goal: str,
+        instructions: list[str],
+        constraints: list[str],
+        input_type: str,
+        input_payload: str,
+        extra: dict[str, Any] | None = None,
+    ) -> str:
+        """Render a prompt using the XML template if enabled.
+
+        Args:
+            role: The role for the AI (e.g., "security analyst").
+            goal: The primary objective.
+            instructions: Step-by-step instructions.
+            constraints: Rules and guidelines.
+            input_type: Type of input ("code", "diff", "document").
+            input_payload: The content to process.
+            extra: Additional context data.
+
+        Returns:
+            Rendered prompt string (XML if enabled, plain text otherwise).
+
+        """
+        from empathy_os.prompts import PromptContext, XmlPromptTemplate, get_template
+
+        config = self._get_xml_config()
+
+        if not config.get("enabled", False):
+            # Fall back to plain text
+            return self._render_plain_prompt(
+                role,
+                goal,
+                instructions,
+                constraints,
+                input_type,
+                input_payload,
+            )
+
+        # Create context
+        context = PromptContext(
+            role=role,
+            goal=goal,
+            instructions=instructions,
+            constraints=constraints,
+            input_type=input_type,
+            input_payload=input_payload,
+            extra=extra or {},
+        )
+
+        # Get template
+        template_name = config.get("template_name", self.name)
+        template = get_template(template_name)
+
+        if template is None:
+            # Create a basic XML template if no built-in is found
+            template = XmlPromptTemplate(
+                name=self.name,
+                schema_version=config.get("schema_version", "1.0"),
+            )
+
+        return template.render(context)
+
+    def _render_plain_prompt(
+        self,
+        role: str,
+        goal: str,
+        instructions: list[str],
+        constraints: list[str],
+        input_type: str,
+        input_payload: str,
+    ) -> str:
+        """Render a plain text prompt (fallback when XML is disabled)."""
+        parts = [f"You are a {role}.", "", f"Goal: {goal}", ""]
+
+        if instructions:
+            parts.append("Instructions:")
+            for i, inst in enumerate(instructions, 1):
+                parts.append(f"{i}. {inst}")
+            parts.append("")
+
+        if constraints:
+            parts.append("Guidelines:")
+            for constraint in constraints:
+                parts.append(f"- {constraint}")
+            parts.append("")
+
+        if input_payload:
+            parts.append(f"Input ({input_type}):")
+            parts.append(input_payload)
+
+        return "\n".join(parts)
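A worked example of the plain-text fallback (the inputs are illustrative):

prompt = workflow._render_plain_prompt(
    role="security analyst",
    goal="Find injection risks",
    instructions=["Read the diff", "Flag unsafe SQL"],
    constraints=["Cite file and line"],
    input_type="diff",
    input_payload="cursor.execute(query)",
)
# Produces:
#   You are a security analyst.
#
#   Goal: Find injection risks
#
#   Instructions:
#   1. Read the diff
#   2. Flag unsafe SQL
#
#   Guidelines:
#   - Cite file and line
#
#   Input (diff):
#   cursor.execute(query)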
+
+    def _parse_xml_response(self, response: str) -> dict[str, Any]:
+        """Parse an XML response if XML enforcement is enabled.
+
+        Args:
+            response: The LLM response text.
+
+        Returns:
+            Dictionary with parsed fields or raw response data.
+
+        """
+        from empathy_os.prompts import XmlResponseParser
+
+        config = self._get_xml_config()
+
+        if not config.get("enforce_response_xml", False):
+            # No parsing needed, return as-is
+            return {
+                "_parsed_response": None,
+                "_raw": response,
+            }
+
+        fallback = config.get("fallback_on_parse_error", True)
+        parser = XmlResponseParser(fallback_on_error=fallback)
+        parsed = parser.parse(response)
+
+        return {
+            "_parsed_response": parsed,
+            "_raw": response,
+            "summary": parsed.summary,
+            "findings": [f.to_dict() for f in parsed.findings],
+            "checklist": parsed.checklist,
+            "xml_parsed": parsed.success,
+            "parse_errors": parsed.errors,
+        }
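A sketch of what callers get back when enforcement is on (key names come from the return statement above; anything beyond those keys is an assumption):

result = workflow._parse_xml_response(llm_text)  # llm_text is illustrative
if result["xml_parsed"]:
    for finding in result["findings"]:   # dicts built from parsed.findings
        print(finding)
else:
    print(result["parse_errors"])        # result["_raw"] keeps the full text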
+
+    def _extract_findings_from_response(
+        self,
+        response: str,
+        files_changed: list[str],
+        code_context: str = "",
+    ) -> list[dict[str, Any]]:
+        """Extract structured findings from an LLM response.
+
+        Tries multiple strategies in order:
+        1. XML parsing (if XML tags are present)
+        2. Regex-based extraction for file:line patterns
+        3. An empty list if no findings are extractable
+
+        Args:
+            response: Raw LLM response text
+            files_changed: List of files being analyzed (for context)
+            code_context: Original code being reviewed (optional)
+
+        Returns:
+            List of findings matching the WorkflowFinding schema:
+            [
+                {
+                    "id": "unique-id",
+                    "file": "relative/path.py",
+                    "line": 42,
+                    "column": 10,
+                    "severity": "high",
+                    "category": "security",
+                    "message": "Brief message",
+                    "details": "Extended explanation",
+                    "recommendation": "Fix suggestion"
+                }
+            ]
+
+        """
+        import re
+        import uuid
+
+        findings: list[dict[str, Any]] = []
+
+        # Strategy 1: Try XML parsing first
+        response_lower = response.lower()
+        if (
+            "<finding>" in response_lower
+            or "<issue>" in response_lower
+            or "<findings>" in response_lower
+        ):
+            # Parse XML directly (bypass config checks)
+            from empathy_os.prompts import XmlResponseParser
+
+            parser = XmlResponseParser(fallback_on_error=True)
+            parsed = parser.parse(response)
+
+            if parsed.success and parsed.findings:
+                for raw_finding in parsed.findings:
+                    enriched = self._enrich_finding_with_location(
+                        raw_finding.to_dict(),
+                        files_changed,
+                    )
+                    findings.append(enriched)
+                return findings
+
+        # Strategy 2: Regex-based extraction for common patterns
+        # Match patterns like:
+        #   - "src/auth.py:42: SQL injection found"
+        #   - "In file src/auth.py line 42"
+        #   - "auth.py (line 42, column 10)"
+        patterns = [
+            # Pattern 1: file.py:line:column: message
+            r"([^\s:]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php)):(\d+):(\d+):\s*(.+)",
+            # Pattern 2: file.py:line: message
+            r"([^\s:]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php)):(\d+):\s*(.+)",
+            # Pattern 3: in file X line Y
+            r"(?:in file|file)\s+([^\s]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php))\s+line\s+(\d+)",
+            # Pattern 4: file.py (line X[, column Y])
+            r"([^\s]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php))\s*\(line\s+(\d+)(?:,\s*col(?:umn)?\s+(\d+))?\)",
+        ]
+
+        for pattern in patterns:
+            matches = re.findall(pattern, response, re.IGNORECASE)
+            for match in matches:
+                if len(match) >= 2:
+                    file_path = match[0]
+                    line = int(match[1])
+
+                    # Handle the different pattern formats
+                    if len(match) == 4 and match[2].isdigit():
+                        # Pattern 1: file:line:col: message
+                        column = int(match[2])
+                        message = match[3]
+                    elif len(match) == 3 and match[2] and not match[2].isdigit():
+                        # Pattern 2: file:line: message
+                        column = 1
+                        message = match[2]
+                    elif len(match) == 3 and match[2].isdigit():
+                        # Pattern 4: file.py (line X, column Y)
+                        column = int(match[2])
+                        message = ""
+                    else:
+                        # Pattern 3: in file X line Y (no message)
+                        column = 1
+                        message = ""
+
+                    # Determine severity and category from keywords in the message
+                    severity = self._infer_severity(message)
+                    category = self._infer_category(message)
+
+                    findings.append(
+                        {
+                            "id": str(uuid.uuid4())[:8],
+                            "file": file_path,
+                            "line": line,
+                            "column": column,
+                            "severity": severity,
+                            "category": category,
+                            "message": message.strip() if message else "",
+                            "details": "",
+                            "recommendation": "",
+                        },
+                    )
+
+        # Deduplicate by file:line
+        seen = set()
+        unique_findings = []
+        for finding in findings:
+            key = (finding["file"], finding["line"])
+            if key not in seen:
+                seen.add(key)
+                unique_findings.append(finding)
+
+        return unique_findings
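A quick check of the regex path (Strategy 2) with an illustrative response line; ids are random 8-character uuid4 prefixes:

text = "src/auth.py:42:10: SQL injection via unsanitized user id"
found = workflow._extract_findings_from_response(text, ["src/auth.py"])
# found[0] -> {"file": "src/auth.py", "line": 42, "column": 10,
#              "severity": "critical",   # "injection" keyword
#              "category": "security",
#              "message": "SQL injection via unsanitized user id", ...}
# Pattern 2 also matches this line, but the file:line dedup keeps only
# the Pattern 1 finding.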
+
+    def _enrich_finding_with_location(
+        self,
+        raw_finding: dict[str, Any],
+        files_changed: list[str],
+    ) -> dict[str, Any]:
+        """Enrich a finding from the XML parser with file/line/column fields.
+
+        Args:
+            raw_finding: Finding dict from the XML parser (has a 'location' string field)
+            files_changed: List of files being analyzed
+
+        Returns:
+            Enriched finding dict with file, line, and column fields
+
+        """
+        import uuid
+
+        location_str = raw_finding.get("location", "")
+        file_path, line, column = self._parse_location_string(location_str, files_changed)
+
+        # Map category from severity or title keywords
+        category = self._infer_category(
+            raw_finding.get("title", "") + " " + raw_finding.get("details", ""),
+        )
+
+        return {
+            "id": str(uuid.uuid4())[:8],
+            "file": file_path,
+            "line": line,
+            "column": column,
+            "severity": raw_finding.get("severity", "medium"),
+            "category": category,
+            "message": raw_finding.get("title", ""),
+            "details": raw_finding.get("details", ""),
+            "recommendation": raw_finding.get("fix", ""),
+        }
+
+    def _parse_location_string(
+        self,
+        location: str,
+        files_changed: list[str],
+    ) -> tuple[str, int, int]:
+        """Parse a location string to extract file, line, and column.
+
+        Handles formats like:
+        - "src/auth.py:42:10"
+        - "src/auth.py:42"
+        - "auth.py line 42"
+        - "line 42 in auth.py"
+
+        Args:
+            location: Location string from the finding
+            files_changed: List of files being analyzed (for fallback)
+
+        Returns:
+            Tuple of (file_path, line_number, column_number).
+            If parsing fails, falls back to the first changed file (or "")
+            with line 1, column 1.
+
+        """
+        import re
+
+        if not location:
+            # Fallback: use the first file if available
+            return (files_changed[0] if files_changed else "", 1, 1)
+
+        # Try colon-separated format: file.py:line:col
+        match = re.search(
+            r"([^\s:]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php)):(\d+)(?::(\d+))?",
+            location,
+        )
+        if match:
+            file_path = match.group(1)
+            line = int(match.group(2))
+            column = int(match.group(3)) if match.group(3) else 1
+            return (file_path, line, column)
+
+        # Try "line X in file.py" format
+        match = re.search(
+            r"line\s+(\d+)\s+(?:in|of)\s+([^\s]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php))",
+            location,
+            re.IGNORECASE,
+        )
+        if match:
+            line = int(match.group(1))
+            file_path = match.group(2)
+            return (file_path, line, 1)
+
+        # Try "file.py line X" format
+        match = re.search(
+            r"([^\s]+\.(?:py|ts|tsx|js|jsx|java|go|rb|php))\s+line\s+(\d+)",
+            location,
+            re.IGNORECASE,
+        )
+        if match:
+            file_path = match.group(1)
+            line = int(match.group(2))
+            return (file_path, line, 1)
+
+        # Extract just the line number if present
+        match = re.search(r"line\s+(\d+)", location, re.IGNORECASE)
+        if match:
+            line = int(match.group(1))
+            # Use the first file from files_changed as fallback
+            file_path = files_changed[0] if files_changed else ""
+            return (file_path, line, 1)
+
+        # Couldn't parse - return defaults
+        return (files_changed[0] if files_changed else "", 1, 1)
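Worked examples for the accepted location formats (each maps to one branch above):

workflow._parse_location_string("src/auth.py:42:10", [])    # ("src/auth.py", 42, 10)
workflow._parse_location_string("line 42 in auth.py", [])   # ("auth.py", 42, 1)
workflow._parse_location_string("auth.py line 42", [])      # ("auth.py", 42, 1)
workflow._parse_location_string("around line 7", ["a.py"])  # ("a.py", 7, 1)
workflow._parse_location_string("unparseable", ["a.py"])    # ("a.py", 1, 1)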
+
+    def _infer_severity(self, text: str) -> str:
+        """Infer severity from keywords in text.
+
+        Args:
+            text: Message or title text
+
+        Returns:
+            Severity level: critical, high, medium, low, or info
+
+        """
+        text_lower = text.lower()
+
+        if any(
+            word in text_lower
+            for word in [
+                "critical",
+                "severe",
+                "exploit",
+                "vulnerability",
+                "injection",
+                "remote code execution",
+                "rce",
+            ]
+        ):
+            return "critical"
+
+        if any(
+            word in text_lower
+            for word in [
+                "high",
+                "security",
+                "unsafe",
+                "dangerous",
+                "xss",
+                "csrf",
+                "auth",
+                "password",
+                "secret",
+            ]
+        ):
+            return "high"
+
+        if any(
+            word in text_lower
+            for word in [
+                "warning",
+                "issue",
+                "problem",
+                "bug",
+                "error",
+                "deprecated",
+                "leak",
+            ]
+        ):
+            return "medium"
+
+        if any(word in text_lower for word in ["low", "minor", "style", "format", "typo"]):
+            return "low"
+
+        return "info"
+
+    def _infer_category(self, text: str) -> str:
+        """Infer finding category from keywords.
+
+        Args:
+            text: Message or title text
+
+        Returns:
+            Category: security, performance, maintainability, style, or correctness
+
+        """
+        text_lower = text.lower()
+
+        if any(
+            word in text_lower
+            for word in [
+                "security",
+                "vulnerability",
+                "injection",
+                "xss",
+                "csrf",
+                "auth",
+                "encrypt",
+                "password",
+                "secret",
+                "unsafe",
+            ]
+        ):
+            return "security"
+
+        if any(
+            word in text_lower
+            for word in [
+                "performance",
+                "slow",
+                "memory",
+                "leak",
+                "inefficient",
+                "optimization",
+                "cache",
+            ]
+        ):
+            return "performance"
+
+        if any(
+            word in text_lower
+            for word in [
+                "complex",
+                "refactor",
+                "duplicate",
+                "maintainability",
+                "readability",
+                "documentation",
+            ]
+        ):
+            return "maintainability"
+
+        if any(
+            word in text_lower for word in ["style", "format", "lint", "convention", "whitespace"]
+        ):
+            return "style"
+
+        return "correctness"