empathy-framework 4.6.2-py3-none-any.whl → 4.6.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/METADATA +1 -1
  2. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/RECORD +53 -20
  3. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/WHEEL +1 -1
  4. empathy_os/__init__.py +1 -1
  5. empathy_os/cli.py +361 -32
  6. empathy_os/config/xml_config.py +8 -3
  7. empathy_os/core.py +37 -4
  8. empathy_os/leverage_points.py +2 -1
  9. empathy_os/memory/short_term.py +45 -1
  10. empathy_os/meta_workflows/agent_creator 2.py +254 -0
  11. empathy_os/meta_workflows/builtin_templates 2.py +567 -0
  12. empathy_os/meta_workflows/cli_meta_workflows 2.py +1551 -0
  13. empathy_os/meta_workflows/form_engine 2.py +304 -0
  14. empathy_os/meta_workflows/intent_detector 2.py +298 -0
  15. empathy_os/meta_workflows/pattern_learner 2.py +754 -0
  16. empathy_os/meta_workflows/session_context 2.py +398 -0
  17. empathy_os/meta_workflows/template_registry 2.py +229 -0
  18. empathy_os/meta_workflows/workflow 2.py +980 -0
  19. empathy_os/models/token_estimator.py +16 -9
  20. empathy_os/models/validation.py +7 -1
  21. empathy_os/orchestration/pattern_learner 2.py +699 -0
  22. empathy_os/orchestration/real_tools 2.py +938 -0
  23. empathy_os/orchestration/real_tools.py +4 -2
  24. empathy_os/socratic/__init__ 2.py +273 -0
  25. empathy_os/socratic/ab_testing 2.py +969 -0
  26. empathy_os/socratic/blueprint 2.py +532 -0
  27. empathy_os/socratic/cli 2.py +689 -0
  28. empathy_os/socratic/collaboration 2.py +1112 -0
  29. empathy_os/socratic/domain_templates 2.py +916 -0
  30. empathy_os/socratic/embeddings 2.py +734 -0
  31. empathy_os/socratic/engine 2.py +729 -0
  32. empathy_os/socratic/explainer 2.py +663 -0
  33. empathy_os/socratic/feedback 2.py +767 -0
  34. empathy_os/socratic/forms 2.py +624 -0
  35. empathy_os/socratic/generator 2.py +716 -0
  36. empathy_os/socratic/llm_analyzer 2.py +635 -0
  37. empathy_os/socratic/mcp_server 2.py +751 -0
  38. empathy_os/socratic/session 2.py +306 -0
  39. empathy_os/socratic/storage 2.py +635 -0
  40. empathy_os/socratic/storage.py +2 -1
  41. empathy_os/socratic/success 2.py +719 -0
  42. empathy_os/socratic/visual_editor 2.py +812 -0
  43. empathy_os/socratic/web_ui 2.py +925 -0
  44. empathy_os/tier_recommender.py +5 -2
  45. empathy_os/workflow_commands.py +11 -6
  46. empathy_os/workflows/base.py +1 -1
  47. empathy_os/workflows/batch_processing 2.py +310 -0
  48. empathy_os/workflows/release_prep_crew 2.py +968 -0
  49. empathy_os/workflows/test_coverage_boost_crew 2.py +848 -0
  50. empathy_os/workflows/test_maintenance.py +3 -2
  51. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/entry_points.txt +0 -0
  52. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/licenses/LICENSE +0 -0
  53. {empathy_framework-4.6.2.dist-info → empathy_framework-4.6.3.dist-info}/top_level.txt +0 -0
empathy_os/meta_workflows/workflow 2.py
@@ -0,0 +1,980 @@
+"""Meta-workflow orchestration engine.
+
+Coordinates the complete meta-workflow execution:
+1. Template selection
+2. Form collection (Socratic questioning)
+3. Agent team generation
+4. Agent execution (with tier escalation)
+5. Result aggregation and storage (files + optional memory)
+
+Created: 2026-01-17
+Updated: 2026-01-18 (v4.3.0 - Real LLM execution with Anthropic client)
+Purpose: Core orchestration for meta-workflows
+"""
+
+import json
+import logging
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from empathy_llm_toolkit.routing.model_router import ModelRouter, ModelTier
+from empathy_os.meta_workflows.agent_creator import DynamicAgentCreator
+from empathy_os.meta_workflows.form_engine import SocraticFormEngine
+from empathy_os.meta_workflows.models import (
+    AgentExecutionResult,
+    AgentSpec,
+    FormResponse,
+    MetaWorkflowResult,
+    MetaWorkflowTemplate,
+    TierStrategy,
+)
+from empathy_os.meta_workflows.template_registry import TemplateRegistry
+from empathy_os.orchestration.agent_templates import get_template
+from empathy_os.telemetry.usage_tracker import UsageTracker
+
+if TYPE_CHECKING:
+    from empathy_os.meta_workflows.pattern_learner import PatternLearner
+
+logger = logging.getLogger(__name__)
+
+
+class MetaWorkflow:
+    """Orchestrates complete meta-workflow execution.
+
+    Coordinates form collection, agent generation, and execution
+    to implement dynamic, template-based workflows.
+
+    Hybrid Storage:
+    - Files: Persistent, human-readable execution results
+    - Memory: Rich semantic queries (optional via pattern_learner)
+
+    Attributes:
+        template: Meta-workflow template to execute
+        storage_dir: Directory for storing execution results
+        form_engine: Engine for collecting form responses
+        agent_creator: Creator for generating agent teams
+        pattern_learner: Optional pattern learner for memory integration
+    """
+
+    def __init__(
+        self,
+        template: MetaWorkflowTemplate | None = None,
+        template_id: str | None = None,
+        storage_dir: str | None = None,
+        pattern_learner: "PatternLearner | None" = None,
+    ):
+        """Initialize meta-workflow with optional memory integration.
+
+        Args:
+            template: Template to execute (optional if template_id provided)
+            template_id: ID of template to load (optional if template provided)
+            storage_dir: Directory for execution results
+                (default: .empathy/meta_workflows/executions/)
+            pattern_learner: Optional pattern learner with memory integration
+                If provided, execution results will be stored in
+                both files and memory for rich semantic querying
+
+        Raises:
+            ValueError: If neither template nor template_id provided
+        """
+        if template is None and template_id is None:
+            raise ValueError("Must provide either template or template_id")
+
+        # Load template if needed
+        if template is None:
+            registry = TemplateRegistry()
+            template = registry.load_template(template_id)
+            if template is None:
+                raise ValueError(f"Template not found: {template_id}")
+
+        self.template = template
+        self.form_engine = SocraticFormEngine()
+        self.agent_creator = DynamicAgentCreator()
+        self.pattern_learner = pattern_learner
+
+        # Set up storage
+        if storage_dir is None:
+            storage_dir = str(
+                Path.home() / ".empathy" / "meta_workflows" / "executions"
+            )
+        self.storage_dir = Path(storage_dir)
+        self.storage_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.info(
+            f"Initialized MetaWorkflow for template: {self.template.template_id}",
+            extra={"memory_enabled": pattern_learner is not None},
+        )
+
+    def execute(
+        self,
+        form_response: FormResponse | None = None,
+        mock_execution: bool = True,
+        use_defaults: bool = False,
+    ) -> MetaWorkflowResult:
+        """Execute complete meta-workflow.
+
+        Args:
+            form_response: Pre-collected form responses (optional)
+                If None, will collect via form_engine
+            mock_execution: Use mock agent execution (default: True for MVP)
+                Set to False for real LLM execution
+            use_defaults: Use default values instead of asking questions
+                (non-interactive mode)
+
+        Returns:
+            MetaWorkflowResult with complete execution details
+
+        Raises:
+            ValueError: If execution fails
+        """
+        run_id = f"{self.template.template_id}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+        start_time = time.time()
+
+        logger.info(f"Starting meta-workflow execution: {run_id}")
+
+        try:
+            # Stage 1: Form collection (if not provided)
+            if form_response is None:
+                if use_defaults:
+                    logger.info("Stage 1: Using default form values (non-interactive)")
+                else:
+                    logger.info("Stage 1: Collecting form responses")
+                form_response = self.form_engine.ask_questions(
+                    self.template.form_schema, self.template.template_id
+                )
+            else:
+                logger.info("Stage 1: Using provided form responses")
+
+            # Stage 2: Agent generation
+            logger.info("Stage 2: Generating agent team")
+            agents = self.agent_creator.create_agents(self.template, form_response)
+
+            logger.info(f"Created {len(agents)} agents")
+
+            # Stage 3: Agent execution
+            logger.info("Stage 3: Executing agents")
+
+            if mock_execution:
+                agent_results = self._execute_agents_mock(agents)
+            else:
+                agent_results = self._execute_agents_real(agents)
+
+            # Stage 4: Aggregate results
+            logger.info("Stage 4: Aggregating results")
+
+            total_cost = sum(result.cost for result in agent_results)
+            total_duration = time.time() - start_time
+            success = all(result.success for result in agent_results)
+
+            result = MetaWorkflowResult(
+                run_id=run_id,
+                template_id=self.template.template_id,
+                timestamp=datetime.now().isoformat(),
+                form_responses=form_response,
+                agents_created=agents,
+                agent_results=agent_results,
+                total_cost=total_cost,
+                total_duration=total_duration,
+                success=success,
+            )
+
+            # Stage 5: Save results (files + optional memory)
+            logger.info("Stage 5: Saving results")
+            self._save_execution(result)
+
+            # Store in memory if pattern learner available
+            if self.pattern_learner:
+                logger.info("Stage 5b: Storing in memory")
+                pattern_id = self.pattern_learner.store_execution_in_memory(result)
+                if pattern_id:
+                    logger.info(f"Execution stored in memory: {pattern_id}")
+
+            logger.info(
+                f"Meta-workflow execution complete: {run_id} "
+                f"(cost: ${total_cost:.2f}, duration: {total_duration:.1f}s)"
+            )
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Meta-workflow execution failed: {e}")
+
+            # Create error result
+            error_result = MetaWorkflowResult(
+                run_id=run_id,
+                template_id=self.template.template_id,
+                timestamp=datetime.now().isoformat(),
+                form_responses=form_response
+                or FormResponse(template_id=self.template.template_id),
+                total_cost=0.0,
+                total_duration=time.time() - start_time,
+                success=False,
+                error=str(e),
+            )
+
+            # Try to save error result
+            try:
+                self._save_execution(error_result)
+            except Exception as save_error:
+                logger.error(f"Failed to save error result: {save_error}")
+
+            raise ValueError(f"Meta-workflow execution failed: {e}") from e
+
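For orientation, a minimal usage sketch of the five-stage pipeline above. The import path assumes the class is exposed as empathy_os.meta_workflows.workflow, and the template ID "code-review" is a hypothetical placeholder; everything else follows the signatures shown in this file.

    from empathy_os.meta_workflows.workflow import MetaWorkflow

    # Load a registered template by ID and run it non-interactively.
    wf = MetaWorkflow(template_id="code-review")  # hypothetical template ID
    result = wf.execute(mock_execution=True, use_defaults=True)

    # MetaWorkflowResult aggregates cost/duration across all agents.
    print(result.success, f"${result.total_cost:.2f}", f"{result.total_duration:.1f}s")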
+    def _execute_agents_mock(
+        self, agents: list[AgentSpec]
+    ) -> list[AgentExecutionResult]:
+        """Execute agents with mock execution (for MVP).
+
+        Args:
+            agents: List of agent specs to execute
+
+        Returns:
+            List of agent execution results
+        """
+        results = []
+
+        for agent in agents:
+            logger.debug(f"Mock executing agent: {agent.role}")
+
+            # Simulate execution time based on tier
+            if agent.tier_strategy == TierStrategy.CHEAP_ONLY:
+                duration = 1.5
+                cost = 0.05
+                tier_used = "cheap"
+            elif agent.tier_strategy == TierStrategy.PROGRESSIVE:
+                duration = 3.0
+                cost = 0.15  # Average (may escalate)
+                tier_used = "capable"
+            elif agent.tier_strategy == TierStrategy.CAPABLE_FIRST:
+                duration = 4.0
+                cost = 0.25
+                tier_used = "capable"
+            else:  # PREMIUM_ONLY
+                duration = 6.0
+                cost = 0.40
+                tier_used = "premium"
+
+            # Mock result
+            result = AgentExecutionResult(
+                agent_id=agent.agent_id,
+                role=agent.role,
+                success=True,
+                cost=cost,
+                duration=duration,
+                tier_used=tier_used,
+                output={
+                    "message": f"Mock execution of {agent.role}",
+                    "tier_strategy": agent.tier_strategy.value,
+                    "tools_used": agent.tools,
+                    "config": agent.config,
+                    "success_criteria": agent.success_criteria,
+                },
+            )
+
+            results.append(result)
+
+            # Simulate some execution time
+            time.sleep(0.1)
+
+        return results
+
+    def _execute_agents_real(self, agents: list[AgentSpec]) -> list[AgentExecutionResult]:
+        """Execute agents with real LLM calls and progressive tier escalation.
+
+        Implements progressive tier escalation strategy:
+        - CHEAP_ONLY: Always uses cheap tier
+        - PROGRESSIVE: cheap → capable → premium (escalates on failure)
+        - CAPABLE_FIRST: capable → premium (skips cheap tier)
+
+        Each LLM call is tracked via UsageTracker for cost analysis.
+
+        Args:
+            agents: List of agent specs to execute
+
+        Returns:
+            List of agent execution results with actual LLM costs
+
+        Raises:
+            RuntimeError: If agent execution encounters fatal error
+        """
+        results = []
+        router = ModelRouter()
+        tracker = UsageTracker.get_instance()
+
+        for agent in agents:
+            logger.info(f"Executing agent: {agent.role} ({agent.tier_strategy.value})")
+
+            try:
+                result = self._execute_single_agent_with_escalation(
+                    agent, router, tracker
+                )
+                results.append(result)
+
+                logger.info(
+                    f"Agent {agent.role} completed: "
+                    f"tier={result.tier_used}, cost=${result.cost:.4f}, "
+                    f"success={result.success}"
+                )
+
+            except Exception as e:
+                logger.error(f"Agent {agent.role} failed with error: {e}")
+
+                # Create error result
+                error_result = AgentExecutionResult(
+                    agent_id=agent.agent_id,
+                    role=agent.role,
+                    success=False,
+                    cost=0.0,
+                    duration=0.0,
+                    tier_used="error",
+                    output={"error": str(e)},
+                    error=str(e),
+                )
+                results.append(error_result)
+
+        return results
+
+    def _execute_single_agent_with_escalation(
+        self,
+        agent: AgentSpec,
+        router: ModelRouter,
+        tracker: UsageTracker,
+    ) -> AgentExecutionResult:
+        """Execute single agent with progressive tier escalation.
+
+        Args:
+            agent: Agent specification
+            router: Model router for tier selection
+            tracker: Usage tracker for telemetry
+
+        Returns:
+            AgentExecutionResult with actual LLM execution data
+        """
+        start_time = time.time()
+
+        # Determine tier sequence based on strategy
+        if agent.tier_strategy == TierStrategy.CHEAP_ONLY:
+            tiers = [ModelTier.CHEAP]
+        elif agent.tier_strategy == TierStrategy.PROGRESSIVE:
+            tiers = [ModelTier.CHEAP, ModelTier.CAPABLE, ModelTier.PREMIUM]
+        elif agent.tier_strategy == TierStrategy.CAPABLE_FIRST:
+            tiers = [ModelTier.CAPABLE, ModelTier.PREMIUM]
+        else:
+            # Fallback to capable
+            logger.warning(f"Unknown tier strategy: {agent.tier_strategy}, using CAPABLE")
+            tiers = [ModelTier.CAPABLE]
+
+        # Try each tier in sequence
+        result = None
+        total_cost = 0.0
+
+        for tier in tiers:
+            logger.debug(f"Attempting tier: {tier.value}")
+
+            # Execute at this tier
+            tier_result = self._execute_at_tier(agent, tier, router, tracker)
+            total_cost += tier_result.cost
+
+            # Check if successful
+            if self._evaluate_success_criteria(tier_result, agent):
+                # Success - return result
+                tier_result.cost = total_cost  # Update with cumulative cost
+                tier_result.duration = time.time() - start_time
+                return tier_result
+
+            # Failed - try next tier
+            logger.debug(
+                f"Tier {tier.value} did not meet success criteria, "
+                f"attempting escalation"
+            )
+            result = tier_result
+
+        # All tiers exhausted - return final result (failed)
+        if result:
+            result.cost = total_cost
+            result.duration = time.time() - start_time
+            logger.warning(
+                f"Agent {agent.role} failed at all tiers "
+                f"(cost: ${total_cost:.4f})"
+            )
+            return result
+
+        # Should never reach here
+        raise RuntimeError(f"No tiers attempted for agent {agent.role}")
+
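To make the cumulative-cost behavior concrete, here is an illustrative trace of a PROGRESSIVE run in which the cheap and capable tiers miss the success criteria and premium succeeds (the dollar figures are made up for illustration, not taken from the router's pricing):

    # Illustrative escalation trace for TierStrategy.PROGRESSIVE:
    #   cheap   -> fails    (total_cost so far: $0.01)
    #   capable -> fails    (total_cost so far: $0.01 + $0.05 = $0.06)
    #   premium -> succeeds (this attempt costs $0.20)
    # The returned AgentExecutionResult reports tier_used="premium" and
    # cost = $0.06 + $0.20 = $0.26, because tier_result.cost is overwritten
    # with the cumulative total_cost across all attempted tiers on success.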
+    def _execute_at_tier(
+        self,
+        agent: AgentSpec,
+        tier: ModelTier,
+        router: ModelRouter,
+        tracker: UsageTracker,
+    ) -> AgentExecutionResult:
+        """Execute agent at specific tier.
+
+        Args:
+            agent: Agent specification
+            tier: Model tier to use
+            router: Model router
+            tracker: Usage tracker
+
+        Returns:
+            AgentExecutionResult from this tier
+        """
+        start_time = time.time()
+
+        # Get model config for tier (access MODELS dict directly)
+        provider = router._default_provider
+        model_config = router.MODELS[provider][tier.value]
+
+        # Build prompt from agent spec
+        prompt = self._build_agent_prompt(agent)
+
+        # Execute LLM call
+        # v4.3.0: Real LLM execution with Anthropic client
+        # Falls back to simulation if API key not available
+
+        try:
+            # Execute real LLM call (with simulation fallback)
+            response = self._execute_llm_call(prompt, model_config, tier)
+
+            # Track telemetry
+            duration_ms = int((time.time() - start_time) * 1000)
+            tracker.track_llm_call(
+                workflow="meta-workflow",
+                stage=agent.role,
+                tier=tier.value,
+                model=model_config.model_id,
+                provider=router._default_provider,
+                cost=response["cost"],
+                tokens=response["tokens"],
+                cache_hit=False,
+                cache_type=None,
+                duration_ms=duration_ms,
+                user_id=None,
+            )
+
+            # Create result
+            result = AgentExecutionResult(
+                agent_id=agent.agent_id,
+                role=agent.role,
+                success=response["success"],
+                cost=response["cost"],
+                duration=time.time() - start_time,
+                tier_used=tier.value,
+                output=response["output"],
+            )
+
+            return result
+
+        except Exception as e:
+            logger.error(f"LLM execution failed at tier {tier.value}: {e}")
+
+            # Return error result
+            return AgentExecutionResult(
+                agent_id=agent.agent_id,
+                role=agent.role,
+                success=False,
+                cost=0.0,
+                duration=time.time() - start_time,
+                tier_used=tier.value,
+                output={"error": str(e)},
+                error=str(e),
+            )
+
+    def _get_generic_instructions(self, role: str) -> str:
+        """Generate generic instructions based on agent role.
+
+        Args:
+            role: Agent role name
+
+        Returns:
+            Generic instructions appropriate for the role
+        """
+        # Map common role keywords to instructions
+        role_lower = role.lower()
+
+        if "analyst" in role_lower or "analyze" in role_lower:
+            return (
+                "You are an expert analyst. Your job is to thoroughly analyze "
+                "the provided information, identify key patterns, issues, and "
+                "opportunities. Provide detailed findings with specific evidence "
+                "and actionable recommendations."
+            )
+        elif "reviewer" in role_lower or "review" in role_lower:
+            return (
+                "You are a careful reviewer. Your job is to review the provided "
+                "content for quality, accuracy, completeness, and adherence to "
+                "best practices. Identify any issues, gaps, or areas for improvement "
+                "and provide specific feedback."
+            )
+        elif "generator" in role_lower or "create" in role_lower or "writer" in role_lower:
+            return (
+                "You are a skilled content generator. Your job is to create "
+                "high-quality content based on the provided requirements and context. "
+                "Ensure your output is well-structured, accurate, and follows "
+                "established conventions."
+            )
+        elif "validator" in role_lower or "verify" in role_lower:
+            return (
+                "You are a thorough validator. Your job is to verify the provided "
+                "content meets all requirements and standards. Check for correctness, "
+                "completeness, and consistency. Report any issues found."
+            )
+        elif "synthesizer" in role_lower or "combine" in role_lower:
+            return (
+                "You are an expert synthesizer. Your job is to combine multiple "
+                "inputs into a cohesive, well-organized output. Identify common "
+                "themes, resolve conflicts, and produce a unified result that "
+                "captures the key insights from all sources."
+            )
+        elif "test" in role_lower:
+            return (
+                "You are a testing specialist. Your job is to analyze code and "
+                "create comprehensive test cases that cover edge cases, error "
+                "conditions, and normal operation. Ensure tests are well-documented "
+                "and maintainable."
+            )
+        elif "doc" in role_lower:
+            return (
+                "You are a documentation specialist. Your job is to analyze content "
+                "and create or improve documentation that is clear, accurate, and "
+                "helpful. Follow documentation best practices and maintain consistency."
+            )
+        else:
+            return (
+                f"You are a {role} agent. Complete your assigned task thoroughly "
+                "and provide clear, well-structured output. Follow best practices "
+                "and provide actionable results."
+            )
+
+    def _build_agent_prompt(self, agent: AgentSpec) -> str:
+        """Build prompt for agent from specification.
+
+        Args:
+            agent: Agent specification
+
+        Returns:
+            Formatted prompt string
+        """
+        # Load base template
+        base_template = get_template(agent.base_template)
+        if base_template is not None:
+            instructions = base_template.default_instructions
+        else:
+            # Fallback if template not found - use role-based generic prompt
+            logger.warning(
+                f"Template {agent.base_template} not found, using generic prompt"
+            )
+            instructions = self._get_generic_instructions(agent.role)
+
+        # Build prompt
+        prompt_parts = [
+            f"Role: {agent.role}",
+            f"\nInstructions:\n{instructions}",
+        ]
+
+        # Add config if present
+        if agent.config:
+            prompt_parts.append(f"\nConfiguration:\n{json.dumps(agent.config, indent=2)}")
+
+        # Add success criteria if present
+        if agent.success_criteria:
+            prompt_parts.append(
+                f"\nSuccess Criteria:\n{json.dumps(agent.success_criteria, indent=2)}"
+            )
+
+        # Add tools if present
+        if agent.tools:
+            prompt_parts.append(f"\nAvailable Tools: {', '.join(agent.tools)}")
+
+        return "\n".join(prompt_parts)
+
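As an illustration of the prompt this builds: for a hypothetical AgentSpec with role "reviewer", no matching base template, a config of {"max_files": 10}, and one tool "read_file" (all placeholder values), the method would return roughly:

    Role: reviewer

    Instructions:
    You are a careful reviewer. Your job is to review the provided content ...

    Configuration:
    {
      "max_files": 10
    }

    Available Tools: read_file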
+    def _execute_llm_call(
+        self, prompt: str, model_config: Any, tier: ModelTier
+    ) -> dict[str, Any]:
+        """Execute real LLM call via Anthropic or other providers.
+
+        Uses the Anthropic client for Claude models, with fallback to
+        other providers via the model configuration.
+
+        Args:
+            prompt: Prompt to send to LLM
+            model_config: Model configuration from router
+            tier: Model tier being used
+
+        Returns:
+            Dict with cost, tokens, success, and output
+
+        Raises:
+            RuntimeError: If LLM call fails after retries
+        """
+        import os
+
+        # Try to use Anthropic client
+        try:
+            from anthropic import Anthropic
+
+            client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
+
+            # Execute the LLM call
+            response = client.messages.create(
+                model=model_config.model_id,
+                max_tokens=2048,
+                messages=[
+                    {"role": "user", "content": prompt}
+                ],
+            )
+
+            # Extract response data
+            output_text = response.content[0].text if response.content else ""
+            prompt_tokens = response.usage.input_tokens
+            completion_tokens = response.usage.output_tokens
+
+            # Calculate cost
+            cost = (
+                (prompt_tokens / 1000) * model_config.cost_per_1k_input
+                + (completion_tokens / 1000) * model_config.cost_per_1k_output
+            )
+
+            return {
+                "cost": cost,
+                "tokens": {
+                    "input": prompt_tokens,
+                    "output": completion_tokens,
+                    "total": prompt_tokens + completion_tokens,
+                },
+                "success": True,
+                "output": {
+                    "message": output_text,
+                    "model": model_config.model_id,
+                    "tier": tier.value,
+                    "success": True,
+                },
+            }
+
+        except ImportError:
+            logger.warning("Anthropic client not available, using simulation")
+            return self._simulate_llm_call(prompt, model_config, tier)
+
+        except Exception as e:
+            logger.error(f"LLM call failed: {e}")
+            # Return failure result
+            return {
+                "cost": 0.0,
+                "tokens": {"input": 0, "output": 0, "total": 0},
+                "success": False,
+                "output": {
+                    "error": str(e),
+                    "model": model_config.model_id,
+                    "tier": tier.value,
+                    "success": False,
+                },
+            }
+
+    def _simulate_llm_call(
+        self, prompt: str, model_config: Any, tier: ModelTier
+    ) -> dict[str, Any]:
+        """Simulate LLM call with realistic cost/token estimates.
+
+        Used as fallback when real LLM execution is not available
+        (e.g., no API key, testing mode, etc.)
+
+        Args:
+            prompt: Prompt to send to LLM
+            model_config: Model configuration
+            tier: Model tier
+
+        Returns:
+            Dict with cost, tokens, success, and output
+        """
+        import random
+
+        # Estimate tokens (rough: ~4 chars per token)
+        prompt_tokens = len(prompt) // 4
+        completion_tokens = 500  # Assume moderate response
+
+        # Calculate cost
+        cost = (
+            (prompt_tokens / 1000) * model_config.cost_per_1k_input
+            + (completion_tokens / 1000) * model_config.cost_per_1k_output
+        )
+
+        # Simulate success rate based on tier
+        # cheap: 80%, capable: 95%, premium: 99%
+        if tier == ModelTier.CHEAP:
+            success = random.random() < 0.80
+        elif tier == ModelTier.CAPABLE:
+            success = random.random() < 0.95
+        else:  # PREMIUM
+            success = random.random() < 0.99
+
+        return {
+            "cost": cost,
+            "tokens": {
+                "input": prompt_tokens,
+                "output": completion_tokens,
+                "total": prompt_tokens + completion_tokens,
+            },
+            "success": success,
+            "output": {
+                "message": f"Simulated response at {tier.value} tier",
+                "model": model_config.model_id,
+                "tier": tier.value,
+                "success": success,
+            },
+        }
+
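A worked example of the cost formula used in both methods above, with hypothetical per-1k rates (the router's actual pricing may differ):

    # A 2,000-character prompt estimates 2000 // 4 = 500 input tokens;
    # completion_tokens is fixed at 500 in the simulation.
    # With cost_per_1k_input = $0.003 and cost_per_1k_output = $0.015 (assumed):
    # cost = (500 / 1000) * 0.003 + (500 / 1000) * 0.015
    #      = 0.0015 + 0.0075
    #      = $0.009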
+    def _evaluate_success_criteria(
+        self, result: AgentExecutionResult, agent: AgentSpec
+    ) -> bool:
+        """Evaluate if agent result meets success criteria.
+
+        Args:
+            result: Agent execution result
+            agent: Agent specification with success criteria
+
+        Returns:
+            True if success criteria met, False otherwise
+        """
+        # Basic success check
+        if not result.success:
+            return False
+
+        # If no criteria specified, basic success is enough
+        if not agent.success_criteria:
+            return True
+
+        # success_criteria is a list of descriptive strings (e.g., ["code reviewed", "tests pass"])
+        # These are informational criteria - if result.success is True, we consider the criteria met
+        # The criteria serve as documentation of what success means for this agent
+        logger.debug(
+            f"Agent succeeded with criteria: {agent.success_criteria}"
+        )
+        return True
+
+    def _save_execution(self, result: MetaWorkflowResult) -> Path:
+        """Save execution results to disk.
+
+        Args:
+            result: Execution result to save
+
+        Returns:
+            Path to saved results directory
+
+        Raises:
+            OSError: If save operation fails
+        """
+        # Create run directory
+        run_dir = self.storage_dir / result.run_id
+        run_dir.mkdir(parents=True, exist_ok=True)
+
+        # Save config (template info + form responses)
+        config_file = run_dir / "config.json"
+        config_data = {
+            "template_id": result.template_id,
+            "template_name": self.template.name,
+            "template_version": self.template.version,
+            "run_id": result.run_id,
+            "timestamp": result.timestamp,
+        }
+        config_file.write_text(json.dumps(config_data, indent=2), encoding="utf-8")
+
+        # Save form responses
+        responses_file = run_dir / "form_responses.json"
+        responses_file.write_text(
+            json.dumps(
+                {
+                    "template_id": result.form_responses.template_id,
+                    "responses": result.form_responses.responses,
+                    "timestamp": result.form_responses.timestamp,
+                    "response_id": result.form_responses.response_id,
+                },
+                indent=2,
+            ),
+            encoding="utf-8",
+        )
+
+        # Save agents created
+        agents_file = run_dir / "agents.json"
+        agents_data = [
+            {
+                "agent_id": agent.agent_id,
+                "role": agent.role,
+                "base_template": agent.base_template,
+                "tier_strategy": agent.tier_strategy.value,
+                "tools": agent.tools,
+                "config": agent.config,
+                "success_criteria": agent.success_criteria,
+            }
+            for agent in result.agents_created
+        ]
+        agents_file.write_text(json.dumps(agents_data, indent=2), encoding="utf-8")
+
+        # Save complete result
+        result_file = run_dir / "result.json"
+        result_file.write_text(result.to_json(), encoding="utf-8")
+
+        # Create human-readable report
+        report_file = run_dir / "report.txt"
+        report = self._generate_report(result)
+        report_file.write_text(report, encoding="utf-8")
+
+        logger.info(f"Saved execution results to: {run_dir}")
+        return run_dir
+
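The resulting on-disk layout for one run, per the save logic above (the run ID shown is illustrative, following the <template_id>-<YYYYMMDD-HHMMSS> pattern from execute()):

    ~/.empathy/meta_workflows/executions/
    └── code-review-20260118-143000/
        ├── config.json          # template info + run metadata
        ├── form_responses.json  # collected form answers
        ├── agents.json          # generated agent specs
        ├── result.json          # complete MetaWorkflowResult
        └── report.txt           # human-readable report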
+    def _generate_report(self, result: MetaWorkflowResult) -> str:
+        """Generate human-readable report.
+
+        Args:
+            result: Execution result
+
+        Returns:
+            Markdown-formatted report
+        """
+        lines = []
+
+        lines.append("# Meta-Workflow Execution Report")
+        lines.append("")
+        lines.append(f"**Run ID**: {result.run_id}")
+        lines.append(f"**Template**: {self.template.name}")
+        lines.append(f"**Timestamp**: {result.timestamp}")
+        lines.append(f"**Success**: {'✅ Yes' if result.success else '❌ No'}")
+        if result.error:
+            lines.append(f"**Error**: {result.error}")
+        lines.append("")
+
+        lines.append("## Summary")
+        lines.append("")
+        lines.append(f"- **Agents Created**: {len(result.agents_created)}")
+        lines.append(
+            f"- **Agents Executed**: {len(result.agent_results)}"
+        )
+        lines.append(f"- **Total Cost**: ${result.total_cost:.2f}")
+        lines.append(f"- **Total Duration**: {result.total_duration:.1f}s")
+        lines.append("")
+
+        lines.append("## Form Responses")
+        lines.append("")
+        for key, value in result.form_responses.responses.items():
+            lines.append(f"- **{key}**: {value}")
+        lines.append("")
+
+        lines.append("## Agents Created")
+        lines.append("")
+        for i, agent in enumerate(result.agents_created, 1):
+            lines.append(f"### {i}. {agent.role}")
+            lines.append("")
+            lines.append(f"- **Agent ID**: {agent.agent_id}")
+            lines.append(f"- **Base Template**: {agent.base_template}")
+            lines.append(f"- **Tier Strategy**: {agent.tier_strategy.value}")
+            lines.append(f"- **Tools**: {', '.join(agent.tools) if agent.tools else 'None'}")
+            if agent.config:
+                lines.append(f"- **Config**: {json.dumps(agent.config)}")
+            if agent.success_criteria:
+                lines.append("- **Success Criteria**:")
+                for criterion in agent.success_criteria:
+                    lines.append(f"  - {criterion}")
+            lines.append("")
+
+        lines.append("## Execution Results")
+        lines.append("")
+        for i, agent_result in enumerate(result.agent_results, 1):
+            lines.append(f"### {i}. {agent_result.role}")
+            lines.append("")
+            lines.append(
+                f"- **Status**: {'✅ Success' if agent_result.success else '❌ Failed'}"
+            )
+            lines.append(f"- **Tier Used**: {agent_result.tier_used}")
+            lines.append(f"- **Cost**: ${agent_result.cost:.2f}")
+            lines.append(f"- **Duration**: {agent_result.duration:.1f}s")
+            if agent_result.error:
+                lines.append(f"- **Error**: {agent_result.error}")
+            lines.append("")
+
+        lines.append("## Cost Breakdown")
+        lines.append("")
+
+        # Group by tier
+        tier_costs = {}
+        for agent_result in result.agent_results:
+            tier = agent_result.tier_used
+            if tier not in tier_costs:
+                tier_costs[tier] = 0.0
+            tier_costs[tier] += agent_result.cost
+
+        for tier, cost in sorted(tier_costs.items()):
+            lines.append(f"- **{tier}**: ${cost:.2f}")
+
+        lines.append("")
+        lines.append("---")
+        lines.append("")
+        lines.append("*Generated by Empathy Framework Meta-Workflow System*")
+
+        return "\n".join(lines)
+
+
+# =============================================================================
+# Helper functions
+# =============================================================================
+
+
+def load_execution_result(run_id: str, storage_dir: str | None = None) -> MetaWorkflowResult:
+    """Load a saved execution result.
+
+    Args:
+        run_id: ID of execution to load
+        storage_dir: Directory where executions are stored
+
+    Returns:
+        Loaded MetaWorkflowResult
+
+    Raises:
+        FileNotFoundError: If result not found
+        ValueError: If result file is invalid
+    """
+    if storage_dir is None:
+        storage_dir = str(Path.home() / ".empathy" / "meta_workflows" / "executions")
+
+    result_file = Path(storage_dir) / run_id / "result.json"
+
+    if not result_file.exists():
+        raise FileNotFoundError(f"Result not found: {run_id}")
+
+    try:
+        json_str = result_file.read_text(encoding="utf-8")
+        data = json.loads(json_str)
+        return MetaWorkflowResult.from_dict(data)
+
+    except (json.JSONDecodeError, KeyError) as e:
+        raise ValueError(f"Invalid result file: {e}") from e
+
+
+def list_execution_results(storage_dir: str | None = None) -> list[str]:
+    """List all saved execution results.
+
+    Args:
+        storage_dir: Directory where executions are stored
+
+    Returns:
+        List of run IDs (sorted by timestamp, newest first)
+    """
+    if storage_dir is None:
+        storage_dir = str(Path.home() / ".empathy" / "meta_workflows" / "executions")
+
+    storage_path = Path(storage_dir)
+
+    if not storage_path.exists():
+        return []
+
+    # Find all directories with result.json
+    run_ids = []
+    for dir_path in storage_path.iterdir():
+        if dir_path.is_dir() and (dir_path / "result.json").exists():
+            run_ids.append(dir_path.name)
+
+    # Sort by timestamp (newest first)
+    run_ids.sort(reverse=True)
+
+    return run_ids
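A short sketch of the two helpers in use, assuming at least one run has been saved under the default storage directory and that the module is importable as empathy_os.meta_workflows.workflow:

    from empathy_os.meta_workflows.workflow import (
        list_execution_results,
        load_execution_result,
    )

    run_ids = list_execution_results()  # newest first
    if run_ids:
        result = load_execution_result(run_ids[0])
        print(result.run_id, result.success, f"${result.total_cost:.2f}")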