empathy-framework 5.1.1__py3-none-any.whl → 5.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/METADATA +79 -6
  2. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/RECORD +83 -64
  3. empathy_os/__init__.py +1 -1
  4. empathy_os/cache/hybrid.py +5 -1
  5. empathy_os/cli/commands/batch.py +8 -0
  6. empathy_os/cli/commands/profiling.py +4 -0
  7. empathy_os/cli/commands/workflow.py +8 -4
  8. empathy_os/cli_router.py +9 -0
  9. empathy_os/config.py +15 -2
  10. empathy_os/core_modules/__init__.py +15 -0
  11. empathy_os/dashboard/simple_server.py +62 -30
  12. empathy_os/mcp/__init__.py +10 -0
  13. empathy_os/mcp/server.py +506 -0
  14. empathy_os/memory/control_panel.py +1 -131
  15. empathy_os/memory/control_panel_support.py +145 -0
  16. empathy_os/memory/encryption.py +159 -0
  17. empathy_os/memory/long_term.py +46 -631
  18. empathy_os/memory/long_term_types.py +99 -0
  19. empathy_os/memory/mixins/__init__.py +25 -0
  20. empathy_os/memory/mixins/backend_init_mixin.py +249 -0
  21. empathy_os/memory/mixins/capabilities_mixin.py +208 -0
  22. empathy_os/memory/mixins/handoff_mixin.py +208 -0
  23. empathy_os/memory/mixins/lifecycle_mixin.py +49 -0
  24. empathy_os/memory/mixins/long_term_mixin.py +352 -0
  25. empathy_os/memory/mixins/promotion_mixin.py +109 -0
  26. empathy_os/memory/mixins/short_term_mixin.py +182 -0
  27. empathy_os/memory/short_term.py +61 -12
  28. empathy_os/memory/simple_storage.py +302 -0
  29. empathy_os/memory/storage_backend.py +167 -0
  30. empathy_os/memory/types.py +8 -3
  31. empathy_os/memory/unified.py +21 -1120
  32. empathy_os/meta_workflows/cli_commands/__init__.py +56 -0
  33. empathy_os/meta_workflows/cli_commands/agent_commands.py +321 -0
  34. empathy_os/meta_workflows/cli_commands/analytics_commands.py +442 -0
  35. empathy_os/meta_workflows/cli_commands/config_commands.py +232 -0
  36. empathy_os/meta_workflows/cli_commands/memory_commands.py +182 -0
  37. empathy_os/meta_workflows/cli_commands/template_commands.py +354 -0
  38. empathy_os/meta_workflows/cli_commands/workflow_commands.py +382 -0
  39. empathy_os/meta_workflows/cli_meta_workflows.py +52 -1802
  40. empathy_os/models/telemetry/__init__.py +71 -0
  41. empathy_os/models/telemetry/analytics.py +594 -0
  42. empathy_os/models/telemetry/backend.py +196 -0
  43. empathy_os/models/telemetry/data_models.py +431 -0
  44. empathy_os/models/telemetry/storage.py +489 -0
  45. empathy_os/orchestration/__init__.py +35 -0
  46. empathy_os/orchestration/execution_strategies.py +481 -0
  47. empathy_os/orchestration/meta_orchestrator.py +488 -1
  48. empathy_os/routing/workflow_registry.py +36 -0
  49. empathy_os/telemetry/agent_coordination.py +2 -3
  50. empathy_os/telemetry/agent_tracking.py +26 -7
  51. empathy_os/telemetry/approval_gates.py +18 -24
  52. empathy_os/telemetry/cli.py +19 -724
  53. empathy_os/telemetry/commands/__init__.py +14 -0
  54. empathy_os/telemetry/commands/dashboard_commands.py +696 -0
  55. empathy_os/telemetry/event_streaming.py +7 -3
  56. empathy_os/telemetry/feedback_loop.py +28 -15
  57. empathy_os/tools.py +183 -0
  58. empathy_os/workflows/__init__.py +5 -0
  59. empathy_os/workflows/autonomous_test_gen.py +860 -161
  60. empathy_os/workflows/base.py +6 -2
  61. empathy_os/workflows/code_review.py +4 -1
  62. empathy_os/workflows/document_gen/__init__.py +25 -0
  63. empathy_os/workflows/document_gen/config.py +30 -0
  64. empathy_os/workflows/document_gen/report_formatter.py +162 -0
  65. empathy_os/workflows/{document_gen.py → document_gen/workflow.py} +5 -184
  66. empathy_os/workflows/output.py +4 -1
  67. empathy_os/workflows/progress.py +8 -2
  68. empathy_os/workflows/security_audit.py +2 -2
  69. empathy_os/workflows/security_audit_phase3.py +7 -4
  70. empathy_os/workflows/seo_optimization.py +633 -0
  71. empathy_os/workflows/test_gen/__init__.py +52 -0
  72. empathy_os/workflows/test_gen/ast_analyzer.py +249 -0
  73. empathy_os/workflows/test_gen/config.py +88 -0
  74. empathy_os/workflows/test_gen/data_models.py +38 -0
  75. empathy_os/workflows/test_gen/report_formatter.py +289 -0
  76. empathy_os/workflows/test_gen/test_templates.py +381 -0
  77. empathy_os/workflows/test_gen/workflow.py +655 -0
  78. empathy_os/workflows/test_gen.py +42 -1905
  79. empathy_os/cli/parsers/cache 2.py +0 -65
  80. empathy_os/cli_router 2.py +0 -416
  81. empathy_os/dashboard/app 2.py +0 -512
  82. empathy_os/dashboard/simple_server 2.py +0 -403
  83. empathy_os/dashboard/standalone_server 2.py +0 -536
  84. empathy_os/memory/types 2.py +0 -441
  85. empathy_os/models/adaptive_routing 2.py +0 -437
  86. empathy_os/models/telemetry.py +0 -1660
  87. empathy_os/project_index/scanner_parallel 2.py +0 -291
  88. empathy_os/telemetry/agent_coordination 2.py +0 -478
  89. empathy_os/telemetry/agent_tracking 2.py +0 -350
  90. empathy_os/telemetry/approval_gates 2.py +0 -563
  91. empathy_os/telemetry/event_streaming 2.py +0 -405
  92. empathy_os/telemetry/feedback_loop 2.py +0 -557
  93. empathy_os/vscode_bridge 2.py +0 -173
  94. empathy_os/workflows/progressive/__init__ 2.py +0 -92
  95. empathy_os/workflows/progressive/cli 2.py +0 -242
  96. empathy_os/workflows/progressive/core 2.py +0 -488
  97. empathy_os/workflows/progressive/orchestrator 2.py +0 -701
  98. empathy_os/workflows/progressive/reports 2.py +0 -528
  99. empathy_os/workflows/progressive/telemetry 2.py +0 -280
  100. empathy_os/workflows/progressive/test_gen 2.py +0 -514
  101. empathy_os/workflows/progressive/workflow 2.py +0 -628
  102. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/WHEEL +0 -0
  103. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/entry_points.txt +0 -0
  104. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/licenses/LICENSE +0 -0
  105. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +0 -0
  106. {empathy_framework-5.1.1.dist-info → empathy_framework-5.3.0.dist-info}/top_level.txt +0 -0
@@ -1,701 +0,0 @@
1
- """Meta-orchestrator for progressive tier escalation decisions.
2
-
3
- The MetaOrchestrator is responsible for:
4
- 1. Analyzing tier execution results
5
- 2. Making escalation decisions
6
- 3. Creating specialized agent teams
7
- 4. Building XML-enhanced prompts with failure context
8
- 5. Detecting stagnation patterns
9
- """
10
-
11
- import logging
12
- from typing import Any
13
-
14
- from empathy_os.workflows.progressive.core import (
15
- EscalationConfig,
16
- Tier,
17
- TierResult,
18
- )
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
- class MetaOrchestrator:
24
- """Meta-agent that orchestrates progressive tier decisions.
25
-
26
- The MetaOrchestrator acts as a higher-level intelligence that:
27
- - Analyzes tier results objectively
28
- - Decides when to escalate vs retry
29
- - Detects stagnation patterns
30
- - Creates specialized agent teams per tier
31
- - Builds context-aware prompts
32
-
33
- This separates escalation logic from workflow logic, allowing
34
- workflows to focus on their domain-specific tasks.
35
-
36
- Example:
37
- >>> orchestrator = MetaOrchestrator()
38
- >>> should_esc, reason = orchestrator.should_escalate(
39
- ... tier=Tier.CHEAP,
40
- ... result=cheap_result,
41
- ... attempt=2,
42
- ... config=config
43
- ... )
44
- >>> if should_esc:
45
- ... print(f"Escalating: {reason}")
46
- """
47
-
48
- def __init__(self) -> None:
49
- """Initialize meta-orchestrator."""
50
- self.tier_history: dict[Tier, list[float]] = {
51
- Tier.CHEAP: [],
52
- Tier.CAPABLE: [],
53
- Tier.PREMIUM: []
54
- }
55
-
56
- def should_escalate(
57
- self,
58
- tier: Tier,
59
- result: TierResult,
60
- attempt: int,
61
- config: EscalationConfig
62
- ) -> tuple[bool, str]:
63
- """Determine if tier should escalate to next tier.
64
-
65
- Multi-criteria decision based on:
66
- - Quality score vs thresholds
67
- - Syntax errors
68
- - Failure rate
69
- - Attempt count
70
- - Stagnation detection (for CAPABLE tier)
71
-
72
- Args:
73
- tier: Current tier
74
- result: Execution result
75
- attempt: Attempt number at this tier
76
- config: Escalation configuration
77
-
78
- Returns:
79
- Tuple of (should_escalate, reason)
80
-
81
- Example:
82
- >>> should_esc, reason = orchestrator.should_escalate(
83
- ... Tier.CHEAP, result, 2, config
84
- ... )
85
- >>> # (True, "Quality score 65 below threshold 70")
86
- """
87
- cqs = result.quality_score
88
-
89
- # Track CQS history for stagnation detection
90
- self.tier_history[tier].append(cqs)
91
-
92
- # Check if we've met minimum attempts
93
- min_attempts = config.get_min_attempts(tier)
94
- if attempt < min_attempts:
95
- return False, f"Only {attempt}/{min_attempts} attempts completed"
96
-
97
- # Tier-specific threshold checks
98
- if tier == Tier.CHEAP:
99
- return self._check_cheap_escalation(result, config)
100
- elif tier == Tier.CAPABLE:
101
- return self._check_capable_escalation(result, attempt, config)
102
- else: # PREMIUM
103
- # Premium doesn't escalate (highest tier)
104
- return False, "Premium tier is final"
105
-
106
- def _check_cheap_escalation(
107
- self,
108
- result: TierResult,
109
- config: EscalationConfig
110
- ) -> tuple[bool, str]:
111
- """Check if cheap tier should escalate to capable.
112
-
113
- Args:
114
- result: Cheap tier result
115
- config: Escalation configuration
116
-
117
- Returns:
118
- Tuple of (should_escalate, reason)
119
- """
120
- cqs = result.quality_score
121
- failure_rate = 1.0 - result.success_rate
122
- syntax_error_count = len(result.failure_analysis.syntax_errors)
123
-
124
- # Check severity first (critical failures)
125
- if result.failure_analysis.failure_severity == "CRITICAL":
126
- return True, "Critical failures detected (consider skipping to Premium)"
127
-
128
- # Check syntax errors (prioritize over CQS)
129
- if syntax_error_count > config.cheap_to_capable_max_syntax_errors:
130
- return True, f"{syntax_error_count} syntax errors exceeds limit {config.cheap_to_capable_max_syntax_errors}"
131
-
132
- # Check failure rate
133
- if failure_rate > config.cheap_to_capable_failure_rate:
134
- return True, f"Failure rate {failure_rate:.1%} exceeds threshold {config.cheap_to_capable_failure_rate:.1%}"
135
-
136
- # Check CQS threshold
137
- if cqs < config.cheap_to_capable_min_cqs:
138
- return True, f"Quality score {cqs:.1f} below threshold {config.cheap_to_capable_min_cqs}"
139
-
140
- # All checks passed, no escalation needed
141
- return False, f"Quality acceptable (CQS={cqs:.1f})"
142
-
143
- def _check_capable_escalation(
144
- self,
145
- result: TierResult,
146
- attempt: int,
147
- config: EscalationConfig
148
- ) -> tuple[bool, str]:
149
- """Check if capable tier should escalate to premium.
150
-
151
- Includes stagnation detection: if improvement is <5% for 2 consecutive
152
- attempts, escalate even if quality is borderline acceptable.
153
-
154
- Args:
155
- result: Capable tier result
156
- attempt: Attempt number
157
- config: Escalation configuration
158
-
159
- Returns:
160
- Tuple of (should_escalate, reason)
161
- """
162
- cqs = result.quality_score
163
- failure_rate = 1.0 - result.success_rate
164
- syntax_error_count = len(result.failure_analysis.syntax_errors)
165
-
166
- # Check max attempts first
167
- if attempt >= config.capable_max_attempts:
168
- return True, f"Max attempts ({config.capable_max_attempts}) reached without achieving target quality"
169
-
170
- # Check syntax errors (strict for capable tier)
171
- if syntax_error_count > config.capable_to_premium_max_syntax_errors:
172
- return True, f"{syntax_error_count} syntax errors exceeds limit {config.capable_to_premium_max_syntax_errors}"
173
-
174
- # Check failure rate
175
- if failure_rate > config.capable_to_premium_failure_rate:
176
- return True, f"Failure rate {failure_rate:.1%} exceeds threshold {config.capable_to_premium_failure_rate:.1%}"
177
-
178
- # Check stagnation (consecutive runs with <5% improvement)
179
- # Only check if we have enough history
180
- if len(self.tier_history[Tier.CAPABLE]) >= config.consecutive_stagnation_limit + 1:
181
- is_stagnant, stagnation_reason = self._detect_stagnation(
182
- self.tier_history[Tier.CAPABLE],
183
- config.improvement_threshold,
184
- config.consecutive_stagnation_limit
185
- )
186
-
187
- if is_stagnant:
188
- return True, f"Stagnation detected: {stagnation_reason}"
189
-
190
- # Check CQS threshold (after stagnation check)
191
- if cqs < config.capable_to_premium_min_cqs and attempt >= config.capable_min_attempts:
192
- return True, f"Quality score {cqs:.1f} below threshold {config.capable_to_premium_min_cqs}"
193
-
194
- # No escalation needed
195
- return False, f"Quality acceptable (CQS={cqs:.1f}), continuing improvement"
196
-
197
- def _detect_stagnation(
198
- self,
199
- cqs_history: list[float],
200
- improvement_threshold: float,
201
- consecutive_limit: int
202
- ) -> tuple[bool, str]:
203
- """Detect if improvement has stagnated.
204
-
205
- Stagnation is defined as N consecutive attempts with <X% improvement.
206
-
207
- Args:
208
- cqs_history: List of CQS scores (chronological)
209
- improvement_threshold: Min improvement % to avoid stagnation
210
- consecutive_limit: Number of consecutive stagnations before escalating
211
-
212
- Returns:
213
- Tuple of (is_stagnant, reason)
214
-
215
- Example:
216
- >>> history = [75, 76, 77, 77.5]
217
- >>> is_stagnant, reason = orchestrator._detect_stagnation(
218
- ... history, improvement_threshold=5.0, consecutive_limit=2
219
- ... )
220
- >>> # (True, "2 consecutive runs with <5% improvement")
221
- """
222
- if len(cqs_history) < consecutive_limit + 1:
223
- return False, "Insufficient history for stagnation detection"
224
-
225
- # Check last N improvements
226
- consecutive_stagnations = 0
227
-
228
- for i in range(len(cqs_history) - 1, 0, -1):
229
- current = cqs_history[i]
230
- previous = cqs_history[i - 1]
231
-
232
- improvement = current - previous
233
-
234
- if improvement < improvement_threshold:
235
- consecutive_stagnations += 1
236
-
237
- if consecutive_stagnations >= consecutive_limit:
238
- return True, (
239
- f"{consecutive_stagnations} consecutive runs with "
240
- f"<{improvement_threshold}% improvement"
241
- )
242
- else:
243
- # Improvement above threshold, reset counter
244
- break
245
-
246
- return False, "No stagnation detected"
247
-
248
- def build_tier_prompt(
249
- self,
250
- tier: Tier,
251
- base_task: str,
252
- failure_context: dict[str, Any] | None = None
253
- ) -> str:
254
- """Build XML-enhanced prompt with failure context.
255
-
256
- Creates tier-appropriate prompts:
257
- - CHEAP: Simple, focused prompt
258
- - CAPABLE: Enhanced with failure analysis from cheap tier
259
- - PREMIUM: Comprehensive with full escalation context
260
-
261
- Args:
262
- tier: Which tier this prompt is for
263
- base_task: Base task description
264
- failure_context: Context from previous tier (if escalating)
265
-
266
- Returns:
267
- XML-enhanced prompt string
268
-
269
- Example:
270
- >>> prompt = orchestrator.build_tier_prompt(
271
- ... Tier.CAPABLE,
272
- ... "Generate tests for module.py",
273
- ... failure_context={"previous_tier": Tier.CHEAP, ...}
274
- ... )
275
- """
276
- if tier == Tier.CHEAP:
277
- return self._build_cheap_prompt(base_task)
278
- elif tier == Tier.CAPABLE:
279
- return self._build_capable_prompt(base_task, failure_context)
280
- else: # PREMIUM
281
- return self._build_premium_prompt(base_task, failure_context)
282
-
283
- def _build_cheap_prompt(self, base_task: str) -> str:
284
- """Build simple prompt for cheap tier.
285
-
286
- Args:
287
- base_task: Task description
288
-
289
- Returns:
290
- XML-enhanced prompt
291
- """
292
- return f"""<task>
293
- <objective>{base_task}</objective>
294
-
295
- <quality_requirements>
296
- <pass_rate>70%+</pass_rate>
297
- <coverage>60%+</coverage>
298
- <syntax>No syntax errors</syntax>
299
- </quality_requirements>
300
-
301
- <instructions>
302
- Generate high-quality output that meets the quality requirements.
303
- Focus on correctness and completeness.
304
- </instructions>
305
- </task>"""
306
-
307
    def _build_capable_prompt(
        self,
        base_task: str,
        failure_context: dict[str, Any] | None
    ) -> str:
        """Build enhanced prompt for capable tier with failure context.

        Without context, returns a static enhanced prompt; with context,
        assembles an XML document embedding the cheap tier's failure
        patterns, up to three concrete failed examples, and focus areas
        targeted at the observed error categories.

        Args:
            base_task: Task description
            failure_context: Context from cheap tier (keys used:
                "previous_cqs", "reason", "failures", "examples")

        Returns:
            XML-enhanced prompt with failure analysis
        """
        if not failure_context:
            # No context, use enhanced base prompt
            return f"""<task>
  <objective>{base_task}</objective>

  <quality_requirements>
    <pass_rate>80%+</pass_rate>
    <coverage>70%+</coverage>
    <quality_score>80+</quality_score>
  </quality_requirements>

  <instructions>
  Generate high-quality output with comprehensive coverage.
  Ensure all edge cases are handled correctly.
  </instructions>
</task>"""

        # Extract detailed failure context (defaults keep missing keys safe)
        previous_cqs = failure_context.get("previous_cqs", 0)
        reason = failure_context.get("reason", "Quality below threshold")
        failures = failure_context.get("failures", [])
        examples = failure_context.get("examples", [])

        # Analyze failure patterns (empty dict when there are no failures)
        failure_patterns = self.analyze_failure_patterns(failures) if failures else {}

        # Build detailed prompt with failure analysis
        prompt_parts = [
            "<task>",
            f"  <objective>{base_task}</objective>",
            "",
            "  <context_from_previous_tier>",
            "    <tier>cheap</tier>",
            f"    <quality_score>{previous_cqs:.1f}</quality_score>",
            f"    <escalation_reason>{reason}</escalation_reason>",
            "",
        ]

        # Add failure pattern analysis, most frequent error type first
        if failure_patterns:
            prompt_parts.append("    <failure_analysis>")
            prompt_parts.append(f"      <total_failures>{failure_patterns.get('total_failures', 0)}</total_failures>")
            prompt_parts.append("      <patterns>")

            error_types = failure_patterns.get("error_types", {})
            # Negated count sorts descending by frequency
            for error_type, count in sorted(error_types.items(), key=lambda x: -x[1]):
                prompt_parts.append(f"        <pattern type=\"{error_type}\" count=\"{count}\" />")

            prompt_parts.append("      </patterns>")

            primary_issue = failure_patterns.get("primary_issue", "unknown")
            prompt_parts.append(f"      <primary_issue>{primary_issue}</primary_issue>")
            prompt_parts.append("    </failure_analysis>")
            prompt_parts.append("")

        # Add concrete failure examples (max 3)
        if examples:
            prompt_parts.append("    <failed_attempts>")
            prompt_parts.append("      <!-- Examples of what the cheap tier produced -->")

            for i, example in enumerate(examples[:3], 1):
                error = example.get("error", "Unknown error")
                code_snippet = example.get("code", "")[:200]  # Limit snippet length

                # Error/code text is escaped so it cannot break the XML
                prompt_parts.append(f"      <example number=\"{i}\">")
                prompt_parts.append(f"        <error>{self._escape_xml(error)}</error>")
                if code_snippet:
                    prompt_parts.append(f"        <code_snippet>{self._escape_xml(code_snippet)}</code_snippet>")
                prompt_parts.append("      </example>")

            prompt_parts.append("    </failed_attempts>")
            prompt_parts.append("")

        prompt_parts.extend([
            "    <improvement_needed>",
            "      The cheap tier struggled with these items. Analyze the failure",
            "      patterns above and generate improved solutions that specifically",
            "      address these issues.",
            "    </improvement_needed>",
            "  </context_from_previous_tier>",
            "",
            "  <your_task>",
            "    Generate improved output that avoids the specific failure patterns identified above.",
            "",
            "    <quality_requirements>",
            "      <pass_rate>80%+</pass_rate>",
            "      <coverage>70%+</coverage>",
            "      <quality_score>80+</quality_score>",
            "    </quality_requirements>",
            "",
            "    <focus_areas>",
        ])

        # Add targeted focus areas based on failure patterns
        if failure_patterns:
            error_types = failure_patterns.get("error_types", {})
            if "async_errors" in error_types:
                prompt_parts.append("      <focus area=\"async\">Proper async/await patterns and error handling</focus>")
            if "mocking_errors" in error_types:
                prompt_parts.append("      <focus area=\"mocking\">Correct mock setup and teardown</focus>")
            if "syntax_errors" in error_types:
                prompt_parts.append("      <focus area=\"syntax\">Valid Python syntax and imports</focus>")
            if "other_errors" in error_types:
                prompt_parts.append("      <focus area=\"general\">Edge cases and error handling</focus>")
        else:
            # Default focus areas
            prompt_parts.extend([
                "      <focus area=\"syntax\">Correct syntax and structure</focus>",
                "      <focus area=\"coverage\">Comprehensive test coverage</focus>",
                "      <focus area=\"errors\">Proper error handling</focus>",
                "      <focus area=\"edge_cases\">Edge case coverage</focus>",
            ])

        prompt_parts.extend([
            "    </focus_areas>",
            "  </your_task>",
            "</task>"
        ])

        return "\n".join(prompt_parts)
441
-
442
    def _build_premium_prompt(
        self,
        base_task: str,
        failure_context: dict[str, Any] | None
    ) -> str:
        """Build comprehensive prompt for premium tier.

        Without context, returns a static expert prompt; with context,
        assembles an XML document describing the full escalation path,
        persistent failure patterns with per-category guidance, and up
        to three capable-tier attempts with their quality scores.

        Args:
            base_task: Task description
            failure_context: Context from previous tiers (keys used:
                "previous_tier", "previous_cqs", "reason", "failures",
                "examples")

        Returns:
            XML-enhanced prompt with full escalation context
        """
        if not failure_context:
            return f"""<task>
  <objective>{base_task}</objective>

  <quality_requirements>
    <pass_rate>95%+</pass_rate>
    <coverage>85%+</coverage>
    <quality_score>95+</quality_score>
  </quality_requirements>

  <expert_instructions>
  Apply expert-level techniques to generate exceptional output.
  This is the highest tier - excellence is expected.
  </expert_instructions>
</task>"""

        # Extract comprehensive escalation context
        previous_tier = failure_context.get("previous_tier", Tier.CAPABLE)
        previous_cqs = failure_context.get("previous_cqs", 0)
        reason = failure_context.get("reason", "Previous tier unsuccessful")
        failures = failure_context.get("failures", [])
        examples = failure_context.get("examples", [])

        # Analyze persistent failure patterns
        failure_patterns = self.analyze_failure_patterns(failures) if failures else {}

        prompt_parts = [
            "<task>",
            f"  <objective>{base_task}</objective>",
            "",
            "  <escalation_context>",
            f"    <previous_tier>{previous_tier.value}</previous_tier>",
            f"    <quality_score>{previous_cqs:.1f}</quality_score>",
            f"    <escalation_reason>{self._escape_xml(reason)}</escalation_reason>",
            "",
            "    <progression_analysis>",
            "      This task has been escalated through multiple tiers:",
            "      1. CHEAP tier: Initial attempt with basic models",
            "      2. CAPABLE tier: Enhanced attempt with better models",
            "      3. PREMIUM tier (current): Final expert-level attempt",
            "",
            "      The fact that this reached premium tier indicates a complex",
            "      or difficult case requiring expert-level handling.",
            "    </progression_analysis>",
            "",
        ]

        # Add detailed failure analysis, most frequent error type first
        if failure_patterns:
            prompt_parts.append("    <persistent_issues>")
            prompt_parts.append(f"      <total_failures>{failure_patterns.get('total_failures', 0)}</total_failures>")
            prompt_parts.append("      <failure_patterns>")

            error_types = failure_patterns.get("error_types", {})
            # Negated count sorts descending by frequency
            for error_type, count in sorted(error_types.items(), key=lambda x: -x[1]):
                prompt_parts.append(f"        <pattern type=\"{error_type}\" count=\"{count}\">")

                # Add specific guidance per error type
                if error_type == "async_errors":
                    prompt_parts.append("          <guidance>Use proper async/await patterns, handle timeouts correctly</guidance>")
                elif error_type == "mocking_errors":
                    prompt_parts.append("          <guidance>Ensure mocks are properly configured and reset</guidance>")
                elif error_type == "syntax_errors":
                    prompt_parts.append("          <guidance>Double-check syntax, imports, and type annotations</guidance>")

                prompt_parts.append("        </pattern>")

            prompt_parts.append("      </failure_patterns>")
            prompt_parts.append(f"      <primary_issue>{failure_patterns.get('primary_issue', 'unknown')}</primary_issue>")
            prompt_parts.append("    </persistent_issues>")
            prompt_parts.append("")

        # Add concrete examples from capable tier
        if examples:
            prompt_parts.append("    <capable_tier_attempts>")
            prompt_parts.append("      <!-- Examples from the capable tier's attempts -->")

            for i, example in enumerate(examples[:3], 1):
                error = example.get("error", "Unknown error")
                code_snippet = example.get("code", "")[:300]  # More context for premium
                quality_score = example.get("quality_score", 0)

                # Error/code text is escaped so it cannot break the XML
                prompt_parts.append(f"      <attempt number=\"{i}\" quality_score=\"{quality_score}\">")
                prompt_parts.append(f"        <error>{self._escape_xml(error)}</error>")
                if code_snippet:
                    prompt_parts.append(f"        <code_snippet>{self._escape_xml(code_snippet)}</code_snippet>")
                prompt_parts.append("      </attempt>")

            prompt_parts.append("    </capable_tier_attempts>")
            prompt_parts.append("")

        prompt_parts.extend([
            "  </escalation_context>",
            "",
            "  <expert_task>",
            "    <critical_notice>",
            "      You are the FINAL tier in the progressive escalation system.",
            "      Previous tiers (cheap and capable) have attempted this task",
            "      multiple times and could not achieve the required quality.",
            "",
            "      This is the last automated attempt before human review.",
            "      Excellence is not optional - it is required.",
            "    </critical_notice>",
            "",
            "    <expert_techniques>",
            "      Apply sophisticated approaches:",
            "      - Deep analysis of why previous attempts failed",
            "      - Production-grade error handling and edge cases",
            "      - Comprehensive documentation and clarity",
            "      - Defensive programming against subtle bugs",
        ])

        # Add specific techniques based on failure patterns
        if failure_patterns:
            error_types = failure_patterns.get("error_types", {})
            if "async_errors" in error_types:
                prompt_parts.append("      - Advanced async patterns (asyncio.gather, proper timeouts)")
            if "mocking_errors" in error_types:
                prompt_parts.append("      - Sophisticated mocking (pytest fixtures, proper lifecycle)")
            if "syntax_errors" in error_types:
                prompt_parts.append("      - Rigorous syntax validation before submission")

        prompt_parts.extend([
            "    </expert_techniques>",
            "",
            "    <quality_requirements>",
            "      <pass_rate>95%+</pass_rate>",
            "      <coverage>85%+</coverage>",
            "      <quality_score>95+</quality_score>",
            "      <zero_syntax_errors>MANDATORY</zero_syntax_errors>",
            "    </quality_requirements>",
            "",
            "    <success_criteria>",
            "      Your implementation must:",
            "      1. Address ALL failure patterns identified above",
            "      2. Achieve exceptional quality scores (95+)",
            "      3. Have zero syntax errors or runtime failures",
            "      4. Include comprehensive edge case coverage",
            "      5. Be production-ready with proper documentation",
            "    </success_criteria>",
            "  </expert_task>",
            "</task>"
        ])

        return "\n".join(prompt_parts)
601
-
602
- def _escape_xml(self, text: str) -> str:
603
- """Escape special XML characters.
604
-
605
- Args:
606
- text: Text to escape
607
-
608
- Returns:
609
- XML-safe text
610
-
611
- Example:
612
- >>> orchestrator._escape_xml("Error: <missing>")
613
- 'Error: &lt;missing&gt;'
614
- """
615
- return (
616
- text
617
- .replace("&", "&amp;")
618
- .replace("<", "&lt;")
619
- .replace(">", "&gt;")
620
- .replace('"', "&quot;")
621
- .replace("'", "&apos;")
622
- )
623
-
624
- def create_agent_team(
625
- self,
626
- tier: Tier,
627
- failure_context: dict[str, Any] | None = None
628
- ) -> list[str]:
629
- """Create specialized agent team for tier.
630
-
631
- Different tiers get different agent compositions:
632
- - CHEAP: Single generator agent
633
- - CAPABLE: Generator + Analyzer
634
- - PREMIUM: Generator + Analyzer + Reviewer
635
-
636
- Args:
637
- tier: Which tier
638
- failure_context: Context from previous tier
639
-
640
- Returns:
641
- List of agent types to create
642
-
643
- Note:
644
- This returns agent type names. Actual agent creation
645
- will be implemented when we integrate with the agent system.
646
-
647
- Example:
648
- >>> agents = orchestrator.create_agent_team(
649
- ... Tier.CAPABLE,
650
- ... failure_context={...}
651
- ... )
652
- >>> # ["generator", "analyzer"]
653
- """
654
- if tier == Tier.CHEAP:
655
- return ["generator"]
656
- elif tier == Tier.CAPABLE:
657
- return ["generator", "analyzer"]
658
- else: # PREMIUM
659
- return ["generator", "analyzer", "reviewer"]
660
-
661
- def analyze_failure_patterns(
662
- self,
663
- failures: list[dict[str, Any]]
664
- ) -> dict[str, Any]:
665
- """Analyze failure patterns to inform next tier.
666
-
667
- Groups failures by type and identifies common issues.
668
-
669
- Args:
670
- failures: List of failed items with error details
671
-
672
- Returns:
673
- Failure pattern analysis
674
-
675
- Example:
676
- >>> patterns = orchestrator.analyze_failure_patterns(
677
- ... [{"error": "SyntaxError: async"}, ...]
678
- ... )
679
- >>> # {"async_errors": 15, "mocking_errors": 10, ...}
680
- """
681
- # Group by error type
682
- error_types: dict[str, int] = {}
683
-
684
- for failure in failures:
685
- error = failure.get("error", "unknown")
686
-
687
- # Categorize error
688
- if "async" in error.lower() or "await" in error.lower():
689
- error_types["async_errors"] = error_types.get("async_errors", 0) + 1
690
- elif "mock" in error.lower():
691
- error_types["mocking_errors"] = error_types.get("mocking_errors", 0) + 1
692
- elif "syntax" in error.lower():
693
- error_types["syntax_errors"] = error_types.get("syntax_errors", 0) + 1
694
- else:
695
- error_types["other_errors"] = error_types.get("other_errors", 0) + 1
696
-
697
- return {
698
- "total_failures": len(failures),
699
- "error_types": error_types,
700
- "primary_issue": max(error_types.items(), key=lambda x: x[1])[0] if error_types else "unknown"
701
- }