empathy-framework 4.1.1-py3-none-any.whl → 4.4.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (45)
  1. {empathy_framework-4.1.1.dist-info → empathy_framework-4.4.0.dist-info}/METADATA +77 -12
  2. {empathy_framework-4.1.1.dist-info → empathy_framework-4.4.0.dist-info}/RECORD +45 -14
  3. empathy_os/cli_unified.py +13 -0
  4. empathy_os/memory/long_term.py +5 -0
  5. empathy_os/memory/unified.py +149 -9
  6. empathy_os/meta_workflows/__init__.py +74 -0
  7. empathy_os/meta_workflows/agent_creator.py +254 -0
  8. empathy_os/meta_workflows/builtin_templates.py +567 -0
  9. empathy_os/meta_workflows/cli_meta_workflows.py +1551 -0
  10. empathy_os/meta_workflows/form_engine.py +304 -0
  11. empathy_os/meta_workflows/intent_detector.py +298 -0
  12. empathy_os/meta_workflows/models.py +567 -0
  13. empathy_os/meta_workflows/pattern_learner.py +754 -0
  14. empathy_os/meta_workflows/session_context.py +398 -0
  15. empathy_os/meta_workflows/template_registry.py +229 -0
  16. empathy_os/meta_workflows/workflow.py +980 -0
  17. empathy_os/orchestration/execution_strategies.py +888 -1
  18. empathy_os/orchestration/pattern_learner.py +699 -0
  19. empathy_os/socratic/__init__.py +273 -0
  20. empathy_os/socratic/ab_testing.py +969 -0
  21. empathy_os/socratic/blueprint.py +532 -0
  22. empathy_os/socratic/cli.py +689 -0
  23. empathy_os/socratic/collaboration.py +1112 -0
  24. empathy_os/socratic/domain_templates.py +916 -0
  25. empathy_os/socratic/embeddings.py +734 -0
  26. empathy_os/socratic/engine.py +729 -0
  27. empathy_os/socratic/explainer.py +663 -0
  28. empathy_os/socratic/feedback.py +767 -0
  29. empathy_os/socratic/forms.py +624 -0
  30. empathy_os/socratic/generator.py +716 -0
  31. empathy_os/socratic/llm_analyzer.py +635 -0
  32. empathy_os/socratic/mcp_server.py +751 -0
  33. empathy_os/socratic/session.py +306 -0
  34. empathy_os/socratic/storage.py +635 -0
  35. empathy_os/socratic/success.py +719 -0
  36. empathy_os/socratic/visual_editor.py +812 -0
  37. empathy_os/socratic/web_ui.py +925 -0
  38. empathy_os/workflows/manage_documentation.py +18 -2
  39. empathy_os/workflows/release_prep_crew.py +16 -1
  40. empathy_os/workflows/test_coverage_boost_crew.py +16 -1
  41. empathy_os/workflows/test_maintenance_crew.py +18 -1
  42. {empathy_framework-4.1.1.dist-info → empathy_framework-4.4.0.dist-info}/WHEEL +0 -0
  43. {empathy_framework-4.1.1.dist-info → empathy_framework-4.4.0.dist-info}/entry_points.txt +0 -0
  44. {empathy_framework-4.1.1.dist-info → empathy_framework-4.4.0.dist-info}/licenses/LICENSE +0 -0
  45. {empathy_framework-4.1.1.dist-info → empathy_framework-4.4.0.dist-info}/top_level.txt +0 -0
empathy_os/socratic/feedback.py
@@ -0,0 +1,767 @@
+ """Feedback Loop for Continuous Improvement
+
+ Analyzes success metrics from workflow executions to improve
+ future agent generation. This creates a learning system that:
+
+ 1. Tracks which agent configurations succeed
+ 2. Identifies patterns in successful workflows
+ 3. Adjusts agent recommendations based on historical data
+ 4. Provides insights for manual tuning
+
+ Copyright 2026 Smart-AI-Memory
+ Licensed under Fair Source License 0.9
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any
+
+ from .blueprint import AgentBlueprint, WorkflowBlueprint
+ from .success import SuccessEvaluation
+
+ logger = logging.getLogger(__name__)
+
+
+ # =============================================================================
+ # FEEDBACK DATA STRUCTURES
+ # =============================================================================
+
+
+ @dataclass
+ class AgentPerformance:
+     """Performance statistics for an agent template."""
+
+     template_id: str
+     total_uses: int = 0
+     successful_uses: int = 0
+     average_score: float = 0.0
+     scores: list[float] = field(default_factory=list)
+
+     # Context-specific performance
+     by_domain: dict[str, dict[str, float]] = field(default_factory=dict)
+     by_language: dict[str, dict[str, float]] = field(default_factory=dict)
+     by_quality_focus: dict[str, dict[str, float]] = field(default_factory=dict)
+
+     # Trend data
+     recent_scores: list[tuple[str, float]] = field(default_factory=list)  # (timestamp, score)
+
+     @property
+     def success_rate(self) -> float:
+         """Calculate success rate."""
+         if self.total_uses == 0:
+             return 0.0
+         return self.successful_uses / self.total_uses
+
+     @property
+     def trend(self) -> str:
+         """Determine performance trend."""
+         if len(self.recent_scores) < 5:
+             return "insufficient_data"
+
+         recent_5 = [s for _, s in self.recent_scores[-5:]]
+         older_5 = [s for _, s in self.recent_scores[-10:-5]] if len(self.recent_scores) >= 10 else []
+
+         if not older_5:
+             return "stable"
+
+         recent_avg = sum(recent_5) / len(recent_5)
+         older_avg = sum(older_5) / len(older_5)
+
+         if recent_avg > older_avg * 1.1:
+             return "improving"
+         elif recent_avg < older_avg * 0.9:
+             return "declining"
+         else:
+             return "stable"
+
+     def record_use(
+         self,
+         success: bool,
+         score: float,
+         domain: str | None = None,
+         languages: list[str] | None = None,
+         quality_focus: list[str] | None = None,
+     ) -> None:
+         """Record a use of this agent."""
+         self.total_uses += 1
+         if success:
+             self.successful_uses += 1
+
+         self.scores.append(score)
+         self.average_score = sum(self.scores) / len(self.scores)
+
+         # Record with timestamp for trend analysis
+         self.recent_scores.append((datetime.now().isoformat(), score))
+         # Keep last 100 scores
+         if len(self.recent_scores) > 100:
+             self.recent_scores = self.recent_scores[-100:]
+
+         # Record by context
+         if domain:
+             if domain not in self.by_domain:
+                 self.by_domain[domain] = {"uses": 0, "successes": 0, "total_score": 0}
+             self.by_domain[domain]["uses"] += 1
+             self.by_domain[domain]["successes"] += 1 if success else 0
+             self.by_domain[domain]["total_score"] += score
+
+         if languages:
+             for lang in languages:
+                 if lang not in self.by_language:
+                     self.by_language[lang] = {"uses": 0, "successes": 0, "total_score": 0}
+                 self.by_language[lang]["uses"] += 1
+                 self.by_language[lang]["successes"] += 1 if success else 0
+                 self.by_language[lang]["total_score"] += score
+
+         if quality_focus:
+             for qf in quality_focus:
+                 if qf not in self.by_quality_focus:
+                     self.by_quality_focus[qf] = {"uses": 0, "successes": 0, "total_score": 0}
+                 self.by_quality_focus[qf]["uses"] += 1
+                 self.by_quality_focus[qf]["successes"] += 1 if success else 0
+                 self.by_quality_focus[qf]["total_score"] += score
+
+     def get_score_for_context(
+         self,
+         domain: str | None = None,
+         languages: list[str] | None = None,
+         quality_focus: list[str] | None = None,
+     ) -> float:
+         """Get a weighted score for a specific context."""
+         scores = []
+         weights = []
+
+         # Base score
+         if self.total_uses > 0:
+             scores.append(self.average_score)
+             weights.append(1.0)
+
+         # Domain-specific score
+         if domain and domain in self.by_domain:
+             d = self.by_domain[domain]
+             if d["uses"] > 0:
+                 scores.append(d["total_score"] / d["uses"])
+                 weights.append(2.0)  # Higher weight for domain match
+
+         # Language-specific score
+         if languages:
+             for lang in languages:
+                 if lang in self.by_language:
+                     lang_stats = self.by_language[lang]
+                     if lang_stats["uses"] > 0:
+                         scores.append(lang_stats["total_score"] / lang_stats["uses"])
+                         weights.append(1.5)
+
+         # Quality focus score
+         if quality_focus:
+             for qf in quality_focus:
+                 if qf in self.by_quality_focus:
+                     q = self.by_quality_focus[qf]
+                     if q["uses"] > 0:
+                         scores.append(q["total_score"] / q["uses"])
+                         weights.append(1.5)
+
+         if not scores:
+             return 0.5  # Default neutral score
+
+         # Weighted average
+         return sum(s * w for s, w in zip(scores, weights, strict=False)) / sum(weights)
+
+     def to_dict(self) -> dict[str, Any]:
+         """Serialize to dictionary."""
+         return {
+             "template_id": self.template_id,
+             "total_uses": self.total_uses,
+             "successful_uses": self.successful_uses,
+             "average_score": self.average_score,
+             "success_rate": self.success_rate,
+             "trend": self.trend,
+             "by_domain": self.by_domain,
+             "by_language": self.by_language,
+             "by_quality_focus": self.by_quality_focus,
+             "recent_scores": self.recent_scores[-20:],  # Last 20 for display
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> AgentPerformance:
+         """Deserialize from dictionary."""
+         perf = cls(template_id=data.get("template_id", ""))
+         perf.total_uses = data.get("total_uses", 0)
+         perf.successful_uses = data.get("successful_uses", 0)
+         perf.average_score = data.get("average_score", 0.0)
+         perf.by_domain = data.get("by_domain", {})
+         perf.by_language = data.get("by_language", {})
+         perf.by_quality_focus = data.get("by_quality_focus", {})
+         perf.recent_scores = data.get("recent_scores", [])
+         return perf
+
+
+ @dataclass
+ class WorkflowPattern:
+     """Pattern of successful workflow configurations."""
+
+     pattern_id: str
+     domain: str
+     agent_combination: list[str]  # List of template IDs
+     stage_configuration: list[dict[str, Any]]
+     uses: int = 0
+     successes: int = 0
+     average_score: float = 0.0
+
+     @property
+     def success_rate(self) -> float:
+         """Calculate success rate."""
+         if self.uses == 0:
+             return 0.0
+         return self.successes / self.uses
+
+     def to_dict(self) -> dict[str, Any]:
+         """Serialize to dictionary."""
+         return {
+             "pattern_id": self.pattern_id,
+             "domain": self.domain,
+             "agent_combination": self.agent_combination,
+             "stage_configuration": self.stage_configuration,
+             "uses": self.uses,
+             "successes": self.successes,
+             "average_score": self.average_score,
+             "success_rate": self.success_rate,
+         }
+
+
+ # =============================================================================
+ # FEEDBACK COLLECTOR
+ # =============================================================================
+
+
+ class FeedbackCollector:
+     """Collects and stores feedback from workflow executions.
+
+     Example:
+         >>> collector = FeedbackCollector()
+         >>> collector.record_execution(blueprint, evaluation)
+         >>> performance = collector.get_agent_performance("security_reviewer")
+     """
+
+     def __init__(self, storage_path: str = ".empathy/socratic/feedback"):
+         """Initialize the collector.
+
+         Args:
+             storage_path: Path for feedback data storage
+         """
+         self.storage_path = Path(storage_path)
+         self.storage_path.mkdir(parents=True, exist_ok=True)
+
+         self._agent_performance: dict[str, AgentPerformance] = {}
+         self._workflow_patterns: dict[str, WorkflowPattern] = {}
+
+         self._load_data()
+
+     def _load_data(self) -> None:
+         """Load existing feedback data."""
+         # Load agent performance
+         perf_file = self.storage_path / "agent_performance.json"
+         if perf_file.exists():
+             try:
+                 with perf_file.open() as f:
+                     data = json.load(f)
+                 for template_id, perf_data in data.items():
+                     self._agent_performance[template_id] = AgentPerformance.from_dict(perf_data)
+             except (json.JSONDecodeError, KeyError) as e:
+                 logger.warning(f"Failed to load agent performance: {e}")
+
+         # Load workflow patterns
+         patterns_file = self.storage_path / "workflow_patterns.json"
+         if patterns_file.exists():
+             try:
+                 with patterns_file.open() as f:
+                     data = json.load(f)
+                 for pattern_id, pattern_data in data.items():
+                     # Drop the derived "success_rate" key written by to_dict();
+                     # it is a property, not an init field of the dataclass.
+                     pattern_data.pop("success_rate", None)
+                     self._workflow_patterns[pattern_id] = WorkflowPattern(**pattern_data)
+             except (json.JSONDecodeError, KeyError, TypeError) as e:
+                 logger.warning(f"Failed to load workflow patterns: {e}")
+
+     def _save_data(self) -> None:
+         """Save feedback data to disk."""
+         # Save agent performance
+         perf_file = self.storage_path / "agent_performance.json"
+         perf_data = {k: v.to_dict() for k, v in self._agent_performance.items()}
+         with perf_file.open("w") as f:
+             json.dump(perf_data, f, indent=2)
+
+         # Save workflow patterns
+         patterns_file = self.storage_path / "workflow_patterns.json"
+         patterns_data = {k: v.to_dict() for k, v in self._workflow_patterns.items()}
+         with patterns_file.open("w") as f:
+             json.dump(patterns_data, f, indent=2)
+
+     def record_execution(
+         self,
+         blueprint: WorkflowBlueprint,
+         evaluation: SuccessEvaluation,
+     ) -> None:
+         """Record feedback from a workflow execution.
+
+         Args:
+             blueprint: The executed workflow blueprint
+             evaluation: The success evaluation results
+         """
+         success = evaluation.overall_success
+         score = evaluation.overall_score
+
+         # Record for each agent
+         for agent in blueprint.agents:
+             template_id = agent.template_id or agent.spec.id
+
+             if template_id not in self._agent_performance:
+                 self._agent_performance[template_id] = AgentPerformance(template_id=template_id)
+
+             self._agent_performance[template_id].record_use(
+                 success=success,
+                 score=score,
+                 domain=blueprint.domain,
+                 languages=blueprint.supported_languages,
+                 quality_focus=blueprint.quality_focus,
+             )
+
+         # Record workflow pattern
+         pattern_id = self._generate_pattern_id(blueprint)
+         if pattern_id not in self._workflow_patterns:
+             self._workflow_patterns[pattern_id] = WorkflowPattern(
+                 pattern_id=pattern_id,
+                 domain=blueprint.domain,
+                 agent_combination=[a.template_id or a.spec.id for a in blueprint.agents],
+                 stage_configuration=[s.to_dict() for s in blueprint.stages],
+             )
+
+         pattern = self._workflow_patterns[pattern_id]
+         pattern.uses += 1
+         if success:
+             pattern.successes += 1
+         # Rolling average
+         pattern.average_score = (
+             pattern.average_score * (pattern.uses - 1) + score
+         ) / pattern.uses
+
+         self._save_data()
+         logger.info(f"Recorded feedback for blueprint {blueprint.id[:8]}: success={success}, score={score:.2f}")
+
+     def _generate_pattern_id(self, blueprint: WorkflowBlueprint) -> str:
+         """Generate a unique ID for a workflow pattern."""
+         agents = sorted(a.template_id or a.spec.id for a in blueprint.agents)
+         return f"{blueprint.domain}:{':'.join(agents)}"
+
+     def get_agent_performance(self, template_id: str) -> AgentPerformance | None:
+         """Get performance data for an agent template."""
+         return self._agent_performance.get(template_id)
+
+     def get_all_performance(self) -> dict[str, AgentPerformance]:
+         """Get all agent performance data."""
+         return self._agent_performance.copy()
+
+     def get_best_agents_for_context(
+         self,
+         domain: str,
+         languages: list[str] | None = None,
+         quality_focus: list[str] | None = None,
+         limit: int = 5,
+     ) -> list[tuple[str, float]]:
+         """Get the best performing agents for a context.
+
+         Args:
+             domain: Target domain
+             languages: Target languages
+             quality_focus: Quality attributes
+             limit: Maximum number of results
+
+         Returns:
+             List of (template_id, score) tuples sorted by score
+         """
+         scored_agents = []
+
+         for template_id, perf in self._agent_performance.items():
+             score = perf.get_score_for_context(domain, languages, quality_focus)
+             # Apply confidence penalty for low sample sizes
+             confidence = min(perf.total_uses / 10, 1.0)  # Full confidence at 10+ uses
+             adjusted_score = score * confidence + 0.5 * (1 - confidence)  # Blend with neutral
+             scored_agents.append((template_id, adjusted_score))
+
+         # Sort by score descending
+         scored_agents.sort(key=lambda x: x[1], reverse=True)
+
+         return scored_agents[:limit]
+
+     def get_successful_patterns(
+         self,
+         domain: str | None = None,
+         min_success_rate: float = 0.7,
+         min_uses: int = 3,
+     ) -> list[WorkflowPattern]:
+         """Get successful workflow patterns.
+
+         Args:
+             domain: Filter by domain
+             min_success_rate: Minimum success rate threshold
+             min_uses: Minimum number of uses to be considered
+
+         Returns:
+             List of successful patterns
+         """
+         patterns = []
+
+         for pattern in self._workflow_patterns.values():
+             if domain and pattern.domain != domain:
+                 continue
+             if pattern.uses < min_uses:
+                 continue
+             if pattern.success_rate < min_success_rate:
+                 continue
+             patterns.append(pattern)
+
+         # Sort by success rate then by uses
+         patterns.sort(key=lambda p: (p.success_rate, p.uses), reverse=True)
+
+         return patterns
+
+     def get_insights(self) -> dict[str, Any]:
+         """Get aggregated insights from feedback data.
+
+         Returns:
+             Dictionary with various insights
+         """
+         insights: dict[str, Any] = {
+             "total_agents_tracked": len(self._agent_performance),
+             "total_patterns_tracked": len(self._workflow_patterns),
+             "top_performing_agents": [],
+             "declining_agents": [],
+             "domain_insights": {},
+             "recommendations": [],
+         }
+
+         # Top performing agents
+         all_agents = [
+             (tid, perf) for tid, perf in self._agent_performance.items()
+             if perf.total_uses >= 5
+         ]
+         all_agents.sort(key=lambda x: x[1].average_score, reverse=True)
+         insights["top_performing_agents"] = [
+             {"template_id": tid, "score": perf.average_score, "uses": perf.total_uses}
+             for tid, perf in all_agents[:5]
+         ]
+
+         # Declining agents
+         for tid, perf in self._agent_performance.items():
+             if perf.trend == "declining" and perf.total_uses >= 5:
+                 insights["declining_agents"].append({
+                     "template_id": tid,
+                     "current_score": perf.average_score,
+                     "uses": perf.total_uses,
+                 })
+
+         # Domain insights
+         domains: dict[str, dict[str, Any]] = {}
+         for perf in self._agent_performance.values():
+             for domain, stats in perf.by_domain.items():
+                 if domain not in domains:
+                     domains[domain] = {"total_uses": 0, "total_score": 0, "agents": set()}
+                 domains[domain]["total_uses"] += stats["uses"]
+                 domains[domain]["total_score"] += stats["total_score"]
+                 domains[domain]["agents"].add(perf.template_id)
+
+         for domain, stats in domains.items():
+             if stats["total_uses"] > 0:
+                 insights["domain_insights"][domain] = {
+                     "average_score": stats["total_score"] / stats["total_uses"],
+                     "total_uses": stats["total_uses"],
+                     "agents_used": len(stats["agents"]),
+                 }
+
+         # Generate recommendations from the domain insights computed above
+         insights["recommendations"] = self._generate_recommendations(insights["domain_insights"])
+
+         return insights
+
+     def _generate_recommendations(self, domain_insights: dict[str, Any]) -> list[str]:
+         """Generate improvement recommendations based on feedback."""
+         recommendations = []
+
+         # Check for underperforming agents
+         for tid, perf in self._agent_performance.items():
+             if perf.total_uses >= 10 and perf.success_rate < 0.5:
+                 recommendations.append(
+                     f"Consider reviewing '{tid}' configuration - success rate is {perf.success_rate:.0%}"
+                 )
+
+         # Check for agents that work well together
+         successful_patterns = self.get_successful_patterns(min_success_rate=0.8, min_uses=5)
+         for pattern in successful_patterns[:3]:
+             agents = ", ".join(pattern.agent_combination)
+             recommendations.append(
+                 f"Successful pattern for '{pattern.domain}': [{agents}] - {pattern.success_rate:.0%} success rate"
+             )
+
+         # Check for domains needing more data; the insights are passed in
+         # rather than fetched via get_insights(), which would recurse.
+         for domain, stats in domain_insights.items():
+             if stats["total_uses"] < 5:
+                 recommendations.append(
+                     f"More data needed for '{domain}' domain - only {stats['total_uses']} executions recorded"
+                 )
+
+         return recommendations
+
+
+ # =============================================================================
+ # ADAPTIVE AGENT GENERATOR
+ # =============================================================================
+
+
+ class AdaptiveAgentGenerator:
+     """Agent generator that uses feedback to improve recommendations.
+
+     Wraps the standard AgentGenerator and adjusts recommendations
+     based on historical performance data.
+
+     Example:
+         >>> generator = AdaptiveAgentGenerator()
+         >>> agents = generator.generate_agents_for_requirements(requirements)
+         >>> # Returns agents weighted by historical success
+     """
+
+     def __init__(self, feedback_collector: FeedbackCollector | None = None):
+         """Initialize the adaptive generator.
+
+         Args:
+             feedback_collector: Feedback collector instance
+         """
+         from .generator import AgentGenerator
+
+         self.base_generator = AgentGenerator()
+         self.feedback = feedback_collector or FeedbackCollector()
+
+     def generate_agents_for_requirements(
+         self,
+         requirements: dict[str, Any],
+         use_feedback: bool = True,
+     ) -> list[AgentBlueprint]:
+         """Generate agents using feedback-informed recommendations.
+
+         Args:
+             requirements: Requirements from Socratic session
+             use_feedback: Whether to use feedback data
+
+         Returns:
+             List of AgentBlueprints optimized based on feedback
+         """
+         # Get base recommendations
+         base_agents = self.base_generator.generate_agents_for_requirements(requirements)
+
+         if not use_feedback:
+             return base_agents
+
+         # Get context
+         domain = requirements.get("domain", "general")
+         languages = requirements.get("languages", [])
+         quality_focus = requirements.get("quality_focus", [])
+
+         # Get best agents for this context
+         best_agents = self.feedback.get_best_agents_for_context(
+             domain=domain,
+             languages=languages,
+             quality_focus=quality_focus,
+             limit=10,
+         )
+
+         if not best_agents:
+             return base_agents
+
+         # Reorder and potentially add agents based on feedback
+         agent_scores = {tid: score for tid, score in best_agents}
+
+         # Score base agents
+         scored_base = []
+         for agent in base_agents:
+             tid = agent.template_id or agent.spec.id
+             feedback_score = agent_scores.get(tid, 0.5)
+             scored_base.append((agent, feedback_score))
+
+         # Sort by feedback score
+         scored_base.sort(key=lambda x: x[1], reverse=True)
+
+         # Check if any high-performing agents are missing
+         base_ids = {a.template_id or a.spec.id for a in base_agents}
+         for tid, score in best_agents:
+             if tid not in base_ids and score > 0.7:
+                 # Add this high-performing agent
+                 try:
+                     new_agent = self.base_generator.generate_agent_from_template(
+                         tid,
+                         customizations={
+                             "languages": languages,
+                             "quality_focus": quality_focus,
+                         },
+                     )
+                     scored_base.append((new_agent, score))
+                     logger.info(f"Added high-performing agent '{tid}' based on feedback")
+                 except ValueError:
+                     pass  # Template not found
+
+         # Return sorted agents
+         return [agent for agent, _ in scored_base]
+
+     def get_recommendation_explanation(
+         self,
+         requirements: dict[str, Any],
+     ) -> dict[str, Any]:
+         """Get explanation for agent recommendations.
+
+         Args:
+             requirements: Requirements dict
+
+         Returns:
+             Explanation of why agents were recommended
+         """
+         domain = requirements.get("domain", "general")
+         languages = requirements.get("languages", [])
+         quality_focus = requirements.get("quality_focus", [])
+
+         best_agents = self.feedback.get_best_agents_for_context(
+             domain=domain,
+             languages=languages,
+             quality_focus=quality_focus,
+         )
+
+         successful_patterns = self.feedback.get_successful_patterns(
+             domain=domain,
+             min_success_rate=0.7,
+         )
+
+         return {
+             "context": {
+                 "domain": domain,
+                 "languages": languages,
+                 "quality_focus": quality_focus,
+             },
+             "recommended_agents": [
+                 {
+                     "template_id": tid,
+                     "score": score,
+                     "performance": self.feedback.get_agent_performance(tid).to_dict()
+                     if self.feedback.get_agent_performance(tid)
+                     else None,
+                 }
+                 for tid, score in best_agents
+             ],
+             "successful_patterns": [p.to_dict() for p in successful_patterns[:3]],
+             "data_quality": {
+                 "total_executions": sum(
+                     p.total_uses for p in self.feedback.get_all_performance().values()
+                 ),
+                 "agents_with_data": len([
+                     p for p in self.feedback.get_all_performance().values()
+                     if p.total_uses >= 5
+                 ]),
+             },
+         }
+
+
+ # =============================================================================
+ # FEEDBACK LOOP INTEGRATION
+ # =============================================================================
+
+
+ class FeedbackLoop:
+     """High-level integration for the feedback loop.
+
+     Provides a simple interface to:
+     1. Record execution results
+     2. Get improved recommendations
+     3. View insights
+
+     Example:
+         >>> loop = FeedbackLoop()
+         >>>
+         >>> # After workflow execution
+         >>> loop.record(blueprint, evaluation)
+         >>>
+         >>> # For next generation
+         >>> agents = loop.get_recommended_agents(requirements)
+         >>>
+         >>> # View insights
+         >>> insights = loop.get_insights()
+     """
+
+     def __init__(
+         self,
+         storage_path: str = ".empathy/socratic/feedback",
+     ):
+         """Initialize the feedback loop.
+
+         Args:
+             storage_path: Path for feedback storage
+         """
+         self.collector = FeedbackCollector(storage_path)
+         self.adaptive_generator = AdaptiveAgentGenerator(self.collector)
+
+     def record(
+         self,
+         blueprint: WorkflowBlueprint,
+         evaluation: SuccessEvaluation,
+     ) -> None:
+         """Record execution results for learning.
+
+         Args:
+             blueprint: The executed blueprint
+             evaluation: The success evaluation
+         """
+         self.collector.record_execution(blueprint, evaluation)
+
+     def get_recommended_agents(
+         self,
+         requirements: dict[str, Any],
+     ) -> list[AgentBlueprint]:
+         """Get recommended agents using feedback data.
+
+         Args:
+             requirements: Requirements from Socratic session
+
+         Returns:
+             List of recommended agents
+         """
+         return self.adaptive_generator.generate_agents_for_requirements(requirements)
+
+     def get_insights(self) -> dict[str, Any]:
+         """Get aggregated insights.
+
+         Returns:
+             Dictionary with insights and recommendations
+         """
+         return self.collector.get_insights()
+
+     def get_agent_stats(self, template_id: str) -> dict[str, Any] | None:
+         """Get performance stats for a specific agent.
+
+         Args:
+             template_id: Agent template ID
+
+         Returns:
+             Performance statistics or None
+         """
+         perf = self.collector.get_agent_performance(template_id)
+         return perf.to_dict() if perf else None
+
+     def explain_recommendations(
+         self,
+         requirements: dict[str, Any],
+     ) -> dict[str, Any]:
+         """Explain why certain agents are recommended.
+
+         Args:
+             requirements: Requirements dict
+
+         Returns:
+             Explanation dictionary
+         """
+         return self.adaptive_generator.get_recommendation_explanation(requirements)
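
For orientation, a minimal usage sketch of the feedback loop this release introduces. It assumes `blueprint` and `evaluation` are a `WorkflowBlueprint` and `SuccessEvaluation` produced by the surrounding `empathy_os.socratic` modules, and the requirement values shown are hypothetical:

    from empathy_os.socratic.feedback import FeedbackLoop

    loop = FeedbackLoop(storage_path=".empathy/socratic/feedback")

    # Record each workflow execution so future recommendations
    # are weighted by historical success.
    loop.record(blueprint, evaluation)

    # Ask for agents ranked by feedback for a new context.
    requirements = {
        "domain": "web",               # hypothetical example values
        "languages": ["python"],
        "quality_focus": ["security"],
    }
    agents = loop.get_recommended_agents(requirements)

    # Inspect aggregate insights, per-agent stats, and the reasoning.
    insights = loop.get_insights()
    stats = loop.get_agent_stats("security_reviewer")
    explanation = loop.explain_recommendations(requirements)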