empathy-framework 5.0.1__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/METADATA +311 -150
  2. {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/RECORD +60 -33
  3. empathy_framework-5.1.0.dist-info/licenses/LICENSE +201 -0
  4. empathy_framework-5.1.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
  5. empathy_llm_toolkit/providers.py +175 -35
  6. empathy_llm_toolkit/utils/tokens.py +150 -30
  7. empathy_os/__init__.py +1 -1
  8. empathy_os/cli/commands/batch.py +256 -0
  9. empathy_os/cli/commands/cache.py +248 -0
  10. empathy_os/cli/commands/inspect.py +1 -2
  11. empathy_os/cli/commands/metrics.py +1 -1
  12. empathy_os/cli/commands/routing.py +285 -0
  13. empathy_os/cli/commands/workflow.py +2 -1
  14. empathy_os/cli/parsers/__init__.py +6 -0
  15. empathy_os/cli/parsers/batch.py +118 -0
  16. empathy_os/cli/parsers/cache 2.py +65 -0
  17. empathy_os/cli/parsers/cache.py +65 -0
  18. empathy_os/cli/parsers/routing.py +110 -0
  19. empathy_os/cli_minimal.py +3 -3
  20. empathy_os/cli_router 2.py +416 -0
  21. empathy_os/dashboard/__init__.py +1 -2
  22. empathy_os/dashboard/app 2.py +512 -0
  23. empathy_os/dashboard/app.py +1 -1
  24. empathy_os/dashboard/simple_server 2.py +403 -0
  25. empathy_os/dashboard/standalone_server 2.py +536 -0
  26. empathy_os/dashboard/standalone_server.py +22 -11
  27. empathy_os/memory/types 2.py +441 -0
  28. empathy_os/metrics/collector.py +31 -0
  29. empathy_os/models/__init__.py +19 -0
  30. empathy_os/models/adaptive_routing 2.py +437 -0
  31. empathy_os/models/auth_cli.py +444 -0
  32. empathy_os/models/auth_strategy.py +450 -0
  33. empathy_os/models/token_estimator.py +21 -13
  34. empathy_os/project_index/scanner_parallel 2.py +291 -0
  35. empathy_os/telemetry/agent_coordination 2.py +478 -0
  36. empathy_os/telemetry/agent_coordination.py +14 -16
  37. empathy_os/telemetry/agent_tracking 2.py +350 -0
  38. empathy_os/telemetry/agent_tracking.py +18 -20
  39. empathy_os/telemetry/approval_gates 2.py +563 -0
  40. empathy_os/telemetry/approval_gates.py +27 -39
  41. empathy_os/telemetry/event_streaming 2.py +405 -0
  42. empathy_os/telemetry/event_streaming.py +22 -22
  43. empathy_os/telemetry/feedback_loop 2.py +557 -0
  44. empathy_os/telemetry/feedback_loop.py +14 -17
  45. empathy_os/workflows/__init__.py +8 -0
  46. empathy_os/workflows/autonomous_test_gen.py +569 -0
  47. empathy_os/workflows/batch_processing.py +56 -10
  48. empathy_os/workflows/bug_predict.py +45 -0
  49. empathy_os/workflows/code_review.py +92 -22
  50. empathy_os/workflows/document_gen.py +594 -62
  51. empathy_os/workflows/llm_base.py +363 -0
  52. empathy_os/workflows/perf_audit.py +69 -0
  53. empathy_os/workflows/release_prep.py +54 -0
  54. empathy_os/workflows/security_audit.py +154 -79
  55. empathy_os/workflows/test_gen.py +60 -0
  56. empathy_os/workflows/test_gen_behavioral.py +477 -0
  57. empathy_os/workflows/test_gen_parallel.py +341 -0
  58. empathy_framework-5.0.1.dist-info/licenses/LICENSE +0 -139
  59. {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/WHEEL +0 -0
  60. {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/entry_points.txt +0 -0
  61. {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,557 @@
1
+ """Agent-to-LLM Feedback Loop for Quality-Based Learning.
2
+
3
+ Pattern 6 from Agent Coordination Architecture - Collect quality ratings
4
+ on LLM responses and use feedback to inform routing decisions.
5
+
6
+ Usage:
7
+ # Record feedback after LLM response
8
+ feedback = FeedbackLoop()
9
+ feedback.record_feedback(
10
+ workflow_name="code-review",
11
+ stage_name="analysis",
12
+ tier=ModelTier.CHEAP,
13
+ quality_score=0.8,
14
+ metadata={
15
+ "response_length": 500,
16
+ "tokens": 150,
17
+ "latency_ms": 1200
18
+ }
19
+ )
20
+
21
+ # Get tier recommendation based on historical performance
22
+ recommendation = feedback.recommend_tier(
23
+ workflow_name="code-review",
24
+ stage_name="analysis"
25
+ )
26
+ if recommendation.recommended_tier == ModelTier.CAPABLE:
27
+ print(f"Upgrade to CAPABLE tier (confidence: {recommendation.confidence})")
28
+
29
+ # Get quality stats for analysis
30
+ stats = feedback.get_quality_stats(
31
+ workflow_name="code-review",
32
+ stage_name="analysis"
33
+ )
34
+ print(f"Average quality: {stats.avg_quality}")
35
+
36
+ Copyright 2025 Smart-AI-Memory
37
+ Licensed under Fair Source License 0.9
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import logging
43
+ from dataclasses import dataclass, field
44
+ from datetime import datetime, timedelta
45
+ from enum import Enum
46
+ from typing import Any
47
+ from uuid import uuid4
48
+
49
+ logger = logging.getLogger(__name__)
50
+
51
+
52
+ class ModelTier(str, Enum):
53
+ """Model tier enum matching workflows.base.ModelTier."""
54
+
55
+ CHEAP = "cheap"
56
+ CAPABLE = "capable"
57
+ PREMIUM = "premium"
58
+
59
+
60
+ @dataclass
61
+ class FeedbackEntry:
62
+ """Quality feedback for an LLM response.
63
+
64
+ Represents a single quality rating for a workflow stage execution.
65
+ """
66
+
67
+ feedback_id: str
68
+ workflow_name: str
69
+ stage_name: str
70
+ tier: str # ModelTier value
71
+ quality_score: float # 0.0 (bad) to 1.0 (excellent)
72
+ timestamp: datetime
73
+ metadata: dict[str, Any] = field(default_factory=dict)
74
+
75
+ def to_dict(self) -> dict[str, Any]:
76
+ """Convert to dictionary for serialization."""
77
+ return {
78
+ "feedback_id": self.feedback_id,
79
+ "workflow_name": self.workflow_name,
80
+ "stage_name": self.stage_name,
81
+ "tier": self.tier,
82
+ "quality_score": self.quality_score,
83
+ "timestamp": self.timestamp.isoformat() if isinstance(self.timestamp, datetime) else self.timestamp,
84
+ "metadata": self.metadata,
85
+ }
86
+
87
+ @classmethod
88
+ def from_dict(cls, data: dict[str, Any]) -> FeedbackEntry:
89
+ """Create from dictionary."""
90
+ timestamp = data.get("timestamp")
91
+ if isinstance(timestamp, str):
92
+ timestamp = datetime.fromisoformat(timestamp)
93
+ elif not isinstance(timestamp, datetime):
94
+ timestamp = datetime.utcnow()
95
+
96
+ return cls(
97
+ feedback_id=data["feedback_id"],
98
+ workflow_name=data["workflow_name"],
99
+ stage_name=data["stage_name"],
100
+ tier=data["tier"],
101
+ quality_score=data["quality_score"],
102
+ timestamp=timestamp,
103
+ metadata=data.get("metadata", {}),
104
+ )
105
+
106
+
107
+ @dataclass
108
+ class QualityStats:
109
+ """Quality statistics for a workflow stage."""
110
+
111
+ workflow_name: str
112
+ stage_name: str
113
+ tier: str
114
+ avg_quality: float
115
+ min_quality: float
116
+ max_quality: float
117
+ sample_count: int
118
+ recent_trend: float # -1.0 (declining) to 1.0 (improving)
119
+
120
+
121
+ @dataclass
122
+ class TierRecommendation:
123
+ """Tier recommendation based on quality feedback."""
124
+
125
+ current_tier: str
126
+ recommended_tier: str
127
+ confidence: float # 0.0 (low) to 1.0 (high)
128
+ reason: str
129
+ stats: dict[str, QualityStats] # Stats by tier
130
+
131
+
132
+ class FeedbackLoop:
133
+ """Agent-to-LLM feedback loop for quality-based learning.
134
+
135
+ Collects quality ratings on LLM responses and uses feedback to:
136
+ - Recommend tier upgrades/downgrades
137
+ - Track quality trends over time
138
+ - Identify underperforming stages
139
+ - Optimize routing based on historical performance
140
+
141
+ Attributes:
142
+ FEEDBACK_TTL: Feedback entry TTL (7 days)
143
+ MIN_SAMPLES: Minimum samples for recommendation (10)
144
+ QUALITY_THRESHOLD: Quality threshold for tier upgrade (0.7)
145
+ """
146
+
147
+ FEEDBACK_TTL = 604800 # 7 days (60*60*24*7)
148
+ MIN_SAMPLES = 10 # Minimum samples for recommendation
149
+ QUALITY_THRESHOLD = 0.7 # Quality below this triggers upgrade recommendation
150
+
151
+ def __init__(self, memory=None):
152
+ """Initialize feedback loop.
153
+
154
+ Args:
155
+ memory: Memory instance for storing feedback
156
+ """
157
+ self.memory = memory
158
+
159
+ if self.memory is None:
160
+ try:
161
+ from empathy_os.telemetry import UsageTracker
162
+
163
+ tracker = UsageTracker.get_instance()
164
+ if hasattr(tracker, "_memory"):
165
+ self.memory = tracker._memory
166
+ except (ImportError, AttributeError):
167
+ pass
168
+
169
+ if self.memory is None:
170
+ logger.warning("No memory backend available for feedback loop")
171
+
172
+ def record_feedback(
173
+ self,
174
+ workflow_name: str,
175
+ stage_name: str,
176
+ tier: str | ModelTier,
177
+ quality_score: float,
178
+ metadata: dict[str, Any] | None = None,
179
+ ) -> str:
180
+ """Record quality feedback for a workflow stage execution.
181
+
182
+ Args:
183
+ workflow_name: Name of workflow
184
+ stage_name: Name of stage within workflow
185
+ tier: Model tier used (CHEAP, CAPABLE, PREMIUM)
186
+ quality_score: Quality rating 0.0-1.0 (0=bad, 1=excellent)
187
+ metadata: Optional metadata (tokens, latency, etc.)
188
+
189
+ Returns:
190
+ Feedback ID if stored, empty string otherwise
191
+
192
+ Example:
193
+ >>> feedback = FeedbackLoop()
194
+ >>> feedback.record_feedback(
195
+ ... workflow_name="code-review",
196
+ ... stage_name="analysis",
197
+ ... tier=ModelTier.CHEAP,
198
+ ... quality_score=0.85,
199
+ ... metadata={"tokens": 150, "latency_ms": 1200}
200
+ ... )
201
+ """
202
+ if not self.memory:
203
+ logger.debug("Cannot record feedback: no memory backend")
204
+ return ""
205
+
206
+ # Validate quality score
207
+ if not 0.0 <= quality_score <= 1.0:
208
+ logger.warning(f"Invalid quality score: {quality_score} (must be 0.0-1.0)")
209
+ return ""
210
+
211
+ # Convert tier to string if ModelTier enum
212
+ if isinstance(tier, ModelTier):
213
+ tier = tier.value
214
+
215
+ feedback_id = f"feedback_{uuid4().hex[:8]}"
216
+
217
+ entry = FeedbackEntry(
218
+ feedback_id=feedback_id,
219
+ workflow_name=workflow_name,
220
+ stage_name=stage_name,
221
+ tier=tier,
222
+ quality_score=quality_score,
223
+ timestamp=datetime.utcnow(),
224
+ metadata=metadata or {},
225
+ )
226
+
227
+ # Store feedback
228
+ # Key format: feedback:{workflow}:{stage}:{tier}:{id}
229
+ key = f"feedback:{workflow_name}:{stage_name}:{tier}:{feedback_id}"
230
+
231
+ try:
232
+ if hasattr(self.memory, "stash"):
233
+ self.memory.stash(
234
+ key=key, data=entry.to_dict(), credentials=None, ttl_seconds=self.FEEDBACK_TTL
235
+ )
236
+ elif hasattr(self.memory, "_redis"):
237
+ import json
238
+
239
+ self.memory._redis.setex(key, self.FEEDBACK_TTL, json.dumps(entry.to_dict()))
240
+ else:
241
+ logger.warning("Cannot store feedback: unsupported memory type")
242
+ return ""
243
+ except Exception as e:
244
+ logger.error(f"Failed to store feedback: {e}")
245
+ return ""
246
+
247
+ logger.debug(
248
+ f"Recorded feedback: {workflow_name}/{stage_name} tier={tier} quality={quality_score:.2f}"
249
+ )
250
+ return feedback_id
251
+
252
+ def get_feedback_history(
253
+ self, workflow_name: str, stage_name: str, tier: str | ModelTier | None = None, limit: int = 100
254
+ ) -> list[FeedbackEntry]:
255
+ """Get feedback history for a workflow stage.
256
+
257
+ Args:
258
+ workflow_name: Name of workflow
259
+ stage_name: Name of stage
260
+ tier: Optional filter by tier
261
+ limit: Maximum number of entries to return
262
+
263
+ Returns:
264
+ List of feedback entries (newest first)
265
+ """
266
+ if not self.memory or not hasattr(self.memory, "_redis"):
267
+ return []
268
+
269
+ # Convert tier to string if ModelTier enum
270
+ if isinstance(tier, ModelTier):
271
+ tier = tier.value
272
+
273
+ try:
274
+ # Build search pattern
275
+ if tier:
276
+ pattern = f"feedback:{workflow_name}:{stage_name}:{tier}:*"
277
+ else:
278
+ pattern = f"feedback:{workflow_name}:{stage_name}:*"
279
+
280
+ keys = self.memory._redis.keys(pattern)
281
+
282
+ entries = []
283
+ for key in keys:
284
+ if isinstance(key, bytes):
285
+ key = key.decode("utf-8")
286
+
287
+ # Retrieve entry
288
+ data = self._retrieve_feedback(key)
289
+ if data:
290
+ entries.append(FeedbackEntry.from_dict(data))
291
+
292
+ if len(entries) >= limit:
293
+ break
294
+
295
+ # Sort by timestamp (newest first)
296
+ entries.sort(key=lambda e: e.timestamp, reverse=True)
297
+
298
+ return entries[:limit]
299
+ except Exception as e:
300
+ logger.error(f"Failed to get feedback history: {e}")
301
+ return []
302
+
303
+ def _retrieve_feedback(self, key: str) -> dict[str, Any] | None:
304
+ """Retrieve feedback entry from memory."""
305
+ if not self.memory:
306
+ return None
307
+
308
+ try:
309
+ if hasattr(self.memory, "retrieve"):
310
+ return self.memory.retrieve(key, credentials=None)
311
+ elif hasattr(self.memory, "_redis"):
312
+ import json
313
+
314
+ data = self.memory._redis.get(key)
315
+ if data:
316
+ if isinstance(data, bytes):
317
+ data = data.decode("utf-8")
318
+ return json.loads(data)
319
+ return None
320
+ except Exception as e:
321
+ logger.debug(f"Failed to retrieve feedback: {e}")
322
+ return None
323
+
324
+ def get_quality_stats(
325
+ self, workflow_name: str, stage_name: str, tier: str | ModelTier | None = None
326
+ ) -> QualityStats | None:
327
+ """Get quality statistics for a workflow stage.
328
+
329
+ Args:
330
+ workflow_name: Name of workflow
331
+ stage_name: Name of stage
332
+ tier: Optional filter by tier
333
+
334
+ Returns:
335
+ Quality statistics or None if insufficient data
336
+ """
337
+ history = self.get_feedback_history(workflow_name, stage_name, tier=tier)
338
+
339
+ if not history:
340
+ return None
341
+
342
+ # Calculate statistics
343
+ quality_scores = [entry.quality_score for entry in history]
344
+
345
+ avg_quality = sum(quality_scores) / len(quality_scores)
346
+ min_quality = min(quality_scores)
347
+ max_quality = max(quality_scores)
348
+
349
+ # Calculate trend (recent vs older feedback)
350
+ if len(history) >= 4:
351
+ recent = quality_scores[: len(quality_scores) // 2]
352
+ older = quality_scores[len(quality_scores) // 2 :]
353
+ recent_avg = sum(recent) / len(recent)
354
+ older_avg = sum(older) / len(older)
355
+ recent_trend = (recent_avg - older_avg) / max(older_avg, 0.1) # Normalized difference
356
+ else:
357
+ recent_trend = 0.0
358
+
359
+ tier_str = tier.value if isinstance(tier, ModelTier) else (tier or "all")
360
+
361
+ return QualityStats(
362
+ workflow_name=workflow_name,
363
+ stage_name=stage_name,
364
+ tier=tier_str,
365
+ avg_quality=avg_quality,
366
+ min_quality=min_quality,
367
+ max_quality=max_quality,
368
+ sample_count=len(history),
369
+ recent_trend=recent_trend,
370
+ )
371
+
372
+ def recommend_tier(
373
+ self, workflow_name: str, stage_name: str, current_tier: str | ModelTier | None = None
374
+ ) -> TierRecommendation:
375
+ """Recommend optimal tier based on quality feedback.
376
+
377
+ Analyzes historical quality data and recommends:
378
+ - Downgrade if current tier consistently delivers high quality (cost optimization)
379
+ - Upgrade if current tier delivers poor quality (quality optimization)
380
+ - Keep current if quality is acceptable
381
+
382
+ Args:
383
+ workflow_name: Name of workflow
384
+ stage_name: Name of stage
385
+ current_tier: Current tier in use (if known)
386
+
387
+ Returns:
388
+ Tier recommendation with confidence and reasoning
389
+ """
390
+ # Convert tier to string if ModelTier enum
391
+ if isinstance(current_tier, ModelTier):
392
+ current_tier = current_tier.value
393
+
394
+ # Get stats for all tiers
395
+ stats_by_tier = {}
396
+ for tier in ["cheap", "capable", "premium"]:
397
+ stats = self.get_quality_stats(workflow_name, stage_name, tier=tier)
398
+ if stats:
399
+ stats_by_tier[tier] = stats
400
+
401
+ # No data - default recommendation
402
+ if not stats_by_tier:
403
+ return TierRecommendation(
404
+ current_tier=current_tier or "unknown",
405
+ recommended_tier=current_tier or "cheap",
406
+ confidence=0.0,
407
+ reason="No feedback data available",
408
+ stats={},
409
+ )
410
+
411
+ # Determine current tier if not provided
412
+ if not current_tier:
413
+ # Use tier with most recent feedback
414
+ all_history = self.get_feedback_history(workflow_name, stage_name, tier=None, limit=1)
415
+ if all_history:
416
+ current_tier = all_history[0].tier
417
+ else:
418
+ current_tier = "cheap"
419
+
420
+ current_stats = stats_by_tier.get(current_tier)
421
+
422
+ # Insufficient data for current tier
423
+ if not current_stats or current_stats.sample_count < self.MIN_SAMPLES:
424
+ return TierRecommendation(
425
+ current_tier=current_tier,
426
+ recommended_tier=current_tier,
427
+ confidence=0.0,
428
+ reason=f"Insufficient data (need {self.MIN_SAMPLES} samples, have {current_stats.sample_count if current_stats else 0})",
429
+ stats=stats_by_tier,
430
+ )
431
+
432
+ # Analyze quality
433
+ avg_quality = current_stats.avg_quality
434
+ confidence = min(current_stats.sample_count / (self.MIN_SAMPLES * 2), 1.0)
435
+
436
+ # Decision logic
437
+ if avg_quality < self.QUALITY_THRESHOLD:
438
+ # Poor quality - recommend upgrade
439
+ if current_tier == "cheap":
440
+ recommended = "capable"
441
+ reason = f"Low quality ({avg_quality:.2f}) - upgrade for better results"
442
+ elif current_tier == "capable":
443
+ recommended = "premium"
444
+ reason = f"Low quality ({avg_quality:.2f}) - upgrade to premium tier"
445
+ else: # premium
446
+ recommended = "premium"
447
+ reason = f"Already using premium tier (quality: {avg_quality:.2f})"
448
+ confidence = 1.0
449
+ elif avg_quality > 0.9 and current_tier != "cheap":
450
+ # Excellent quality - consider downgrade for cost optimization
451
+ if current_tier == "premium":
452
+ # Check if capable tier also has good quality
453
+ capable_stats = stats_by_tier.get("capable")
454
+ if capable_stats and capable_stats.avg_quality > 0.85:
455
+ recommended = "capable"
456
+ reason = f"Excellent quality ({avg_quality:.2f}) - downgrade to save cost"
457
+ else:
458
+ recommended = "premium"
459
+ reason = f"Excellent quality ({avg_quality:.2f}) - keep premium for consistency"
460
+ elif current_tier == "capable":
461
+ # Check if cheap tier also has good quality
462
+ cheap_stats = stats_by_tier.get("cheap")
463
+ if cheap_stats and cheap_stats.avg_quality > 0.85:
464
+ recommended = "cheap"
465
+ reason = f"Excellent quality ({avg_quality:.2f}) - downgrade to save cost"
466
+ else:
467
+ recommended = "capable"
468
+ reason = f"Excellent quality ({avg_quality:.2f}) - keep capable tier"
469
+ else:
470
+ recommended = current_tier
471
+ reason = f"Excellent quality ({avg_quality:.2f}) - maintain current tier"
472
+ else:
473
+ # Acceptable quality - keep current tier
474
+ recommended = current_tier
475
+ reason = f"Acceptable quality ({avg_quality:.2f}) - maintain current tier"
476
+
477
+ return TierRecommendation(
478
+ current_tier=current_tier,
479
+ recommended_tier=recommended,
480
+ confidence=confidence,
481
+ reason=reason,
482
+ stats=stats_by_tier,
483
+ )
484
+
485
+ def get_underperforming_stages(
486
+ self, workflow_name: str, quality_threshold: float = 0.7
487
+ ) -> list[tuple[str, QualityStats]]:
488
+ """Get workflow stages with poor quality scores.
489
+
490
+ Args:
491
+ workflow_name: Name of workflow
492
+ quality_threshold: Threshold below which stage is considered underperforming
493
+
494
+ Returns:
495
+ List of (stage_name, stats) tuples for underperforming stages
496
+ """
497
+ if not self.memory or not hasattr(self.memory, "_redis"):
498
+ return []
499
+
500
+ try:
501
+ # Find all feedback keys for this workflow
502
+ pattern = f"feedback:{workflow_name}:*"
503
+ keys = self.memory._redis.keys(pattern)
504
+
505
+ # Extract unique stages
506
+ stages = set()
507
+ for key in keys:
508
+ if isinstance(key, bytes):
509
+ key = key.decode("utf-8")
510
+ # Parse key: feedback:{workflow}:{stage}:{tier}:{id}
511
+ parts = key.split(":")
512
+ if len(parts) >= 4:
513
+ stages.add(parts[2])
514
+
515
+ # Get stats for each stage
516
+ underperforming = []
517
+ for stage_name in stages:
518
+ stats = self.get_quality_stats(workflow_name, stage_name)
519
+ if stats and stats.avg_quality < quality_threshold:
520
+ underperforming.append((stage_name, stats))
521
+
522
+ # Sort by quality (worst first)
523
+ underperforming.sort(key=lambda x: x[1].avg_quality)
524
+
525
+ return underperforming
526
+ except Exception as e:
527
+ logger.error(f"Failed to get underperforming stages: {e}")
528
+ return []
529
+
530
+ def clear_feedback(self, workflow_name: str, stage_name: str | None = None) -> int:
531
+ """Clear feedback history for a workflow or stage.
532
+
533
+ Args:
534
+ workflow_name: Name of workflow
535
+ stage_name: Optional stage name (clears all stages if None)
536
+
537
+ Returns:
538
+ Number of feedback entries cleared
539
+ """
540
+ if not self.memory or not hasattr(self.memory, "_redis"):
541
+ return 0
542
+
543
+ try:
544
+ if stage_name:
545
+ pattern = f"feedback:{workflow_name}:{stage_name}:*"
546
+ else:
547
+ pattern = f"feedback:{workflow_name}:*"
548
+
549
+ keys = self.memory._redis.keys(pattern)
550
+ if not keys:
551
+ return 0
552
+
553
+ deleted = self.memory._redis.delete(*keys)
554
+ return deleted
555
+ except Exception as e:
556
+ logger.error(f"Failed to clear feedback: {e}")
557
+ return 0
@@ -41,7 +41,7 @@ from __future__ import annotations
41
41
 
42
42
  import logging
43
43
  from dataclasses import dataclass, field
44
- from datetime import datetime, timedelta
44
+ from datetime import datetime
45
45
  from enum import Enum
46
46
  from typing import Any
47
47
  from uuid import uuid4
@@ -229,16 +229,13 @@ class FeedbackLoop:
229
229
  key = f"feedback:{workflow_name}:{stage_name}:{tier}:{feedback_id}"
230
230
 
231
231
  try:
232
- if hasattr(self.memory, "stash"):
233
- self.memory.stash(
234
- key=key, data=entry.to_dict(), credentials=None, ttl_seconds=self.FEEDBACK_TTL
235
- )
236
- elif hasattr(self.memory, "_redis"):
232
+ # Use direct Redis access for custom TTL
233
+ if hasattr(self.memory, "_client") and self.memory._client:
237
234
  import json
238
235
 
239
- self.memory._redis.setex(key, self.FEEDBACK_TTL, json.dumps(entry.to_dict()))
236
+ self.memory._client.setex(key, self.FEEDBACK_TTL, json.dumps(entry.to_dict()))
240
237
  else:
241
- logger.warning("Cannot store feedback: unsupported memory type")
238
+ logger.warning("Cannot store feedback: no Redis backend available")
242
239
  return ""
243
240
  except Exception as e:
244
241
  logger.error(f"Failed to store feedback: {e}")
@@ -263,7 +260,7 @@ class FeedbackLoop:
263
260
  Returns:
264
261
  List of feedback entries (newest first)
265
262
  """
266
- if not self.memory or not hasattr(self.memory, "_redis"):
263
+ if not self.memory or not hasattr(self.memory, "_client"):
267
264
  return []
268
265
 
269
266
  # Convert tier to string if ModelTier enum
@@ -277,7 +274,7 @@ class FeedbackLoop:
277
274
  else:
278
275
  pattern = f"feedback:{workflow_name}:{stage_name}:*"
279
276
 
280
- keys = self.memory._redis.keys(pattern)
277
+ keys = self.memory._client.keys(pattern)
281
278
 
282
279
  entries = []
283
280
  for key in keys:
@@ -308,10 +305,10 @@ class FeedbackLoop:
308
305
  try:
309
306
  if hasattr(self.memory, "retrieve"):
310
307
  return self.memory.retrieve(key, credentials=None)
311
- elif hasattr(self.memory, "_redis"):
308
+ elif hasattr(self.memory, "_client"):
312
309
  import json
313
310
 
314
- data = self.memory._redis.get(key)
311
+ data = self.memory._client.get(key)
315
312
  if data:
316
313
  if isinstance(data, bytes):
317
314
  data = data.decode("utf-8")
@@ -494,13 +491,13 @@ class FeedbackLoop:
494
491
  Returns:
495
492
  List of (stage_name, stats) tuples for underperforming stages
496
493
  """
497
- if not self.memory or not hasattr(self.memory, "_redis"):
494
+ if not self.memory or not hasattr(self.memory, "_client"):
498
495
  return []
499
496
 
500
497
  try:
501
498
  # Find all feedback keys for this workflow
502
499
  pattern = f"feedback:{workflow_name}:*"
503
- keys = self.memory._redis.keys(pattern)
500
+ keys = self.memory._client.keys(pattern)
504
501
 
505
502
  # Extract unique stages
506
503
  stages = set()
@@ -537,7 +534,7 @@ class FeedbackLoop:
537
534
  Returns:
538
535
  Number of feedback entries cleared
539
536
  """
540
- if not self.memory or not hasattr(self.memory, "_redis"):
537
+ if not self.memory or not hasattr(self.memory, "_client"):
541
538
  return 0
542
539
 
543
540
  try:
@@ -546,11 +543,11 @@ class FeedbackLoop:
546
543
  else:
547
544
  pattern = f"feedback:{workflow_name}:*"
548
545
 
549
- keys = self.memory._redis.keys(pattern)
546
+ keys = self.memory._client.keys(pattern)
550
547
  if not keys:
551
548
  return 0
552
549
 
553
- deleted = self.memory._redis.delete(*keys)
550
+ deleted = self.memory._client.delete(*keys)
554
551
  return deleted
555
552
  except Exception as e:
556
553
  logger.error(f"Failed to clear feedback: {e}")
@@ -66,6 +66,8 @@ if TYPE_CHECKING:
66
66
  from .test5 import Test5Workflow
67
67
  from .test_coverage_boost_crew import TestCoverageBoostCrew, TestCoverageBoostCrewResult
68
68
  from .test_gen import TestGenerationWorkflow
69
+ from .test_gen_behavioral import BehavioralTestGenerationWorkflow
70
+ from .test_gen_parallel import ParallelTestGenerationWorkflow
69
71
  from .xml_enhanced_crew import XMLAgent, XMLTask
70
72
 
71
73
  # Only import base module eagerly (small, needed for type checks)
@@ -136,6 +138,8 @@ _LAZY_WORKFLOW_IMPORTS: dict[str, tuple[str, str]] = {
136
138
  "TestCoverageBoostCrew": (".test_coverage_boost_crew", "TestCoverageBoostCrew"),
137
139
  "TestCoverageBoostCrewResult": (".test_coverage_boost_crew", "TestCoverageBoostCrewResult"),
138
140
  "TestGenerationWorkflow": (".test_gen", "TestGenerationWorkflow"),
141
+ "BehavioralTestGenerationWorkflow": (".test_gen_behavioral", "BehavioralTestGenerationWorkflow"),
142
+ "ParallelTestGenerationWorkflow": (".test_gen_parallel", "ParallelTestGenerationWorkflow"),
139
143
  "XMLAgent": (".xml_enhanced_crew", "XMLAgent"),
140
144
  "XMLTask": (".xml_enhanced_crew", "XMLTask"),
141
145
  "parse_xml_response": (".xml_enhanced_crew", "parse_xml_response"),
@@ -213,6 +217,8 @@ _DEFAULT_WORKFLOW_NAMES: dict[str, str] = {
213
217
  "perf-audit": "PerformanceAuditWorkflow",
214
218
  # Generation workflows
215
219
  "test-gen": "TestGenerationWorkflow",
220
+ "test-gen-behavioral": "BehavioralTestGenerationWorkflow",
221
+ "test-gen-parallel": "ParallelTestGenerationWorkflow",
216
222
  "refactor-plan": "RefactorPlanWorkflow",
217
223
  # Operational workflows
218
224
  "dependency-check": "DependencyCheckWorkflow",
@@ -484,6 +490,8 @@ __all__ = [
484
490
  "SecureReleaseResult",
485
491
  "SecurityAuditWorkflow",
486
492
  "TestGenerationWorkflow",
493
+ "BehavioralTestGenerationWorkflow",
494
+ "ParallelTestGenerationWorkflow",
487
495
  # Configuration
488
496
  "WorkflowConfig",
489
497
  "WorkflowResult",