crackerjack 0.30.3__py3-none-any.whl → 0.31.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic; see the advisory linked from the registry listing for details.
- crackerjack/CLAUDE.md +1005 -0
- crackerjack/RULES.md +380 -0
- crackerjack/__init__.py +42 -13
- crackerjack/__main__.py +227 -299
- crackerjack/agents/__init__.py +41 -0
- crackerjack/agents/architect_agent.py +281 -0
- crackerjack/agents/base.py +170 -0
- crackerjack/agents/coordinator.py +512 -0
- crackerjack/agents/documentation_agent.py +498 -0
- crackerjack/agents/dry_agent.py +388 -0
- crackerjack/agents/formatting_agent.py +245 -0
- crackerjack/agents/import_optimization_agent.py +281 -0
- crackerjack/agents/performance_agent.py +669 -0
- crackerjack/agents/proactive_agent.py +104 -0
- crackerjack/agents/refactoring_agent.py +788 -0
- crackerjack/agents/security_agent.py +529 -0
- crackerjack/agents/test_creation_agent.py +657 -0
- crackerjack/agents/test_specialist_agent.py +486 -0
- crackerjack/agents/tracker.py +212 -0
- crackerjack/api.py +560 -0
- crackerjack/cli/__init__.py +24 -0
- crackerjack/cli/facade.py +104 -0
- crackerjack/cli/handlers.py +267 -0
- crackerjack/cli/interactive.py +471 -0
- crackerjack/cli/options.py +409 -0
- crackerjack/cli/utils.py +18 -0
- crackerjack/code_cleaner.py +618 -928
- crackerjack/config/__init__.py +19 -0
- crackerjack/config/hooks.py +218 -0
- crackerjack/core/__init__.py +0 -0
- crackerjack/core/async_workflow_orchestrator.py +406 -0
- crackerjack/core/autofix_coordinator.py +200 -0
- crackerjack/core/container.py +104 -0
- crackerjack/core/enhanced_container.py +542 -0
- crackerjack/core/performance.py +243 -0
- crackerjack/core/phase_coordinator.py +585 -0
- crackerjack/core/proactive_workflow.py +316 -0
- crackerjack/core/session_coordinator.py +289 -0
- crackerjack/core/workflow_orchestrator.py +826 -0
- crackerjack/dynamic_config.py +94 -103
- crackerjack/errors.py +263 -41
- crackerjack/executors/__init__.py +11 -0
- crackerjack/executors/async_hook_executor.py +431 -0
- crackerjack/executors/cached_hook_executor.py +242 -0
- crackerjack/executors/hook_executor.py +345 -0
- crackerjack/executors/individual_hook_executor.py +669 -0
- crackerjack/intelligence/__init__.py +44 -0
- crackerjack/intelligence/adaptive_learning.py +751 -0
- crackerjack/intelligence/agent_orchestrator.py +551 -0
- crackerjack/intelligence/agent_registry.py +414 -0
- crackerjack/intelligence/agent_selector.py +502 -0
- crackerjack/intelligence/integration.py +290 -0
- crackerjack/interactive.py +576 -315
- crackerjack/managers/__init__.py +11 -0
- crackerjack/managers/async_hook_manager.py +135 -0
- crackerjack/managers/hook_manager.py +137 -0
- crackerjack/managers/publish_manager.py +433 -0
- crackerjack/managers/test_command_builder.py +151 -0
- crackerjack/managers/test_executor.py +443 -0
- crackerjack/managers/test_manager.py +258 -0
- crackerjack/managers/test_manager_backup.py +1124 -0
- crackerjack/managers/test_progress.py +114 -0
- crackerjack/mcp/__init__.py +0 -0
- crackerjack/mcp/cache.py +336 -0
- crackerjack/mcp/client_runner.py +104 -0
- crackerjack/mcp/context.py +621 -0
- crackerjack/mcp/dashboard.py +636 -0
- crackerjack/mcp/enhanced_progress_monitor.py +479 -0
- crackerjack/mcp/file_monitor.py +336 -0
- crackerjack/mcp/progress_components.py +569 -0
- crackerjack/mcp/progress_monitor.py +949 -0
- crackerjack/mcp/rate_limiter.py +332 -0
- crackerjack/mcp/server.py +22 -0
- crackerjack/mcp/server_core.py +244 -0
- crackerjack/mcp/service_watchdog.py +501 -0
- crackerjack/mcp/state.py +395 -0
- crackerjack/mcp/task_manager.py +257 -0
- crackerjack/mcp/tools/__init__.py +17 -0
- crackerjack/mcp/tools/core_tools.py +249 -0
- crackerjack/mcp/tools/error_analyzer.py +308 -0
- crackerjack/mcp/tools/execution_tools.py +372 -0
- crackerjack/mcp/tools/execution_tools_backup.py +1097 -0
- crackerjack/mcp/tools/intelligence_tool_registry.py +80 -0
- crackerjack/mcp/tools/intelligence_tools.py +314 -0
- crackerjack/mcp/tools/monitoring_tools.py +502 -0
- crackerjack/mcp/tools/proactive_tools.py +384 -0
- crackerjack/mcp/tools/progress_tools.py +217 -0
- crackerjack/mcp/tools/utility_tools.py +341 -0
- crackerjack/mcp/tools/workflow_executor.py +565 -0
- crackerjack/mcp/websocket/__init__.py +14 -0
- crackerjack/mcp/websocket/app.py +39 -0
- crackerjack/mcp/websocket/endpoints.py +559 -0
- crackerjack/mcp/websocket/jobs.py +253 -0
- crackerjack/mcp/websocket/server.py +116 -0
- crackerjack/mcp/websocket/websocket_handler.py +78 -0
- crackerjack/mcp/websocket_server.py +10 -0
- crackerjack/models/__init__.py +31 -0
- crackerjack/models/config.py +93 -0
- crackerjack/models/config_adapter.py +230 -0
- crackerjack/models/protocols.py +118 -0
- crackerjack/models/task.py +154 -0
- crackerjack/monitoring/ai_agent_watchdog.py +450 -0
- crackerjack/monitoring/regression_prevention.py +638 -0
- crackerjack/orchestration/__init__.py +0 -0
- crackerjack/orchestration/advanced_orchestrator.py +970 -0
- crackerjack/orchestration/coverage_improvement.py +223 -0
- crackerjack/orchestration/execution_strategies.py +341 -0
- crackerjack/orchestration/test_progress_streamer.py +636 -0
- crackerjack/plugins/__init__.py +15 -0
- crackerjack/plugins/base.py +200 -0
- crackerjack/plugins/hooks.py +246 -0
- crackerjack/plugins/loader.py +335 -0
- crackerjack/plugins/managers.py +259 -0
- crackerjack/py313.py +8 -3
- crackerjack/services/__init__.py +22 -0
- crackerjack/services/cache.py +314 -0
- crackerjack/services/config.py +358 -0
- crackerjack/services/config_integrity.py +99 -0
- crackerjack/services/contextual_ai_assistant.py +516 -0
- crackerjack/services/coverage_ratchet.py +356 -0
- crackerjack/services/debug.py +736 -0
- crackerjack/services/dependency_monitor.py +617 -0
- crackerjack/services/enhanced_filesystem.py +439 -0
- crackerjack/services/file_hasher.py +151 -0
- crackerjack/services/filesystem.py +421 -0
- crackerjack/services/git.py +176 -0
- crackerjack/services/health_metrics.py +611 -0
- crackerjack/services/initialization.py +873 -0
- crackerjack/services/log_manager.py +286 -0
- crackerjack/services/logging.py +174 -0
- crackerjack/services/metrics.py +578 -0
- crackerjack/services/pattern_cache.py +362 -0
- crackerjack/services/pattern_detector.py +515 -0
- crackerjack/services/performance_benchmarks.py +653 -0
- crackerjack/services/security.py +163 -0
- crackerjack/services/server_manager.py +234 -0
- crackerjack/services/smart_scheduling.py +144 -0
- crackerjack/services/tool_version_service.py +61 -0
- crackerjack/services/unified_config.py +437 -0
- crackerjack/services/version_checker.py +248 -0
- crackerjack/slash_commands/__init__.py +14 -0
- crackerjack/slash_commands/init.md +122 -0
- crackerjack/slash_commands/run.md +163 -0
- crackerjack/slash_commands/status.md +127 -0
- crackerjack-0.31.7.dist-info/METADATA +742 -0
- crackerjack-0.31.7.dist-info/RECORD +149 -0
- crackerjack-0.31.7.dist-info/entry_points.txt +2 -0
- crackerjack/.gitignore +0 -34
- crackerjack/.libcst.codemod.yaml +0 -18
- crackerjack/.pdm.toml +0 -1
- crackerjack/crackerjack.py +0 -3805
- crackerjack/pyproject.toml +0 -286
- crackerjack-0.30.3.dist-info/METADATA +0 -1290
- crackerjack-0.30.3.dist-info/RECORD +0 -16
- {crackerjack-0.30.3.dist-info → crackerjack-0.31.7.dist-info}/WHEEL +0 -0
- {crackerjack-0.30.3.dist-info → crackerjack-0.31.7.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,751 @@
|
|
|
1
|
+
"""Adaptive Learning System for Agent Selection.
|
|
2
|
+
|
|
3
|
+
Learns from execution results to improve agent selection over time through
|
|
4
|
+
success tracking, capability refinement, and performance optimization.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import typing as t
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from dataclasses import asdict, dataclass, field
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from .agent_registry import AgentCapability, RegisteredAgent
|
|
17
|
+
from .agent_selector import AgentScore, TaskDescription
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class ExecutionRecord:
    """Record of a single agent execution.

    One record is created per execution, kept in memory and appended as a
    JSON line to the execution log; it is the raw input for all learning.
    """

    timestamp: datetime  # when the execution was recorded
    agent_name: str  # name of the agent that ran
    agent_source: str  # registry source value for the agent
    task_description: str  # free-text task description
    task_capabilities: list[str]  # inferred capability names for the task
    success: bool  # whether the execution succeeded
    execution_time: float  # wall-clock duration in seconds
    confidence_score: float  # selector confidence factor at selection time
    final_score: float  # selector final score at selection time
    error_message: str | None = None  # error text when success is False
    task_hash: str | None = None  # For grouping similar tasks
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class AgentPerformanceMetrics:
    """Aggregated performance metrics for a single agent.

    Updated incrementally after each execution; the averages are
    exponential moving averages rather than full recomputations.
    """

    total_executions: int = 0  # lifetime execution count
    successful_executions: int = 0
    failed_executions: int = 0
    average_execution_time: float = 0.0  # EMA of execution times, seconds
    average_confidence: float = 0.0  # EMA of selector confidence scores
    success_rate: float = 0.0  # successful_executions / total_executions
    capability_success_rates: dict[str, float] = field(default_factory=dict)  # per-capability EMA
    recent_performance_trend: float = 0.0  # -1 to 1, negative = declining
    last_updated: datetime = field(default_factory=datetime.now)  # last refresh time
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class LearningInsight:
    """A learning insight discovered from execution data."""

    insight_type: str  # "capability_strength", "task_pattern", "failure_pattern"
    agent_name: str  # agent the insight is about
    confidence: float  # 0-1, strength of the supporting evidence
    description: str  # human-readable summary
    supporting_evidence: dict[str, t.Any]  # raw numbers backing the insight
    discovered_at: datetime = field(default_factory=datetime.now)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AdaptiveLearningSystem:
|
|
65
|
+
"""System that learns from agent execution results."""
|
|
66
|
+
|
|
67
|
+
    def __init__(self, data_dir: Path | None = None) -> None:
        """Initialize the learning system and hydrate state from disk.

        Args:
            data_dir: Directory for persisted learning data; defaults to
                ``~/.crackerjack/intelligence`` and is created if missing.
        """
        self.logger = logging.getLogger(__name__)
        self.data_dir = data_dir or Path.home() / ".crackerjack" / "intelligence"
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Persistence layout: append-only JSONL log plus two JSON snapshots.
        self.execution_log_path = self.data_dir / "execution_log.jsonl"
        self.metrics_path = self.data_dir / "agent_metrics.json"
        self.insights_path = self.data_dir / "learning_insights.json"

        # In-memory state, filled by _load_existing_data() below.
        self._execution_records: list[ExecutionRecord] = []
        self._agent_metrics: dict[str, AgentPerformanceMetrics] = {}
        self._learning_insights: list[LearningInsight] = []

        self._load_existing_data()
|
|
81
|
+
|
|
82
|
+
def _load_existing_data(self) -> None:
|
|
83
|
+
"""Load existing learning data from disk."""
|
|
84
|
+
try:
|
|
85
|
+
self._load_execution_records()
|
|
86
|
+
self._load_agent_metrics()
|
|
87
|
+
self._load_learning_insights()
|
|
88
|
+
except Exception as e:
|
|
89
|
+
self.logger.warning(f"Error loading existing learning data: {e}")
|
|
90
|
+
|
|
91
|
+
    def _load_execution_records(self) -> None:
        """Load execution records from the JSONL log, if present.

        Each non-blank line is one JSON object; timestamps are stored as
        ISO strings and parsed back into ``datetime`` objects.
        """
        if not self.execution_log_path.exists():
            return

        with self.execution_log_path.open("r") as f:
            for line in f:
                if line.strip():  # skip blank lines defensively
                    data = json.loads(line)
                    data["timestamp"] = datetime.fromisoformat(data["timestamp"])
                    record = ExecutionRecord(**data)
                    self._execution_records.append(record)

        self.logger.debug(f"Loaded {len(self._execution_records)} execution records")
|
|
105
|
+
|
|
106
|
+
    def _load_agent_metrics(self) -> None:
        """Load per-agent metrics from the JSON snapshot, if present."""
        if not self.metrics_path.exists():
            return

        with self.metrics_path.open("r") as f:
            metrics_data = json.load(f)
            for agent_name, data in metrics_data.items():
                # last_updated is serialized as an ISO string; restore datetime.
                data["last_updated"] = datetime.fromisoformat(data["last_updated"])
                self._agent_metrics[agent_name] = AgentPerformanceMetrics(**data)

        self.logger.debug(f"Loaded metrics for {len(self._agent_metrics)} agents")
|
|
118
|
+
|
|
119
|
+
    def _load_learning_insights(self) -> None:
        """Load learning insights from the JSON snapshot, if present."""
        if not self.insights_path.exists():
            return

        with self.insights_path.open("r") as f:
            insights_data = json.load(f)
            for insight_data in insights_data:
                # discovered_at is serialized as an ISO string; restore datetime.
                insight_data["discovered_at"] = datetime.fromisoformat(
                    insight_data["discovered_at"]
                )
                insight = LearningInsight(**insight_data)
                self._learning_insights.append(insight)

        self.logger.debug(f"Loaded {len(self._learning_insights)} learning insights")
|
|
134
|
+
|
|
135
|
+
async def record_execution(
|
|
136
|
+
self,
|
|
137
|
+
agent: RegisteredAgent,
|
|
138
|
+
task: TaskDescription,
|
|
139
|
+
success: bool,
|
|
140
|
+
execution_time: float,
|
|
141
|
+
agent_score: AgentScore,
|
|
142
|
+
error_message: str | None = None,
|
|
143
|
+
) -> None:
|
|
144
|
+
"""Record the result of an agent execution."""
|
|
145
|
+
try:
|
|
146
|
+
# Create execution record
|
|
147
|
+
record = ExecutionRecord(
|
|
148
|
+
timestamp=datetime.now(),
|
|
149
|
+
agent_name=agent.metadata.name,
|
|
150
|
+
agent_source=agent.metadata.source.value,
|
|
151
|
+
task_description=task.description,
|
|
152
|
+
task_capabilities=[
|
|
153
|
+
cap.value for cap in self._infer_task_capabilities(task)
|
|
154
|
+
],
|
|
155
|
+
success=success,
|
|
156
|
+
execution_time=execution_time,
|
|
157
|
+
confidence_score=agent_score.confidence_factor,
|
|
158
|
+
final_score=agent_score.final_score,
|
|
159
|
+
error_message=error_message,
|
|
160
|
+
task_hash=self._hash_task(task),
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# Add to records
|
|
164
|
+
self._execution_records.append(record)
|
|
165
|
+
|
|
166
|
+
# Update agent metrics
|
|
167
|
+
await self._update_agent_metrics(record)
|
|
168
|
+
|
|
169
|
+
# Persist to disk
|
|
170
|
+
await self._persist_execution_record(record)
|
|
171
|
+
|
|
172
|
+
# Trigger learning analysis (async)
|
|
173
|
+
asyncio.create_task(self._analyze_and_learn())
|
|
174
|
+
|
|
175
|
+
self.logger.debug(
|
|
176
|
+
f"Recorded execution: {agent.metadata.name} on '{task.description[:30]}...' "
|
|
177
|
+
f"({'success' if success else 'failure'})"
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
except Exception as e:
|
|
181
|
+
self.logger.error(f"Error recording execution: {e}")
|
|
182
|
+
|
|
183
|
+
def _infer_task_capabilities(self, task: TaskDescription) -> set[AgentCapability]:
|
|
184
|
+
"""Infer capabilities needed for a task (simplified version)."""
|
|
185
|
+
capabilities = set()
|
|
186
|
+
text = task.description.lower()
|
|
187
|
+
|
|
188
|
+
# Capability mapping for efficiency
|
|
189
|
+
capability_keywords = {
|
|
190
|
+
AgentCapability.ARCHITECTURE: ("architect", "design", "structure"),
|
|
191
|
+
AgentCapability.REFACTORING: ("refactor", "clean", "improve"),
|
|
192
|
+
AgentCapability.TESTING: ("test", "pytest", "coverage"),
|
|
193
|
+
AgentCapability.SECURITY: ("security", "secure", "vulnerability"),
|
|
194
|
+
AgentCapability.PERFORMANCE: ("performance", "optimize", "speed"),
|
|
195
|
+
AgentCapability.DOCUMENTATION: ("document", "readme", "comment"),
|
|
196
|
+
AgentCapability.FORMATTING: ("format", "style", "lint"),
|
|
197
|
+
AgentCapability.DEBUGGING: ("debug", "fix", "error"),
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
for capability, keywords in capability_keywords.items():
|
|
201
|
+
if any(word in text for word in keywords):
|
|
202
|
+
capabilities.add(capability)
|
|
203
|
+
|
|
204
|
+
if not capabilities:
|
|
205
|
+
capabilities.add(AgentCapability.CODE_ANALYSIS)
|
|
206
|
+
|
|
207
|
+
return capabilities
|
|
208
|
+
|
|
209
|
+
def _hash_task(self, task: TaskDescription) -> str:
|
|
210
|
+
"""Create a hash for grouping similar tasks."""
|
|
211
|
+
# Simple hash based on key words
|
|
212
|
+
words = task.description.lower().split()
|
|
213
|
+
key_words = [w for w in words if len(w) > 3][
|
|
214
|
+
:10
|
|
215
|
+
] # Take first 10 significant words
|
|
216
|
+
return "_".join(sorted(key_words))
|
|
217
|
+
|
|
218
|
+
    async def _update_agent_metrics(self, record: ExecutionRecord) -> None:
        """Update metrics for an agent based on an execution record.

        Delegates to focused helpers (counters, averages, capability rates,
        trend), stamps the refresh time, then persists the full snapshot.
        """
        agent_name = record.agent_name
        metrics = self._ensure_agent_metrics(agent_name)

        self._update_basic_counters(metrics, record)
        self._update_execution_averages(metrics, record)
        self._update_capability_success_rates(metrics, record, agent_name)
        self._update_performance_trend(metrics, agent_name)

        metrics.last_updated = datetime.now()
        await self._persist_agent_metrics()
|
|
230
|
+
|
|
231
|
+
def _ensure_agent_metrics(self, agent_name: str) -> AgentPerformanceMetrics:
|
|
232
|
+
"""Ensure agent metrics exist and return them."""
|
|
233
|
+
if agent_name not in self._agent_metrics:
|
|
234
|
+
self._agent_metrics[agent_name] = AgentPerformanceMetrics()
|
|
235
|
+
return self._agent_metrics[agent_name]
|
|
236
|
+
|
|
237
|
+
def _update_basic_counters(
|
|
238
|
+
self, metrics: AgentPerformanceMetrics, record: ExecutionRecord
|
|
239
|
+
) -> None:
|
|
240
|
+
"""Update basic execution counters and success rate."""
|
|
241
|
+
metrics.total_executions += 1
|
|
242
|
+
if record.success:
|
|
243
|
+
metrics.successful_executions += 1
|
|
244
|
+
else:
|
|
245
|
+
metrics.failed_executions += 1
|
|
246
|
+
metrics.success_rate = metrics.successful_executions / metrics.total_executions
|
|
247
|
+
|
|
248
|
+
def _update_execution_averages(
|
|
249
|
+
self, metrics: AgentPerformanceMetrics, record: ExecutionRecord
|
|
250
|
+
) -> None:
|
|
251
|
+
"""Update execution time and confidence averages."""
|
|
252
|
+
if metrics.total_executions == 1:
|
|
253
|
+
metrics.average_execution_time = record.execution_time
|
|
254
|
+
metrics.average_confidence = record.confidence_score
|
|
255
|
+
else:
|
|
256
|
+
alpha = 0.3
|
|
257
|
+
metrics.average_execution_time = (
|
|
258
|
+
alpha * record.execution_time
|
|
259
|
+
+ (1 - alpha) * metrics.average_execution_time
|
|
260
|
+
)
|
|
261
|
+
metrics.average_confidence = (
|
|
262
|
+
alpha * record.confidence_score
|
|
263
|
+
+ (1 - alpha) * metrics.average_confidence
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
    def _update_capability_success_rates(
        self, metrics: AgentPerformanceMetrics, record: ExecutionRecord, agent_name: str
    ) -> None:
        """Update capability-specific success rates.

        Each touched capability gets an exponential moving average whose
        smoothing factor shrinks (capped at 0.5) as more samples arrive,
        so later samples perturb the rate less.
        """
        success_value = 1.0 if record.success else 0.0

        for capability in record.task_capabilities:
            if capability not in metrics.capability_success_rates:
                metrics.capability_success_rates[capability] = 0.0

            current_rate = metrics.capability_success_rates[capability]
            # Sample count for this agent/capability over the last 50
            # records (includes the record currently being processed).
            capability_executions = len(
                [
                    r
                    for r in self._execution_records[-50:]
                    if r.agent_name == agent_name and capability in r.task_capabilities
                ]
            )

            if capability_executions <= 1:
                # First observed sample seeds the rate directly.
                metrics.capability_success_rates[capability] = success_value
            else:
                alpha = min(0.5, 2.0 / capability_executions)
                metrics.capability_success_rates[capability] = (
                    alpha * success_value + (1 - alpha) * current_rate
                )
|
|
292
|
+
|
|
293
|
+
    def _update_performance_trend(
        self, metrics: AgentPerformanceMetrics, agent_name: str
    ) -> None:
        """Update the recent performance trend for an agent.

        Compares windowed success rates between the first and second half
        of the agent's last (up to) 10 executions; positive = improving.
        With fewer than 5 recent executions the trend is left unchanged.
        """
        # NOTE(review): only the last 20 global records are scanned, so an
        # agent's older runs may fall out of view when other agents are
        # busy — confirm this windowing is intended.
        recent_records = [
            r for r in self._execution_records[-20:] if r.agent_name == agent_name
        ][-10:]

        if len(recent_records) >= 5:
            recent_success_rates = self._calculate_windowed_success_rates(
                recent_records
            )
            if len(recent_success_rates) >= 2:
                mid = len(recent_success_rates) // 2
                first_half_avg = sum(recent_success_rates[:mid]) / max(mid, 1)
                second_half_avg = sum(recent_success_rates[mid:]) / max(
                    len(recent_success_rates) - mid, 1
                )
                # Positive delta = improving, negative = declining.
                metrics.recent_performance_trend = second_half_avg - first_half_avg
|
|
312
|
+
|
|
313
|
+
def _calculate_windowed_success_rates(
|
|
314
|
+
self, recent_records: list[ExecutionRecord]
|
|
315
|
+
) -> list[float]:
|
|
316
|
+
"""Calculate success rates using sliding window."""
|
|
317
|
+
window_size = 3
|
|
318
|
+
success_rates = []
|
|
319
|
+
|
|
320
|
+
for i in range(len(recent_records) - window_size + 1):
|
|
321
|
+
window = recent_records[i : i + window_size]
|
|
322
|
+
window_success_rate = sum(1 for r in window if r.success) / len(window)
|
|
323
|
+
success_rates.append(window_success_rate)
|
|
324
|
+
|
|
325
|
+
return success_rates
|
|
326
|
+
|
|
327
|
+
    async def _persist_execution_record(self, record: ExecutionRecord) -> None:
        """Append one execution record to the JSONL log.

        Timestamps are serialized to ISO format; errors are logged and
        swallowed so a persistence problem never breaks recording.
        """
        try:
            with self.execution_log_path.open("a") as f:
                data = asdict(record)
                data["timestamp"] = data["timestamp"].isoformat()
                f.write(json.dumps(data) + "\n")
        except Exception as e:
            self.logger.error(f"Error persisting execution record: {e}")
|
|
336
|
+
|
|
337
|
+
    async def _persist_agent_metrics(self) -> None:
        """Write the full agent-metrics snapshot to disk as JSON.

        Errors are logged and swallowed so persistence never raises.
        """
        try:
            metrics_data = {}
            for agent_name, metrics in self._agent_metrics.items():
                data = asdict(metrics)
                # datetime is not JSON-serializable; store an ISO string.
                data["last_updated"] = data["last_updated"].isoformat()
                metrics_data[agent_name] = data

            with self.metrics_path.open("w") as f:
                json.dump(metrics_data, f, indent=2)

        except Exception as e:
            self.logger.error(f"Error persisting agent metrics: {e}")
|
|
351
|
+
|
|
352
|
+
    async def _persist_learning_insights(self) -> None:
        """Write all learning insights to disk as a JSON array.

        Errors are logged and swallowed so persistence never raises.
        """
        try:
            insights_data = []
            for insight in self._learning_insights:
                data = asdict(insight)
                # datetime is not JSON-serializable; store an ISO string.
                data["discovered_at"] = data["discovered_at"].isoformat()
                insights_data.append(data)

            with self.insights_path.open("w") as f:
                json.dump(insights_data, f, indent=2)

        except Exception as e:
            self.logger.error(f"Error persisting learning insights: {e}")
|
|
366
|
+
|
|
367
|
+
    async def _analyze_and_learn(self) -> None:
        """Analyze execution data and persist any new learning insights.

        Runs the three analyzers (capability strengths, failure patterns,
        task patterns), de-duplicates against stored insights, and writes
        the updated list to disk. Errors are logged, never raised, since
        this runs as a fire-and-forget background task.
        """
        try:
            new_insights = []

            # Analyze capability strengths
            capability_insights = self._analyze_capability_strengths()
            new_insights.extend(capability_insights)

            # Analyze failure patterns
            failure_insights = self._analyze_failure_patterns()
            new_insights.extend(failure_insights)

            # Analyze task patterns
            task_pattern_insights = self._analyze_task_patterns()
            new_insights.extend(task_pattern_insights)

            # Add new insights (avoid duplicates)
            for insight in new_insights:
                if not self._is_duplicate_insight(insight):
                    self._learning_insights.append(insight)
                    self.logger.debug(f"New learning insight: {insight.description}")

            # Persist insights
            await self._persist_learning_insights()

        except Exception as e:
            self.logger.error(f"Error in learning analysis: {e}")
|
|
395
|
+
|
|
396
|
+
def _analyze_capability_strengths(self) -> list[LearningInsight]:
|
|
397
|
+
"""Analyze which agents excel at which capabilities."""
|
|
398
|
+
capability_performance = self._group_capability_performance()
|
|
399
|
+
insights = []
|
|
400
|
+
|
|
401
|
+
for capability, agents in capability_performance.items():
|
|
402
|
+
insights.extend(self._find_capability_experts(capability, agents))
|
|
403
|
+
|
|
404
|
+
return insights
|
|
405
|
+
|
|
406
|
+
def _group_capability_performance(self) -> dict[str, dict[str, list[bool]]]:
|
|
407
|
+
"""Group execution records by capability and agent."""
|
|
408
|
+
capability_performance = defaultdict(lambda: defaultdict(list))
|
|
409
|
+
|
|
410
|
+
for record in self._execution_records[-100:]:
|
|
411
|
+
for capability in record.task_capabilities:
|
|
412
|
+
capability_performance[capability][record.agent_name].append(
|
|
413
|
+
record.success
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
return dict(capability_performance)
|
|
417
|
+
|
|
418
|
+
def _find_capability_experts(
|
|
419
|
+
self, capability: str, agents: dict[str, list[bool]]
|
|
420
|
+
) -> list[LearningInsight]:
|
|
421
|
+
"""Find agents with exceptional performance in a specific capability."""
|
|
422
|
+
insights = []
|
|
423
|
+
|
|
424
|
+
for agent_name, successes in agents.items():
|
|
425
|
+
if len(successes) >= 3: # Minimum sample size
|
|
426
|
+
success_rate = sum(successes) / len(successes)
|
|
427
|
+
|
|
428
|
+
if success_rate >= 0.9: # High success rate
|
|
429
|
+
insight = LearningInsight(
|
|
430
|
+
insight_type="capability_strength",
|
|
431
|
+
agent_name=agent_name,
|
|
432
|
+
confidence=min(success_rate, len(successes) / 10.0),
|
|
433
|
+
description=f"{agent_name} excels at {capability} tasks (success rate: {success_rate:.1%})",
|
|
434
|
+
supporting_evidence={
|
|
435
|
+
"capability": capability,
|
|
436
|
+
"success_rate": success_rate,
|
|
437
|
+
"sample_size": len(successes),
|
|
438
|
+
"recent_performance": successes,
|
|
439
|
+
},
|
|
440
|
+
)
|
|
441
|
+
insights.append(insight)
|
|
442
|
+
|
|
443
|
+
return insights
|
|
444
|
+
|
|
445
|
+
def _analyze_failure_patterns(self) -> list[LearningInsight]:
|
|
446
|
+
"""Analyze common failure patterns."""
|
|
447
|
+
failure_patterns = self._group_failure_patterns()
|
|
448
|
+
return self._extract_significant_failure_insights(failure_patterns)
|
|
449
|
+
|
|
450
|
+
def _group_failure_patterns(self) -> dict[str, dict[str, int]]:
|
|
451
|
+
"""Group failure patterns by agent and error type."""
|
|
452
|
+
failure_patterns = defaultdict(lambda: defaultdict(int))
|
|
453
|
+
|
|
454
|
+
for record in self._execution_records[-100:]:
|
|
455
|
+
if not record.success and record.error_message:
|
|
456
|
+
error_type = self._categorize_error(record.error_message)
|
|
457
|
+
failure_patterns[record.agent_name][error_type] += 1
|
|
458
|
+
|
|
459
|
+
return {
|
|
460
|
+
agent_name: dict(patterns)
|
|
461
|
+
for agent_name, patterns in failure_patterns.items()
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
def _extract_significant_failure_insights(
|
|
465
|
+
self, failure_patterns: dict[str, dict[str, int]]
|
|
466
|
+
) -> list[LearningInsight]:
|
|
467
|
+
"""Extract significant failure pattern insights."""
|
|
468
|
+
insights = []
|
|
469
|
+
|
|
470
|
+
for agent_name, patterns in failure_patterns.items():
|
|
471
|
+
agent_insights = self._extract_agent_failure_insights(agent_name, patterns)
|
|
472
|
+
insights.extend(agent_insights)
|
|
473
|
+
|
|
474
|
+
return insights
|
|
475
|
+
|
|
476
|
+
def _extract_agent_failure_insights(
|
|
477
|
+
self, agent_name: str, patterns: dict[str, int]
|
|
478
|
+
) -> list[LearningInsight]:
|
|
479
|
+
"""Extract failure insights for a specific agent."""
|
|
480
|
+
total_failures = sum(patterns.values())
|
|
481
|
+
if total_failures < 3: # Minimum sample size
|
|
482
|
+
return []
|
|
483
|
+
|
|
484
|
+
insights = []
|
|
485
|
+
for error_type, count in patterns.items():
|
|
486
|
+
if count / total_failures >= 0.5: # Common pattern
|
|
487
|
+
insight = self._create_failure_insight(
|
|
488
|
+
agent_name, error_type, count, total_failures
|
|
489
|
+
)
|
|
490
|
+
insights.append(insight)
|
|
491
|
+
|
|
492
|
+
return insights
|
|
493
|
+
|
|
494
|
+
    def _create_failure_insight(
        self, agent_name: str, error_type: str, count: int, total_failures: int
    ) -> LearningInsight:
        """Create a failure-pattern insight.

        Confidence is the share of this agent's failures attributable to
        *error_type* (count / total_failures).
        """
        return LearningInsight(
            insight_type="failure_pattern",
            agent_name=agent_name,
            confidence=count / total_failures,
            description=f"{agent_name} commonly fails with {error_type} errors",
            supporting_evidence={
                "error_type": error_type,
                "occurrence_rate": count / total_failures,
                "total_failures": total_failures,
                "pattern_count": count,
            },
        )
|
|
510
|
+
|
|
511
|
+
    def _analyze_task_patterns(self) -> list[LearningInsight]:
        """Find task patterns where one agent clearly outperforms the rest.

        Only task hashes attempted by more than one agent are considered,
        and the best agent must have a success rate of at least 0.8.
        """
        task_performance = self._group_task_performance()
        insights = []

        for task_hash, agents in task_performance.items():
            if len(agents) > 1:  # Multiple agents tried this task type
                best_agent, best_rate = self._find_best_performing_agent(agents)

                if best_agent and best_rate >= 0.8:
                    insight = self._create_task_pattern_insight(
                        task_hash, best_agent, best_rate, agents
                    )
                    insights.append(insight)

        return insights
|
|
527
|
+
|
|
528
|
+
def _group_task_performance(self) -> dict[str, dict[str, list[bool]]]:
|
|
529
|
+
"""Group task performance by hash and agent."""
|
|
530
|
+
task_performance = defaultdict(lambda: defaultdict(list))
|
|
531
|
+
|
|
532
|
+
for record in self._execution_records[-100:]:
|
|
533
|
+
if record.task_hash:
|
|
534
|
+
task_performance[record.task_hash][record.agent_name].append(
|
|
535
|
+
record.success
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
return dict(task_performance)
|
|
539
|
+
|
|
540
|
+
def _find_best_performing_agent(
|
|
541
|
+
self, agents: dict[str, list[bool]]
|
|
542
|
+
) -> tuple[str | None, float]:
|
|
543
|
+
"""Find the best performing agent for a task pattern."""
|
|
544
|
+
best_agent = None
|
|
545
|
+
best_rate = 0.0
|
|
546
|
+
|
|
547
|
+
for agent_name, successes in agents.items():
|
|
548
|
+
if len(successes) >= 2: # Minimum attempts
|
|
549
|
+
success_rate = sum(successes) / len(successes)
|
|
550
|
+
if success_rate > best_rate:
|
|
551
|
+
best_rate = success_rate
|
|
552
|
+
best_agent = agent_name
|
|
553
|
+
|
|
554
|
+
return best_agent, best_rate
|
|
555
|
+
|
|
556
|
+
    def _create_task_pattern_insight(
        self,
        task_hash: str,
        best_agent: str,
        best_rate: float,
        agents: dict[str, list[bool]],
    ) -> LearningInsight:
        """Create a task-pattern insight naming the preferred agent.

        Looks up one concrete task description matching the hash so the
        insight is readable; falls back to a placeholder when none exists.
        """
        example_task = next(
            (
                r.task_description
                for r in self._execution_records
                if r.task_hash == task_hash and r.agent_name == best_agent
            ),
            "Unknown task pattern",
        )

        return LearningInsight(
            insight_type="task_pattern",
            agent_name=best_agent,
            confidence=best_rate,
            description=f"{best_agent} is preferred for tasks like: {example_task[:100]}...",
            supporting_evidence={
                "task_pattern": task_hash,
                "success_rate": best_rate,
                "example_task": example_task,
                "competing_agents": list(agents.keys()),
            },
        )
|
|
585
|
+
|
|
586
|
+
def _categorize_error(self, error_message: str) -> str:
|
|
587
|
+
"""Categorize error message into type."""
|
|
588
|
+
error_lower = error_message.lower()
|
|
589
|
+
|
|
590
|
+
if "timeout" in error_lower:
|
|
591
|
+
return "timeout"
|
|
592
|
+
elif "import" in error_lower:
|
|
593
|
+
return "import_error"
|
|
594
|
+
elif "type" in error_lower:
|
|
595
|
+
return "type_error"
|
|
596
|
+
elif "permission" in error_lower:
|
|
597
|
+
return "permission_error"
|
|
598
|
+
elif "not found" in error_lower:
|
|
599
|
+
return "not_found"
|
|
600
|
+
elif "syntax" in error_lower:
|
|
601
|
+
return "syntax_error"
|
|
602
|
+
|
|
603
|
+
return "other"
|
|
604
|
+
|
|
605
|
+
def _is_duplicate_insight(self, new_insight: LearningInsight) -> bool:
    """Return True when a near-identical insight is already recorded.

    Two insights count as duplicates when they share insight type and agent
    and their confidence values differ by less than 0.1.
    """
    return any(
        known.insight_type == new_insight.insight_type
        and known.agent_name == new_insight.agent_name
        and abs(known.confidence - new_insight.confidence) < 0.1
        for known in self._learning_insights
    )
|
|
615
|
+
|
|
616
|
+
def get_agent_recommendations(
    self,
    task: TaskDescription,
    candidate_agents: list[str],
) -> dict[str, float]:
    """Score each candidate agent for *task* based on accumulated learning.

    Scores combine historical metrics and insight adjustments (see
    ``_calculate_agent_score``) and are capped at 1.0.
    """
    capabilities = [cap.value for cap in self._infer_task_capabilities(task)]
    pattern = self._hash_task(task)

    recommendations: dict[str, float] = {}
    for candidate in candidate_agents:
        raw_score = self._calculate_agent_score(candidate, capabilities, pattern)
        recommendations[candidate] = min(raw_score, 1.0)
    return recommendations
|
|
632
|
+
|
|
633
|
+
def _calculate_agent_score(
|
|
634
|
+
self, agent_name: str, task_capabilities: list[str], task_hash: str
|
|
635
|
+
) -> float:
|
|
636
|
+
"""Calculate recommendation score for a specific agent."""
|
|
637
|
+
score = 0.0
|
|
638
|
+
|
|
639
|
+
# Base score from metrics
|
|
640
|
+
if agent_name in self._agent_metrics:
|
|
641
|
+
metrics = self._agent_metrics[agent_name]
|
|
642
|
+
score += self._calculate_metrics_score(metrics, task_capabilities)
|
|
643
|
+
|
|
644
|
+
# Insights bonus/penalty
|
|
645
|
+
score += self._calculate_insights_score(
|
|
646
|
+
agent_name, task_capabilities, task_hash
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
return score
|
|
650
|
+
|
|
651
|
+
def _calculate_metrics_score(
    self, metrics: AgentPerformanceMetrics, task_capabilities: list[str]
) -> float:
    """Weight an agent's overall, per-capability, and trend metrics.

    Weights: 0.4 for the overall success rate, 0.4 for the mean success
    rate over the requested capabilities (when any overlap exists), plus a
    trend term that rewards improvement (x0.1) more strongly than it
    punishes decline (x0.05).
    """
    known_rates = metrics.capability_success_rates
    relevant = [known_rates[cap] for cap in task_capabilities if cap in known_rates]

    score = metrics.success_rate * 0.4
    if relevant:
        score += (sum(relevant) / len(relevant)) * 0.4

    trend = metrics.recent_performance_trend
    if trend > 0:
        score += trend * 0.1
    elif trend < 0:
        score += trend * 0.05  # decline is penalized more gently

    return score
|
|
674
|
+
|
|
675
|
+
def _calculate_insights_score(
|
|
676
|
+
self, agent_name: str, task_capabilities: list[str], task_hash: str
|
|
677
|
+
) -> float:
|
|
678
|
+
"""Calculate score adjustment based on learning insights."""
|
|
679
|
+
relevant_insights = [
|
|
680
|
+
insight
|
|
681
|
+
for insight in self._learning_insights
|
|
682
|
+
if insight.agent_name == agent_name
|
|
683
|
+
]
|
|
684
|
+
|
|
685
|
+
score_adjustment = 0.0
|
|
686
|
+
for insight in relevant_insights:
|
|
687
|
+
if insight.insight_type == "capability_strength":
|
|
688
|
+
insight_capability = insight.supporting_evidence.get("capability", "")
|
|
689
|
+
if insight_capability in task_capabilities:
|
|
690
|
+
score_adjustment += insight.confidence * 0.1
|
|
691
|
+
elif insight.insight_type == "task_pattern":
|
|
692
|
+
if insight.supporting_evidence.get("task_pattern") == task_hash:
|
|
693
|
+
score_adjustment += insight.confidence * 0.15
|
|
694
|
+
elif insight.insight_type == "failure_pattern":
|
|
695
|
+
score_adjustment -= insight.confidence * 0.05
|
|
696
|
+
|
|
697
|
+
return score_adjustment
|
|
698
|
+
|
|
699
|
+
def get_learning_summary(self) -> dict[str, t.Any]:
    """Summarize learning progress: success rates, tracked agents, insights.

    Returns ``{"status": "no_data"}`` before any executions are recorded;
    otherwise an "active" summary including a success rate over the 50 most
    recent records and the top five agents by success rate.
    """
    if not self._execution_records:
        return {"status": "no_data"}

    total = len(self._execution_records)
    window = self._execution_records[-50:]
    window_success = sum(1 for r in window if r.success) / len(window)

    # Per-agent rollup feeding the top-performers ranking below.
    per_agent = {
        name: {
            "executions": m.total_executions,
            "success_rate": m.success_rate,
            "trend": m.recent_performance_trend,
        }
        for name, m in self._agent_metrics.items()
    }

    # Tally how many insights of each type have been discovered.
    type_counts: dict[str, int] = defaultdict(int)
    for insight in self._learning_insights:
        type_counts[insight.insight_type] += 1

    ranked = sorted(
        per_agent.items(),
        key=lambda item: item[1]["success_rate"],
        reverse=True,
    )
    return {
        "status": "active",
        "total_executions": total,
        "recent_success_rate": window_success,
        "agents_tracked": len(self._agent_metrics),
        "insights_discovered": len(self._learning_insights),
        "insights_by_type": dict(type_counts),
        "top_performers": ranked[:5],
    }
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
# Module-level singleton holding the shared learning system.
_learning_system_instance: AdaptiveLearningSystem | None = None


async def get_learning_system() -> AdaptiveLearningSystem:
    """Return the process-wide learning system, creating it on first use.

    Lazy construction: the first caller pays the initialization cost and
    every later call receives the same instance.
    """
    global _learning_system_instance

    instance = _learning_system_instance
    if instance is None:
        instance = AdaptiveLearningSystem()
        _learning_system_instance = instance
    return instance
|