ctrlcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""Metrics dashboard for historical learning system."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from ..storage.history_db import HistoryDB
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class MetricsSummary:
    """Point-in-time snapshot of the historical learning metrics.

    A plain value object: every field is supplied by the caller and no
    validation or derivation happens here. Field order is part of the
    constructor signature and must not be rearranged.
    """

    # --- Sessions ---------------------------------------------------------
    # Number of sessions recorded.
    total_sessions: int
    # Number of sessions counted as having reused an oracle.
    sessions_with_oracle_reuse: int
    # Reuse rate as a fraction (rendered with a percent format by the report).
    oracle_reuse_rate: float

    # --- Bug detection ----------------------------------------------------
    # Number of bug patterns stored in the database.
    total_bugs_in_db: int
    # Number of sessions that produced a bug warning.
    sessions_with_bug_warnings: int
    # Bug detection rate as a fraction.
    bug_detection_rate: float

    # --- Tests ------------------------------------------------------------
    # Number of tests executed.
    total_tests: int
    # Average test pass rate as a fraction.
    avg_test_pass_rate: float
    # Mean number of tests per session.
    avg_tests_per_session: float

    # --- Quality ----------------------------------------------------------
    # Average oracle quality score (the report prints it as "x/1.0").
    avg_oracle_quality: float
    # Average session quality score (the report prints it as "x/1.0").
    avg_session_quality: float

    # --- Efficiency -------------------------------------------------------
    # Estimated number of tokens saved.
    estimated_token_savings: int
    # Estimated number of LLM calls avoided.
    estimated_llm_call_savings: int
    # Estimated wall-clock time saved, in seconds.
    estimated_time_savings_seconds: float

    # --- Time range -------------------------------------------------------
    # Timestamp of the earliest session, or None when not available.
    earliest_session: Optional[datetime]
    # Timestamp of the latest session, or None when not available.
    latest_session: Optional[datetime]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class MetricsDashboard:
    """Dashboard for tracking and visualizing historical learning metrics.

    Provides insights into:
    - Oracle reuse rate (how often we avoid fresh derivations)
    - Bug pattern detection (proactive warnings)
    - Test deduplication effectiveness
    - Token and time savings
    - Quality trends over time

    Several figures are placeholders until the corresponding database
    queries exist: bug-warning counts, session quality, the session time
    range, trend data, top oracles and common bug patterns.
    """

    # Rough per-reuse savings: each reused oracle saves ~2500 tokens and
    # ~25 seconds. Class attributes so subclasses can tune the estimates.
    TOKEN_SAVINGS_PER_REUSE = 2500
    TIME_SAVINGS_PER_REUSE = 25.0

    # Rough cost model used by calculate_roi().
    STORAGE_COST_PER_SESSION = 0.001  # $0.001 per session stored
    EMBEDDING_COST_PER_SESSION = 0.0  # Free (local SentenceTransformers)

    # Rough benefit model used by calculate_roi().
    TOKEN_COST_PER_1K = 0.003  # $3 per million tokens = $0.003 per 1K
    TIME_VALUE_PER_SECOND = 0.01  # ~$36/hour = $0.01/second

    def __init__(self, history_db: "HistoryDB"):
        """Initialize metrics dashboard.

        Args:
            history_db: History database to analyze
        """
        self.history_db = history_db

    def get_summary(self) -> "MetricsSummary":
        """Get overall metrics summary.

        Reads ``history_db.get_stats()`` and fills in missing keys with
        zero defaults. The stats object is only accessed through ``.get``.

        Returns:
            MetricsSummary with all key metrics. ``sessions_with_bug_warnings``,
            ``bug_detection_rate``, ``avg_session_quality`` and the session
            time range are placeholders (0 / 0.0 / None) for now.
        """
        stats = self.history_db.get_stats()

        total_sessions = stats.get("total_sessions", 0)
        reuse_count = stats.get("oracle_reuse_count", 0)
        total_tests = stats.get("total_tests", 0)

        # Guard the division so an empty database yields 0.0, not an error.
        avg_tests_per_session = (
            total_tests / total_sessions if total_sessions > 0 else 0.0
        )

        return MetricsSummary(
            total_sessions=total_sessions,
            sessions_with_oracle_reuse=reuse_count,
            oracle_reuse_rate=stats.get("oracle_reuse_rate", 0.0),
            total_bugs_in_db=stats.get("total_bugs", 0),
            # Would need actual tracking data; reported as zero for now.
            sessions_with_bug_warnings=0,
            bug_detection_rate=0.0,
            total_tests=total_tests,
            avg_test_pass_rate=stats.get("test_pass_rate", 0.0),
            avg_tests_per_session=avg_tests_per_session,
            avg_oracle_quality=stats.get("avg_oracle_quality", 0.0),
            # Would be calculated from session quality_scores.
            avg_session_quality=0.0,
            estimated_token_savings=reuse_count * self.TOKEN_SAVINGS_PER_REUSE,
            # One avoided derivation per reused oracle.
            estimated_llm_call_savings=reuse_count,
            estimated_time_savings_seconds=(
                reuse_count * self.TIME_SAVINGS_PER_REUSE
            ),
            # Would need to query the DB for actual timestamps.
            earliest_session=None,
            latest_session=None,
        )

    def format_report(self, summary: Optional["MetricsSummary"] = None) -> str:
        """Format metrics as human-readable report.

        Args:
            summary: Optional pre-computed summary (will compute if not provided)

        Returns:
            Formatted report string (lines joined with newlines)
        """
        if summary is None:
            summary = self.get_summary()

        # NOTE(review): the icons in these strings were mis-encoded in the
        # reviewed text. Lines tagged "confirm icon" carry a best-effort
        # reconstruction and should be checked against the intended glyph.
        rule = "=" * 60
        lines = [rule, "HISTORICAL LEARNING METRICS DASHBOARD", rule, ""]

        # Session stats
        lines.extend(
            [
                "📊 SESSION STATISTICS",  # confirm icon
                f" Total fuzzing sessions: {summary.total_sessions}",
                f" Sessions with oracle reuse: {summary.sessions_with_oracle_reuse}",
                f" Oracle reuse rate: {summary.oracle_reuse_rate:.1%}",
                "",
            ]
        )

        # Bug detection
        lines.extend(
            [
                "🐛 BUG PATTERN DETECTION",  # confirm icon
                f" Bug patterns in database: {summary.total_bugs_in_db}",
                f" Sessions with bug warnings: {summary.sessions_with_bug_warnings}",
                f" Bug detection rate: {summary.bug_detection_rate:.1%}",
                "",
            ]
        )

        # Test stats
        lines.extend(
            [
                "🧪 TEST EXECUTION",
                f" Total tests executed: {summary.total_tests}",
                f" Average test pass rate: {summary.avg_test_pass_rate:.1%}",
                f" Avg tests per session: {summary.avg_tests_per_session:.1f}",
                "",
            ]
        )

        # Quality metrics
        lines.extend(
            [
                "⭐ QUALITY METRICS",
                f" Average oracle quality: {summary.avg_oracle_quality:.2f}/1.0",
                f" Average session quality: {summary.avg_session_quality:.2f}/1.0",
                "",
            ]
        )

        # Efficiency gains
        saved_seconds = summary.estimated_time_savings_seconds
        lines.extend(
            [
                "⚡ EFFICIENCY GAINS",
                f" Token savings: {summary.estimated_token_savings:,} tokens",
                f" LLM calls avoided: {summary.estimated_llm_call_savings} derivations",
                f" Time saved: {saved_seconds:.1f} seconds "
                f"({saved_seconds / 60:.1f} minutes)",
                "",
            ]
        )

        # Performance indicators: grade the reuse rate against fixed bands.
        lines.append("📈 KEY PERFORMANCE INDICATORS")  # confirm icon
        if summary.oracle_reuse_rate >= 0.3:
            lines.append(" ✅ Oracle reuse rate: EXCELLENT (≥30%)")
        elif summary.oracle_reuse_rate >= 0.2:
            lines.append(" ✓ Oracle reuse rate: GOOD (≥20%)")  # confirm icon
        elif summary.oracle_reuse_rate >= 0.1:
            lines.append(" ⚠ Oracle reuse rate: FAIR (≥10%)")  # confirm icon
        else:
            lines.append(  # confirm icon
                " ✗ Oracle reuse rate: LOW (<10%) - need more diverse training data"
            )

        # With fewer than 10 sessions the rates above are not very meaningful.
        if summary.total_sessions < 10:
            lines.append(
                f" ℹ️ Limited data: Only {summary.total_sessions} sessions - "
                "metrics will improve with more usage"
            )

        lines.append("")
        lines.append(rule)

        return "\n".join(lines)

    def get_trend_data(self, days: int = 30) -> dict:
        """Get trend data for the last N days.

        Placeholder: no time-series query is performed yet, so ``days`` is
        currently unused and every series is empty.

        Args:
            days: Number of days to include

        Returns:
            Dictionary with trend data by day
        """
        # This would query the database for time-series data.
        return {
            "oracle_reuse_by_day": [],
            "sessions_by_day": [],
            "quality_by_day": [],
            "bugs_detected_by_day": [],
        }

    def calculate_roi(self) -> dict:
        """Calculate return on investment for historical learning.

        Costs and benefits are rough estimates derived from the class-level
        cost/benefit constants applied to ``get_summary()``.

        Returns:
            Dictionary with ROI metrics: ``total_cost_usd``,
            ``total_benefit_usd``, ``net_savings_usd``, ``roi_percentage``,
            ``token_savings_value_usd`` and ``time_savings_value_usd``.
            ``roi_percentage`` is 0.0 when there is no cost to compare against.
        """
        summary = self.get_summary()

        # Costs (rough estimates): storage plus embedding, per stored session.
        total_cost = summary.total_sessions * (
            self.STORAGE_COST_PER_SESSION + self.EMBEDDING_COST_PER_SESSION
        )

        # Benefits (rough estimates)
        token_savings_value = (
            summary.estimated_token_savings / 1000
        ) * self.TOKEN_COST_PER_1K
        time_savings_value = (
            summary.estimated_time_savings_seconds * self.TIME_VALUE_PER_SECOND
        )
        total_benefit = token_savings_value + time_savings_value

        net_savings = total_benefit - total_cost
        # Avoid dividing by zero when nothing has been stored yet.
        roi = (net_savings / total_cost * 100) if total_cost > 0 else 0.0

        return {
            "total_cost_usd": total_cost,
            "total_benefit_usd": total_benefit,
            "net_savings_usd": net_savings,
            "roi_percentage": roi,
            "token_savings_value_usd": token_savings_value,
            "time_savings_value_usd": time_savings_value,
        }

    def get_top_oracles(self, limit: int = 10) -> list[dict]:
        """Get top-performing oracles by quality and reuse.

        Placeholder: always returns an empty list; ``limit`` is unused.

        Args:
            limit: Number of top oracles to return

        Returns:
            List of oracle info dicts
        """
        # This would query oracle_embeddings table ordered by quality_score
        # and join with usage counts.
        return []

    def get_common_bug_patterns(self, limit: int = 10) -> list[dict]:
        """Get most common bug patterns.

        Placeholder: always returns an empty list; ``limit`` is unused.

        Args:
            limit: Number of patterns to return

        Returns:
            List of bug pattern info dicts
        """
        # This would query bug_patterns table and group by similar patterns.
        return []
|