ctrlcode-0.1.0-py3-none-any.whl
This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
ctrlcode/agents/observability.py
@@ -0,0 +1,421 @@
+"""Observability tools for Executor agent."""
+
+import re
+from typing import Any
+from dataclasses import dataclass
+
+
+@dataclass
+class TestResult:
+    """Parsed test execution result."""
+
+    total: int
+    passed: int
+    failed: int
+    skipped: int
+    duration: float
+    status: str  # "pass" | "fail"
+    failures: list[dict[str, str]]
+
+
+@dataclass
+class PerformanceMetrics:
+    """Parsed performance metrics."""
+
+    p50: float | None = None
+    p75: float | None = None
+    p90: float | None = None
+    p95: float | None = None
+    p99: float | None = None
+    mean: float | None = None
+    throughput: float | None = None
+    error_rate: float | None = None
+
+
+@dataclass
+class LogAnalysis:
+    """Parsed log analysis."""
+
+    total_lines: int
+    errors: list[str]
+    warnings: list[str]
+    error_count: int
+    warning_count: int
+    health: str  # "healthy" | "warnings" | "errors"
+
+
+class TestOutputParser:
+    """Parser for test framework outputs."""
+
+    @staticmethod
+    def parse_pytest(output: str) -> TestResult:
+        """
+        Parse pytest output.
+
+        Args:
+            output: pytest stdout/stderr
+
+        Returns:
+            Parsed TestResult
+        """
+        # Extract test counts
+        # Format: "======================== 10 passed, 2 failed in 0.5s ========================"
+        match = re.search(
+            r'(\d+)\s+passed(?:,\s+(\d+)\s+failed)?(?:,\s+(\d+)\s+skipped)?.*?in\s+([\d.]+)s',
+            output
+        )
+
+        if match:
+            passed = int(match.group(1))
+            failed = int(match.group(2) or 0)
+            skipped = int(match.group(3) or 0)
+            duration = float(match.group(4))
+            total = passed + failed + skipped
+        else:
+            # Fallback parsing
+            total = 0
+            passed = 0
+            failed = 0
+            skipped = 0
+            duration = 0.0
+
+        # Extract failure details
+        failures = []
+        failure_pattern = r'FAILED\s+([\w/:.]+)\s+-\s+(.+)'
+        for match in re.finditer(failure_pattern, output):
+            failures.append({
+                "test": match.group(1),
+                "reason": match.group(2).strip()
+            })
+
+        status = "pass" if failed == 0 else "fail"
+
+        return TestResult(
+            total=total,
+            passed=passed,
+            failed=failed,
+            skipped=skipped,
+            duration=duration,
+            status=status,
+            failures=failures
+        )
+
+    @staticmethod
+    def parse_generic(output: str) -> TestResult:
+        """
+        Parse generic test output.
+
+        Args:
+            output: Test framework output
+
+        Returns:
+            Parsed TestResult
+        """
+        # Look for common patterns
+        passed = len(re.findall(r'\bPASS(ED)?\b', output, re.IGNORECASE))
+        failed = len(re.findall(r'\bFAIL(ED)?\b', output, re.IGNORECASE))
+        skipped = len(re.findall(r'\bSKIP(PED)?\b', output, re.IGNORECASE))
+
+        total = passed + failed + skipped
+        status = "pass" if failed == 0 else "fail"
+
+        return TestResult(
+            total=total,
+            passed=passed,
+            failed=failed,
+            skipped=skipped,
+            duration=0.0,
+            status=status,
+            failures=[]
+        )
+
+
+class PerformanceParser:
+    """Parser for performance benchmark outputs."""
+
+    @staticmethod
+    def parse_wrk(output: str) -> PerformanceMetrics:
+        """
+        Parse wrk benchmark output.
+
+        Args:
+            output: wrk stdout
+
+        Returns:
+            Parsed PerformanceMetrics
+        """
+        metrics = PerformanceMetrics()
+
+        # Parse latency distribution
+        # Format: "50% 145ms"
+        percentiles = {
+            '50%': 'p50',
+            '75%': 'p75',
+            '90%': 'p90',
+            '95%': 'p95',
+            '99%': 'p99',
+        }
+
+        for percentile, attr in percentiles.items():
+            pattern = rf'{re.escape(percentile)}\s+([\d.]+)(ms|s)'
+            match = re.search(pattern, output)
+            if match:
+                value = float(match.group(1))
+                unit = match.group(2)
+                if unit == 's':
+                    value *= 1000  # Convert to ms
+                setattr(metrics, attr, value)
+
+        # Parse throughput
+        # Format: "Requests/sec: 6.90"
+        match = re.search(r'Requests/sec:\s+([\d.]+)', output)
+        if match:
+            metrics.throughput = float(match.group(1))
+
+        return metrics
+
+    @staticmethod
+    def parse_ab(output: str) -> PerformanceMetrics:
+        """
+        Parse Apache Bench output.
+
+        Args:
+            output: ab stdout
+
+        Returns:
+            Parsed PerformanceMetrics
+        """
+        metrics = PerformanceMetrics()
+
+        # Parse mean latency
+        # Format: "Time per request: 145.0 [ms] (mean)"
+        match = re.search(r'Time per request:\s+([\d.]+)\s+\[ms\]\s+\(mean\)', output)
+        if match:
+            metrics.mean = float(match.group(1))
+
+        # Parse throughput
+        # Format: "Requests per second: 6.90 [#/sec] (mean)"
+        match = re.search(r'Requests per second:\s+([\d.]+)', output)
+        if match:
+            metrics.throughput = float(match.group(1))
+
+        # Parse failure rate
+        # Format: "Failed requests: 1"
+        total_match = re.search(r'Complete requests:\s+(\d+)', output)
+        failed_match = re.search(r'Failed requests:\s+(\d+)', output)
+
+        if total_match and failed_match:
+            total = int(total_match.group(1))
+            failed = int(failed_match.group(1))
+            metrics.error_rate = (failed / total) * 100 if total > 0 else 0.0
+
+        return metrics
+
+
+class LogParser:
+    """Parser for application logs."""
+
+    @staticmethod
+    def parse_structured_logs(output: str) -> LogAnalysis:
+        """
+        Parse structured application logs.
+
+        Args:
+            output: Log output
+
+        Returns:
+            Parsed LogAnalysis
+        """
+        lines = output.splitlines()
+        total_lines = len(lines)
+
+        errors = []
+        warnings = []
+
+        # Common log level patterns
+        error_pattern = r'\b(ERROR|FATAL|CRITICAL)\b'
+        warning_pattern = r'\bWARN(ING)?\b'
+
+        for line in lines:
+            if re.search(error_pattern, line, re.IGNORECASE):
+                errors.append(line.strip())
+            elif re.search(warning_pattern, line, re.IGNORECASE):
+                warnings.append(line.strip())
+
+        error_count = len(errors)
+        warning_count = len(warnings)
+
+        # Determine health
+        if error_count > 0:
+            health = "errors"
+        elif warning_count > 0:
+            health = "warnings"
+        else:
+            health = "healthy"
+
+        return LogAnalysis(
+            total_lines=total_lines,
+            errors=errors,
+            warnings=warnings,
+            error_count=error_count,
+            warning_count=warning_count,
+            health=health
+        )
+
+
+class ObservabilityTools:
+    """High-level observability tools for Executor agent."""
+
+    def __init__(self):
+        """Initialize observability tools."""
+        self.test_parser = TestOutputParser()
+        self.perf_parser = PerformanceParser()
+        self.log_parser = LogParser()
+
+    def analyze_test_output(
+        self,
+        output: str,
+        framework: str = "pytest"
+    ) -> dict[str, Any]:
+        """
+        Analyze test output and extract structured data.
+
+        Args:
+            output: Test framework output
+            framework: Test framework name (pytest, jest, etc.)
+
+        Returns:
+            Structured test analysis
+        """
+        if framework == "pytest":
+            result = self.test_parser.parse_pytest(output)
+        else:
+            result = self.test_parser.parse_generic(output)
+
+        return {
+            "status": result.status,
+            "summary": {
+                "total": result.total,
+                "passed": result.passed,
+                "failed": result.failed,
+                "skipped": result.skipped,
+                "duration": result.duration,
+            },
+            "failures": result.failures,
+            "recommendation": self._test_recommendation(result)
+        }
+
+    def analyze_performance(
+        self,
+        output: str,
+        tool: str = "wrk",
+        threshold_ms: float | None = None
+    ) -> dict[str, Any]:
+        """
+        Analyze performance benchmark output.
+
+        Args:
+            output: Benchmark tool output
+            tool: Tool name (wrk, ab, etc.)
+            threshold_ms: Optional performance threshold in ms
+
+        Returns:
+            Structured performance analysis
+        """
+        if tool == "wrk":
+            metrics = self.perf_parser.parse_wrk(output)
+        elif tool == "ab":
+            metrics = self.perf_parser.parse_ab(output)
+        else:
+            metrics = PerformanceMetrics()
+
+        # Determine status
+        status = "pass"
+        if threshold_ms and metrics.p99:
+            status = "pass" if metrics.p99 <= threshold_ms else "warning"
+
+        return {
+            "status": status,
+            "metrics": {
+                "p50": metrics.p50,
+                "p75": metrics.p75,
+                "p90": metrics.p90,
+                "p95": metrics.p95,
+                "p99": metrics.p99,
+                "mean": metrics.mean,
+                "throughput": metrics.throughput,
+                "error_rate": metrics.error_rate,
+            },
+            "threshold": threshold_ms,
+            "recommendation": self._perf_recommendation(metrics, threshold_ms)
+        }
+
+    def analyze_logs(
+        self,
+        output: str,
+        max_errors: int = 0
+    ) -> dict[str, Any]:
+        """
+        Analyze application logs.
+
+        Args:
+            output: Log output
+            max_errors: Maximum acceptable error count
+
+        Returns:
+            Structured log analysis
+        """
+        analysis = self.log_parser.parse_structured_logs(output)
+
+        status = "pass"
+        if analysis.error_count > max_errors:
+            status = "fail"
+        elif analysis.warning_count > 0:
+            status = "warning"
+
+        return {
+            "status": status,
+            "health": analysis.health,
+            "summary": {
+                "total_lines": analysis.total_lines,
+                "error_count": analysis.error_count,
+                "warning_count": analysis.warning_count,
+            },
+            "errors": analysis.errors[:10],  # Top 10 errors
+            "warnings": analysis.warnings[:10],  # Top 10 warnings
+            "recommendation": self._log_recommendation(analysis)
+        }
+
+    def _test_recommendation(self, result: TestResult) -> str:
+        """Generate recommendation from test result."""
+        if result.status == "pass":
+            return "approve"
+        elif result.failed <= 2:
+            return "investigate (few failures, likely fixable)"
+        else:
+            return "fix (multiple failures detected)"
+
+    def _perf_recommendation(
+        self,
+        metrics: PerformanceMetrics,
+        threshold: float | None
+    ) -> str:
+        """Generate recommendation from performance metrics."""
+        if not threshold or not metrics.p99:
+            return "baseline established"
+
+        if metrics.p99 <= threshold:
+            return "approve"
+        elif metrics.p99 <= threshold * 1.1:
+            return "approve (marginal exceedance, acceptable)"
+        else:
+            return "investigate (performance degradation detected)"
+
+    def _log_recommendation(self, analysis: LogAnalysis) -> str:
+        """Generate recommendation from log analysis."""
+        if analysis.health == "healthy":
+            return "approve"
+        elif analysis.health == "warnings":
+            return "approve (warnings present, monitor in production)"
+        else:
+            return "investigate (errors detected)"
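
For context, a minimal usage sketch of the module added above (not part of the published wheel). The import path is an assumption based on the file listing, and the sample pytest output is invented to match the formats the parsers expect.

# Illustrative sketch only; module path assumed from ctrlcode/agents/observability.py.
from ctrlcode.agents.observability import ObservabilityTools

pytest_output = (
    "======================== 10 passed, 2 failed in 0.5s ========================\n"
    "FAILED tests/test_api.py::test_login - AssertionError: expected 200, got 500\n"
)

tools = ObservabilityTools()
report = tools.analyze_test_output(pytest_output, framework="pytest")
# report["status"] == "fail"
# report["summary"]["passed"] == 10
# report["recommendation"] == "investigate (few failures, likely fixable)"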