ctrlcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Oracle adaptation engine - adapts historical oracles to new code."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from ..providers.base import Provider
|
|
7
|
+
from .context import ContextDerivation
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OracleAdapter:
    """Adapts historical oracles to new code using LLM reasoning.

    When similar code is found in history, instead of deriving a fresh oracle
    from scratch, we adapt the existing oracle to the new code. This saves
    LLM calls and maintains consistency across similar implementations.
    """

    def __init__(self, provider: Provider):
        """Initialize oracle adapter.

        Args:
            provider: LLM provider for adaptation. Its awaitable
                ``generate(messages)`` is called once per adaptation.
        """
        self.provider = provider

    @staticmethod
    def _extract_json_text(response_text: str) -> str:
        """Strip an optional Markdown code fence from a model reply.

        A ```json fence is preferred over a bare ``` fence. When the opening
        fence has no closing fence, everything after the opening fence is
        kept; slicing up to a ``find`` result of -1 would otherwise silently
        drop the last character of the payload.

        Args:
            response_text: Raw reply text.

        Returns:
            The fenced content (or the whole reply when no fence is present),
            with surrounding whitespace removed.
        """
        text = response_text.strip()
        for fence in ("```json", "```"):
            opening = text.find(fence)
            if opening == -1:
                continue
            start = opening + len(fence)
            end = text.find("```", start)
            if end == -1:
                # Unterminated fence: take the remainder of the reply.
                end = len(text)
            return text[start:end].strip()
        return text

    async def adapt_oracle(
        self,
        old_oracle: ContextDerivation,
        old_code: str,
        new_code: str,
        user_request: str,
        similarity_score: float,
    ) -> Optional[ContextDerivation]:
        """Adapt historical oracle to new code.

        Builds a system + user prompt, sends it through the provider, parses
        the reply as JSON and converts it with ``ContextDerivation.from_dict``.

        Args:
            old_oracle: Historical context derivation to adapt
            old_code: Historical code the oracle was derived from
            new_code: New code to adapt oracle for
            user_request: User specification for new code
            similarity_score: Cosine similarity between old and new code

        Returns:
            Adapted ContextDerivation or None if adaptation fails. Any
            exception raised while calling the provider or parsing the reply
            is logged as a warning and reported as None.
        """
        # Kept function-local, as in the original code path.
        import json

        system_prompt = """You are a senior systems analyst specializing in adapting behavioral
contracts to similar code implementations.

You will be given:
1. An EXISTING behavioral oracle derived from similar code
2. The ORIGINAL code that oracle was derived from
3. The NEW code that needs an oracle
4. The user's specification for the new code
5. The similarity score between old and new code

Your job is to ADAPT the existing oracle to the new code, rather than deriving
from scratch. Focus on:
- What changed between old and new code?
- Which behavioral invariants remain the same?
- Which invariants need updating?
- Any new integration contracts or edge cases?

Output the ADAPTED oracle as structured JSON matching the original format with
all 6 sections + function_invariants:

1. system_placement
2. environmental_constraints
3. integration_contracts
4. behavioral_invariants
5. edge_case_surface
6. implicit_assumptions
7. function_invariants (dict mapping function names to their specific invariants)

Be conservative: if you're unsure whether an invariant still applies, include it
with a note. Better to over-specify than under-specify."""

        user_message = f"""## Similarity Score
{similarity_score:.2%} match with historical code

## Historical Code
```python
{old_code}
```

## Historical Oracle
```json
{old_oracle.to_json()}
```

## New Code
```python
{new_code}
```

## User Specification for New Code
{user_request}

## Task
Adapt the historical oracle to the new code. Identify what changed and update
the oracle accordingly. Output as structured JSON."""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        try:
            response = await self.provider.generate(messages)
            response_text = response.get("text", "").strip()

            # Parse JSON response (the model may wrap it in a code fence).
            data = json.loads(self._extract_json_text(response_text))
            adapted = ContextDerivation.from_dict(data)

            logger.info(
                f"Successfully adapted oracle from historical code (similarity: {similarity_score:.2%})"
            )
            return adapted

        except Exception as e:
            # Best-effort: the caller only learns that adaptation failed.
            logger.warning(f"Failed to adapt historical oracle: {e}")
            return None
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Custom linters for golden principles enforcement."""
|
|
2
|
+
|
|
3
|
+
from .yolo_parsing import lint_file as lint_yolo_parsing
|
|
4
|
+
from .hand_rolled_utils import lint_file as lint_hand_rolled_utils
|
|
5
|
+
from .yolo_parsing import Violation
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"lint_yolo_parsing",
|
|
9
|
+
"lint_hand_rolled_utils",
|
|
10
|
+
"Violation",
|
|
11
|
+
]
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Hand-rolled utilities linter - detects duplicate patterns."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterator
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class Violation:
    """A linting violation.

    Plain record describing a single finding. No field has a default, so
    every value must be supplied when the record is created.
    """

    file: str  # path of the file the finding was reported for
    line: int  # line number of the finding
    column: int  # column offset of the finding within that line
    message: str  # human-readable description of what was detected
    severity: str  # severity label; this module emits "info" and "warning"
    fix_suggestion: str  # multi-line advice, printed after "FIX:" by format_violation
    principle: str  # principle slug, rendered as golden-principles/<principle>.md
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class HandRolledUtilsDetector(ast.NodeVisitor):
    """AST visitor to detect hand-rolled utility patterns.

    Two heuristics are applied while walking a parsed module:

    * a ``for ... in range(...)`` loop whose body directly contains a
      ``try`` statement is reported as hand-rolled retry logic;
    * a function (``def`` or ``async def``) whose name contains "retry",
      case-insensitively, is reported as a custom retry helper.

    Findings accumulate in ``self.violations``.
    """

    # Advice attached to retry-loop findings.
    _RETRY_LOOP_FIX = """Use shared retry utility:

from ctrlcode.utils import retry

@retry(max_attempts=3, backoff_base=2.0)
def your_function():
# Your code here
pass

Why: Centralizes retry logic, ensures consistency across codebase.
See: docs/golden-principles/prefer-shared-utils.md"""

    # Advice attached to findings on functions named like a retry helper.
    _RETRY_FUNCTION_FIX = """Use shared retry utility instead:

from ctrlcode.utils import retry

# Remove this function and use decorator:
@retry(max_attempts=3)
def your_function():
pass

Why: Shared utilities are tested, maintained, and consistent.
See: docs/golden-principles/prefer-shared-utils.md"""

    def __init__(self, filepath: str, source: str):
        """
        Initialize detector.

        Args:
            filepath: Path to file being analyzed
            source: Source code content
        """
        self.filepath = filepath
        self.source = source
        self.violations: list[Violation] = []

    def _line_snippet(self, lineno: int) -> str:
        """Return the first 50 characters of source line ``lineno`` plus "...".

        Falls back to the generic label "retry loop" when the line cannot be
        looked up (for example when the stored source is shorter than the
        parsed tree suggests).
        """
        try:
            return self.source.split("\n")[lineno - 1][:50] + "..."
        except (IndexError, TypeError, AttributeError):
            return "retry loop"

    def _check_retry_name(self, node) -> None:
        """Record a violation when a function's name contains "retry"."""
        if "retry" not in node.name.lower():
            return

        self.violations.append(
            Violation(
                file=self.filepath,
                line=node.lineno,
                column=node.col_offset,
                message=f"Custom retry function '{node.name}' - consider using shared utility",
                severity="info",
                fix_suggestion=self._RETRY_FUNCTION_FIX,
                principle="prefer-shared-utils",
            )
        )

    def visit_For(self, node: ast.For) -> None:
        """Detect retry patterns in for loops."""
        # Pattern: for i in range(N): try: ... except: ...
        iterable = node.iter
        loops_over_range = (
            isinstance(iterable, ast.Call)
            and isinstance(iterable.func, ast.Name)
            and iterable.func.id == "range"
        )

        # Only a ``try`` that is a direct child of the loop body counts.
        if loops_over_range and any(isinstance(stmt, ast.Try) for stmt in node.body):
            snippet = self._line_snippet(node.lineno)
            self.violations.append(
                Violation(
                    file=self.filepath,
                    line=node.lineno,
                    column=node.col_offset,
                    message=f"Hand-rolled retry logic detected: {snippet}",
                    severity="info",
                    fix_suggestion=self._RETRY_LOOP_FIX,
                    principle="prefer-shared-utils",
                )
            )

        self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Detect common utility function patterns."""
        self._check_retry_name(node)
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Apply the same name check to ``async def`` functions.

        ``ast.NodeVisitor`` dispatches coroutine definitions to this method,
        not to ``visit_FunctionDef``, so without it async retry helpers would
        never be inspected.
        """
        self._check_retry_name(node)
        self.generic_visit(node)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def detect_print_statements(filepath: Path, source: str) -> list[Violation]:
    """
    Detect print statements (should use logging).

    The scan is line-based, not syntax-aware: a line is reported when it
    contains a call to the bare name ``print``. Lines whose first
    non-blank character is ``#`` are ignored. Text inside string literals is
    not distinguished from code, so a ``print(`` inside a string is reported
    as well. Method calls such as ``console.print(...)`` are not reported
    because they are not the builtin.

    Args:
        filepath: Path to file
        source: Source code

    Returns:
        List of violations, at most one per line. ``line`` is 1-based and
        ``column`` is the 0-based offset of the reported ``print`` call.
    """
    violations = []

    for lineno, line in enumerate(source.split("\n"), start=1):
        # Skip comments
        if line.strip().startswith("#"):
            continue

        # Bare print( only: not preceded by an identifier character or a dot.
        match = re.search(r'(?<![\w.])print\s*\(', line)
        if match is None:
            continue

        violations.append(Violation(
            file=str(filepath),
            line=lineno,
            # Use the match position; str.find("print") could land on an
            # earlier substring such as "fingerprint".
            column=match.start(),
            message="Using print() instead of structured logging",
            severity="warning",
            fix_suggestion="""Use structured logging:

import structlog
logger = structlog.get_logger(__name__)

# Instead of:
# print(f"Processing {item}")

# Use:
logger.info("processing.started", item=item)

Why: Enables log querying, filtering, and agent analysis.
See: docs/golden-principles/structured-logging.md""",
            principle="structured-logging"
        ))

    return violations
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def lint_file(filepath: Path) -> list[Violation]:
    """
    Lint a Python file for hand-rolled utilities.

    Runs the AST-based ``HandRolledUtilsDetector`` and then appends the
    results of ``detect_print_statements`` for the same source text.

    Args:
        filepath: Path to Python file

    Returns:
        List of violations found. An empty list is returned when the file
        cannot be read, decoded or parsed, or when analysis fails for any
        other reason (linting is best-effort and never raises).
    """
    try:
        # Decode as UTF-8 (Python's default source encoding) instead of the
        # platform locale, so results do not depend on the machine.
        source = filepath.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(filepath))

        detector = HandRolledUtilsDetector(str(filepath), source)
        detector.visit(tree)

        # Add print statement detection
        detector.violations.extend(detect_print_statements(filepath, source))

        return detector.violations

    except Exception:
        # Covers SyntaxError, I/O and decoding errors alike: skip the file.
        return []
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _is_excluded_path(relative: Path) -> bool:
    """Return True for test files/directories and ``.venv`` trees.

    Only whole path components are inspected, so names that merely contain
    "test" (``latest/``, ``contest.py``) are not excluded.
    """
    *parents, filename = relative.parts or ("",)

    for part in parents:
        if part in ("test", "tests") or part.startswith(".venv"):
            return True

    stem = filename[:-3] if filename.endswith(".py") else filename
    return (
        filename == "conftest.py"
        or stem.startswith("test_")
        or stem.endswith("_test")
    )


def lint_directory(directory: Path, pattern: str = "**/*.py") -> Iterator[Violation]:
    """
    Lint all Python files in directory.

    Test files (``test_*.py``, ``*_test.py``, ``conftest.py``), anything
    below a ``test``/``tests`` directory and anything below a ``.venv*``
    directory are skipped. The decision is made on the path relative to
    ``directory``, so the location of the checkout itself (for example
    ``/home/tester/project``) never causes files to be skipped.

    Args:
        directory: Root directory to search
        pattern: Glob pattern for files to lint

    Yields:
        Violations found
    """
    for filepath in directory.glob(pattern):
        if not filepath.is_file():
            continue

        try:
            relative = filepath.relative_to(directory)
        except ValueError:
            # Not expressible relative to the root; judge the path as given.
            relative = filepath

        # Skip test files and virtual environments
        if _is_excluded_path(relative):
            continue

        yield from lint_file(filepath)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def format_violation(violation: Violation) -> str:
    """
    Render a violation as a multi-line report.

    The report has a header line prefixed with a severity symbol ("info"
    gets the information symbol, anything else the warning symbol), the
    ``file:line:column`` location, the rule document, the message and the
    fix suggestion, and ends with a newline.

    Args:
        violation: Violation to format

    Returns:
        Formatted string
    """
    if violation.severity == "info":
        icon = "ℹ️"
    else:
        icon = "⚠️"

    location = f"{violation.file}:{violation.line}:{violation.column}"
    report_lines = [
        f"{icon} Hand-Rolled Utility Detected",
        f"File: {location}",
        f"Rule: golden-principles/{violation.principle}.md",
        "",
        f"{violation.message}",
        "",
        f"FIX: {violation.fix_suggestion}",
        "",  # keeps the trailing newline of the report
    ]
    return "\n".join(report_lines)
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""YOLO parsing linter - detects dict access without validation."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Iterator
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Violation:
    """A linting violation.

    Plain record describing a single finding. No field has a default, so
    every value must be supplied when the record is created.
    """

    file: str  # path of the file the finding was reported for
    line: int  # line number of the finding (detectors pass the AST node's lineno)
    column: int  # column offset of the finding (detectors pass the AST node's col_offset)
    message: str  # human-readable description of what was detected
    severity: str  # severity label; this module emits "warning"
    fix_suggestion: str  # multi-line advice, printed after "FIX:" by format_violation
    principle: str  # principle slug, rendered as golden-principles/<principle>.md
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class YoloParsingDetector(ast.NodeVisitor):
    """AST visitor to detect dictionary access without validation.

    A read such as ``data["key"]`` is reported unless one of the following
    holds:

    * it happens inside a ``try`` statement that has an ``except KeyError``
      handler (alone or as part of a tuple);
    * the variable was previously passed to ``.parse``/``.parse_obj``/
      ``.parse_raw`` or as a keyword value to a call of a capitalised name;
    * the variable is one of ``self``, ``cls``, ``os``, ``sys``.

    Only reads are reported: assigning to or deleting ``data["key"]`` is not
    a parsing operation. Findings accumulate in ``self.violations``.
    """

    def __init__(self, filepath: str):
        """
        Initialize detector.

        Args:
            filepath: Path to file being analyzed
        """
        self.filepath = filepath
        self.violations: list[Violation] = []
        self.in_validation_context = False
        self.validated_names: set[str] = set()

    @staticmethod
    def _handles_key_error(handler: ast.ExceptHandler) -> bool:
        """Return True when the handler names ``KeyError``.

        Both ``except KeyError`` and ``except (KeyError, ...)`` qualify; a
        bare ``except:`` does not.
        """
        caught = handler.type
        if caught is None:
            return False
        candidates = caught.elts if isinstance(caught, ast.Tuple) else [caught]
        return any(
            isinstance(candidate, ast.Name) and candidate.id == "KeyError"
            for candidate in candidates
        )

    def visit_Try(self, node: ast.Try) -> None:
        """Track try blocks (might be validation)."""
        # Check if this is validation pattern (try/except KeyError)
        if not any(self._handles_key_error(handler) for handler in node.handlers):
            self.generic_visit(node)
            return

        # Restore the previous state, not False: this try may itself be
        # nested inside another validating try whose remaining statements
        # are still protected.
        previous = self.in_validation_context
        self.in_validation_context = True
        try:
            self.generic_visit(node)
        finally:
            self.in_validation_context = previous

    def visit_Call(self, node: ast.Call) -> None:
        """Track validation function calls."""
        callee = node.func

        # Pydantic-style: Model.parse(data) and friends mark ``data`` validated.
        if (
            isinstance(callee, ast.Attribute)
            and callee.attr in ("parse", "parse_obj", "parse_raw")
            and node.args
            and isinstance(node.args[0], ast.Name)
        ):
            self.validated_names.add(node.args[0].id)

        # Dataclass-style instantiation: names starting with an uppercase
        # letter are assumed to be classes/validators, and any plain variable
        # passed as a keyword value is treated as validated.
        if isinstance(callee, ast.Name) and callee.id and callee.id[0].isupper():
            for keyword in node.keywords:
                if isinstance(keyword.value, ast.Name):
                    self.validated_names.add(keyword.value.id)

        self.generic_visit(node)

    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Detect dictionary subscript access."""
        # Skip if in validation context, and skip stores/deletes: only
        # reading a missing key is the failure this linter is about.
        if self.in_validation_context or not isinstance(node.ctx, ast.Load):
            self.generic_visit(node)
            return

        has_string_key = isinstance(node.slice, ast.Constant) and isinstance(
            node.slice.value, str
        )

        # Check if this is dict access on a variable
        if isinstance(node.value, ast.Name):
            var_name = node.value.id
            is_trusted = var_name in self.validated_names or var_name in (
                "self",
                "cls",
                "os",
                "sys",
            )

            if has_string_key and not is_trusted:
                # This looks like YOLO dict access
                key = node.slice.value

                self.violations.append(
                    Violation(
                        file=self.filepath,
                        line=node.lineno,
                        column=node.col_offset,
                        message=f'Accessing {var_name}["{key}"] without validation',
                        severity="warning",
                        fix_suggestion=f"""Use schema validation at boundary:

from dataclasses import dataclass

@dataclass
class DataSchema:
{key}: str  # Define expected structure

validated = DataSchema(**{var_name})
value = validated.{key}

Why: Prevents runtime crashes from unexpected data shapes.
See: docs/golden-principles/no-yolo-parsing.md""",
                        principle="no-yolo-parsing",
                    )
                )

        # Check for chained subscripts (data["user"]["email"]). A string key
        # is required so plain indexing such as matrix[i][j] is not reported
        # as dict access.
        elif isinstance(node.value, ast.Subscript) and has_string_key:
            self.violations.append(
                Violation(
                    file=self.filepath,
                    line=node.lineno,
                    column=node.col_offset,
                    message="Nested dict access without validation",
                    severity="warning",
                    fix_suggestion="""Use schema validation to define structure:

from dataclasses import dataclass

@dataclass
class UserData:
user: dict  # Or better: user: User (nested dataclass)

validated = UserData(**data)
value = validated.user["field"]

Why: Makes data contract explicit and fails fast on malformed input.
See: docs/golden-principles/no-yolo-parsing.md""",
                    principle="no-yolo-parsing",
                )
            )

        self.generic_visit(node)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def lint_file(filepath: Path) -> list[Violation]:
    """
    Lint a Python file for YOLO parsing violations.

    Args:
        filepath: Path to Python file

    Returns:
        List of violations found. An empty list is returned when the file
        cannot be read, decoded or parsed, or when analysis fails for any
        other reason (linting is best-effort and never raises).
    """
    try:
        # Decode as UTF-8 (Python's default source encoding) instead of the
        # platform locale, so results do not depend on the machine.
        source = filepath.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(filepath))

        detector = YoloParsingDetector(str(filepath))
        detector.visit(tree)

        return detector.violations

    except Exception:
        # Syntax errors, I/O and decoding errors alike: skip the file.
        return []
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def lint_directory(directory: Path, pattern: str = "**/*.py") -> Iterator[Violation]:
    """
    Run ``lint_file`` over every regular file matched below a directory.

    Matching is delegated to ``directory.glob(pattern)``; entries that are
    not regular files are ignored. Files are processed lazily, one at a
    time, as the generator is consumed.

    Args:
        directory: Root directory to search
        pattern: Glob pattern for files to lint

    Yields:
        Violations found
    """
    regular_files = (entry for entry in directory.glob(pattern) if entry.is_file())
    for source_path in regular_files:
        yield from lint_file(source_path)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def format_violation(violation: Violation) -> str:
    """
    Render a violation as a multi-line report.

    The report has a header line prefixed with a severity symbol ("warning"
    gets the warning symbol, anything else the cross mark), the
    ``file:line:column`` location, the rule document, the message and the
    fix suggestion, and ends with a newline.

    Args:
        violation: Violation to format

    Returns:
        Formatted string
    """
    if violation.severity == "warning":
        icon = "⚠️"
    else:
        icon = "❌"

    location = f"{violation.file}:{violation.line}:{violation.column}"
    report_lines = [
        f"{icon} YOLO Parsing Detected",
        f"File: {location}",
        f"Rule: golden-principles/{violation.principle}.md",
        "",
        f"{violation.message}",
        "",
        f"FIX: {violation.fix_suggestion}",
        "",  # keeps the trailing newline of the report
    ]
    return "\n".join(report_lines)
|