dotscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dotscope/.scope +63 -0
- dotscope/__init__.py +3 -0
- dotscope/absorber.py +390 -0
- dotscope/assertions.py +128 -0
- dotscope/ast_analyzer.py +2 -0
- dotscope/backtest.py +2 -0
- dotscope/bench.py +141 -0
- dotscope/budget.py +3 -0
- dotscope/cache.py +2 -0
- dotscope/check/__init__.py +1 -0
- dotscope/check/acknowledge.py +2 -0
- dotscope/check/checker.py +3 -0
- dotscope/check/checks/__init__.py +1 -0
- dotscope/check/checks/antipattern.py +2 -0
- dotscope/check/checks/boundary.py +2 -0
- dotscope/check/checks/contracts.py +3 -0
- dotscope/check/checks/direction.py +2 -0
- dotscope/check/checks/intent.py +2 -0
- dotscope/check/checks/stability.py +2 -0
- dotscope/check/constraints.py +2 -0
- dotscope/check/models.py +15 -0
- dotscope/cli.py +1447 -0
- dotscope/composer.py +147 -0
- dotscope/constants.py +45 -0
- dotscope/context.py +60 -0
- dotscope/counterfactual.py +180 -0
- dotscope/debug.py +220 -0
- dotscope/discovery.py +104 -0
- dotscope/formatter.py +157 -0
- dotscope/graph.py +3 -0
- dotscope/health.py +212 -0
- dotscope/help.py +204 -0
- dotscope/history.py +6 -0
- dotscope/hooks.py +2 -0
- dotscope/ingest.py +858 -0
- dotscope/intent.py +618 -0
- dotscope/lessons.py +223 -0
- dotscope/matcher.py +104 -0
- dotscope/mcp_server.py +1081 -0
- dotscope/models/.scope +45 -0
- dotscope/models/__init__.py +7 -0
- dotscope/models/core.py +288 -0
- dotscope/models/history.py +73 -0
- dotscope/models/intent.py +213 -0
- dotscope/models/passes.py +58 -0
- dotscope/models/state.py +250 -0
- dotscope/models.py +9 -0
- dotscope/near_miss.py +3 -0
- dotscope/onboarding.py +2 -0
- dotscope/parser.py +387 -0
- dotscope/passes/.scope +105 -0
- dotscope/passes/__init__.py +1 -0
- dotscope/passes/ast_analyzer.py +508 -0
- dotscope/passes/backtest.py +198 -0
- dotscope/passes/budget_allocator.py +164 -0
- dotscope/passes/convention_compliance.py +40 -0
- dotscope/passes/convention_discovery.py +247 -0
- dotscope/passes/convention_parser.py +223 -0
- dotscope/passes/graph_builder.py +299 -0
- dotscope/passes/history_miner.py +336 -0
- dotscope/passes/incremental.py +149 -0
- dotscope/passes/lang/__init__.py +38 -0
- dotscope/passes/lang/_base.py +20 -0
- dotscope/passes/lang/_treesitter.py +93 -0
- dotscope/passes/lang/go.py +333 -0
- dotscope/passes/lang/javascript.py +348 -0
- dotscope/passes/lazy.py +152 -0
- dotscope/passes/semantic_diff.py +160 -0
- dotscope/passes/sentinel/__init__.py +1 -0
- dotscope/passes/sentinel/acknowledge.py +222 -0
- dotscope/passes/sentinel/checker.py +383 -0
- dotscope/passes/sentinel/checks/__init__.py +1 -0
- dotscope/passes/sentinel/checks/antipattern.py +84 -0
- dotscope/passes/sentinel/checks/boundary.py +46 -0
- dotscope/passes/sentinel/checks/contracts.py +148 -0
- dotscope/passes/sentinel/checks/convention.py +54 -0
- dotscope/passes/sentinel/checks/direction.py +71 -0
- dotscope/passes/sentinel/checks/intent.py +207 -0
- dotscope/passes/sentinel/checks/stability.py +66 -0
- dotscope/passes/sentinel/checks/voice.py +108 -0
- dotscope/passes/sentinel/constraints.py +472 -0
- dotscope/passes/sentinel/line_filter.py +88 -0
- dotscope/passes/sentinel/models.py +15 -0
- dotscope/passes/virtual.py +239 -0
- dotscope/passes/voice.py +162 -0
- dotscope/passes/voice_defaults.py +28 -0
- dotscope/passes/voice_discovery.py +245 -0
- dotscope/paths.py +32 -0
- dotscope/progress.py +44 -0
- dotscope/regression.py +147 -0
- dotscope/resolver.py +203 -0
- dotscope/scanner.py +246 -0
- dotscope/sessions.py +2 -0
- dotscope/storage/.scope +64 -0
- dotscope/storage/__init__.py +1 -0
- dotscope/storage/cache.py +114 -0
- dotscope/storage/claude_hooks.py +119 -0
- dotscope/storage/git_hooks.py +277 -0
- dotscope/storage/incremental_state.py +61 -0
- dotscope/storage/mcp_config.py +98 -0
- dotscope/storage/near_miss.py +183 -0
- dotscope/storage/onboarding.py +150 -0
- dotscope/storage/session_manager.py +195 -0
- dotscope/storage/timing.py +84 -0
- dotscope/timing.py +2 -0
- dotscope/tokens.py +53 -0
- dotscope/utility.py +123 -0
- dotscope/virtual.py +3 -0
- dotscope/visibility.py +664 -0
- dotscope-0.1.0.dist-info/METADATA +50 -0
- dotscope-0.1.0.dist-info/RECORD +114 -0
- dotscope-0.1.0.dist-info/WHEEL +4 -0
- dotscope-0.1.0.dist-info/entry_points.txt +3 -0
- dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Scope backtesting: validate generated scopes against actual git history.
|
|
2
|
+
|
|
3
|
+
Replays recent commits and measures whether each scope's includes would have
|
|
4
|
+
covered the files that were actually changed. Self-corrects by suggesting
|
|
5
|
+
missing includes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import subprocess
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
from typing import Dict, List, Set
|
|
12
|
+
|
|
13
|
+
from ..models import (
|
|
14
|
+
BacktestReport,
|
|
15
|
+
BacktestResult,
|
|
16
|
+
MissingSuggestion,
|
|
17
|
+
ScopeConfig,
|
|
18
|
+
)
|
|
19
|
+
from ..resolver import resolve
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def backtest_scopes(
    root: str,
    scopes: List[ScopeConfig],
    n_commits: int = 50,
) -> BacktestReport:
    """Measure how well each scope covers the files touched by recent commits.

    Each commit returned by ``_get_recent_commits`` is matched to scopes with
    ``_match_commit_to_scopes``. For a matched scope the commit counts as
    fully covered when every changed file, joined onto ``root``, is present
    in the scope's resolved file set. Files missed in two or more commits are
    reported as ``MissingSuggestion`` entries, most frequent first, capped at
    ten per scope.

    Args:
        root: Repository root; scope directories are keyed relative to it.
        scopes: Scope configurations to evaluate.
        n_commits: Number of recent commits to request.

    Returns:
        A ``BacktestReport`` holding one ``BacktestResult`` per scope, or a
        default-constructed report when no commits are available. Recall is
        reported as 1.0 when nothing was matched.
    """
    history = _get_recent_commits(root, n_commits)
    if not history:
        return BacktestReport()

    # Index every scope by its directory relative to root.
    files_by_dir: Dict[str, Set[str]] = {}
    config_by_dir: Dict[str, ScopeConfig] = {}
    for scope in scopes:
        resolved = resolve(scope, follow_related=False, root=root)
        key = os.path.relpath(scope.directory, root)
        files_by_dir[key] = set(resolved.files)
        config_by_dir[key] = scope

    # Per-scope tallies: commits matched, commits fully covered, misses per file.
    matched_count: Dict[str, int] = defaultdict(int)
    covered_count: Dict[str, int] = defaultdict(int)
    miss_count: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for changed_files in history:
        for key in _match_commit_to_scopes(changed_files, config_by_dir, root):
            matched_count[key] += 1
            known = files_by_dir.get(key, set())

            uncovered = [
                name for name in changed_files
                if os.path.join(root, name) not in known
            ]
            for name in uncovered:
                miss_count[key][name] += 1
            if not uncovered:
                covered_count[key] += 1

    results = []
    for scope in scopes:
        key = os.path.relpath(scope.directory, root)
        total = matched_count.get(key, 0)
        covered = covered_count.get(key, 0)
        recall = covered / total if total > 0 else 1.0

        ranked_misses = sorted(
            miss_count.get(key, {}).items(), key=lambda item: -item[1]
        )
        # A file missed only once is treated as noise and not suggested.
        suggestions = [
            MissingSuggestion(
                path=path,
                appearances=count,
                would_improve_recall=True,
            )
            for path, count in ranked_misses
            if count >= 2
        ]

        results.append(BacktestResult(
            scope_path=scope.path,
            total_commits=total,
            fully_covered=covered,
            recall=round(recall, 3),
            missing_includes=suggestions[:10],
        ))

    # Overall recall is taken over (commit, scope) matches, not raw commits.
    covered_total = sum(r.fully_covered for r in results)
    matched_total = sum(r.total_commits for r in results)
    overall = covered_total / matched_total if matched_total > 0 else 1.0

    return BacktestReport(
        results=results,
        total_commits=len(history),
        overall_recall=round(overall, 3),
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def auto_correct_scope(
    scope: ScopeConfig,
    result: BacktestResult,
    root: str,
    min_appearances: int = 3,
) -> tuple[ScopeConfig, bool]:
    """Append frequently missed paths from a backtest result to a scope.

    A suggestion is applied when it appeared in at least ``min_appearances``
    commits, is flagged ``would_improve_recall`` and its path is not already
    listed in ``scope.includes``. ``scope.includes`` is modified in place.

    Args:
        scope: Scope whose ``includes`` list may be extended.
        result: Backtest result providing ``missing_includes``.
        root: Accepted for interface compatibility; not used here.
        min_appearances: Minimum number of commits a miss must appear in.

    Returns:
        ``(scope, changed)`` where ``scope`` is the same object that was
        passed in and ``changed`` tells whether any path was appended.
    """
    added = False
    for hint in result.missing_includes:
        if not (hint.appearances >= min_appearances and hint.would_improve_recall):
            continue
        if hint.path in scope.includes:
            continue
        scope.includes.append(hint.path)
        added = True
    return scope, added
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def format_backtest_report(report: BacktestReport) -> str:
    """Render a backtest report as newline-joined plain text.

    The output starts with the commit count and overall recall, followed by
    an empty line. Each result then gets one line with a ten-cell recall bar
    and up to five ``missing:`` lines taken from ``missing_includes``.

    Args:
        report: Report to format.

    Returns:
        The formatted text, without a trailing newline after the last line.
    """
    out = [
        f"Backtest: {report.total_commits} commits analyzed",
        f"Overall recall: {report.overall_recall:.0%}",
        "",
    ]

    for entry in report.results:
        # A scope is labelled by the directory that contains its scope file.
        label = os.path.basename(os.path.dirname(entry.scope_path))
        filled = int(entry.recall * 10)
        bar = "█" * filled + "░" * (10 - filled)
        out.append(
            f" {label}/.scope — recall: {bar} {entry.recall:.0%} "
            f"({entry.fully_covered}/{entry.total_commits} commits)"
        )
        out.extend(
            f" missing: {miss.path} (appeared in {miss.appearances} commits)"
            for miss in entry.missing_includes[:5]
        )

    return "\n".join(out)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# Internals
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
def _get_recent_commits(root: str, n: int) -> List[List[str]]:
    """Return the changed-file lists of the most recent commits in ``root``.

    Runs ``git log --max-count=<n> --pretty=format:%H --name-only`` in
    ``root`` and groups the output by commit. Commits that list no files
    are omitted from the result.

    Args:
        root: Directory expected to contain a ``.git`` directory.
        n: Maximum number of commits to request from git.

    Returns:
        One list of paths (as printed by git) per commit, in git's output
        order. An empty list is returned when ``root`` has no ``.git``
        directory, git is not installed, the command takes longer than
        15 seconds, or git exits with a non-zero status.
    """
    if not os.path.isdir(os.path.join(root, ".git")):
        return []

    try:
        proc = subprocess.run(
            ["git", "log", f"--max-count={n}", "--pretty=format:%H", "--name-only"],
            cwd=root, capture_output=True, text=True, timeout=15,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return []
    if proc.returncode != 0:
        return []

    # %H prints the full lowercase hex object name: 40 characters for SHA-1,
    # 64 for SHA-256. Checking the alphabet as well as the length keeps a
    # 40-character file path from being mistaken for a commit boundary.
    hex_digits = frozenset("0123456789abcdef")

    commits: List[List[str]] = []
    current: List[str] = []
    for line in proc.stdout.splitlines():
        entry = line.strip()
        if not entry:
            continue
        if len(entry) in (40, 64) and hex_digits.issuperset(entry):
            # New commit header: flush the files collected for the previous one.
            if current:
                commits.append(current)
            current = []
        else:
            current.append(entry)

    if current:
        commits.append(current)

    return commits
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _match_commit_to_scopes(
    commit_files: List[str],
    scope_dirs: Dict[str, ScopeConfig],
    root: str,
) -> Set[str]:
    """Return the scope keys whose directory contains a file of the commit.

    A scope matches when its key is a directory prefix of at least one
    changed file. Keys may be nested (``pkg/models``), so one file can match
    several scopes. Comparing only the first path component would leave
    nested scopes permanently unmatched.

    Args:
        commit_files: Changed paths as printed by git (forward slashes,
            relative to the repository root).
        scope_dirs: Scopes keyed by directory relative to the root.
        root: Accepted for interface compatibility; not used here.

    Returns:
        The subset of ``scope_dirs`` keys that matched, spelled exactly as
        in ``scope_dirs``. A ``"."`` key is never matched and files without
        a directory component match nothing, as before.
    """
    # Git prints forward slashes on every platform; normalise keys once so
    # they can be compared as prefixes.
    prefixes = []
    for key in scope_dirs:
        normalized = key.replace(os.sep, "/").strip("/")
        if normalized and normalized != ".":
            prefixes.append((normalized + "/", key))

    matched: Set[str] = set()
    for changed_file in commit_files:
        for prefix, key in prefixes:
            if changed_file.startswith(prefix):
                matched.add(key)
    return matched
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Token budgeting: rank files, fill to budget, progressive loading.
|
|
2
|
+
|
|
3
|
+
Context is always included first. Then files are ranked and loaded
|
|
4
|
+
until the budget is exhausted.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from ..models import ResolvedScope
|
|
11
|
+
from ..tokens import estimate_file_tokens, estimate_context_tokens
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def apply_budget(
    resolved: ResolvedScope,
    max_tokens: int,
    task: Optional[str] = None,
    utility_scores: Optional[dict] = None,
    required_files: Optional[set] = None,
) -> ResolvedScope:
    """Apply a token budget to a resolved scope.

    Algorithm:
    1. Reserve tokens for context (always included)
    2. Rank files by relevance tier and size, weighted by utility
    3. Fill files until budget is exhausted
    4. Set truncated=True if files were dropped

    Args:
        resolved: The fully resolved scope
        max_tokens: Maximum total tokens (context + files)
        task: Optional task description for relevance ranking
        utility_scores: Historical file utility data from observations
        required_files: Paths that must be selected; they are moved to the
            front of the ranking before the budget is filled

    Returns:
        A new ``ResolvedScope``. When ``max_tokens`` is not positive, or the
        context alone uses the whole budget, it contains no files and is
        marked truncated; in the latter case the context is cut to
        ``max_tokens * 4`` characters.

    Raises:
        ContextExhaustionError: A required file does not fit in the tokens
            left when it is reached.
    """
    if max_tokens <= 0:
        return ResolvedScope(
            files=[],
            context=resolved.context,
            token_estimate=estimate_context_tokens(resolved.context),
            scope_chain=resolved.scope_chain,
            truncated=True,
        )

    # Context always goes first
    context_tokens = estimate_context_tokens(resolved.context)
    remaining = max_tokens - context_tokens

    if remaining <= 0:
        # Budget only fits context (or not even that)
        return ResolvedScope(
            files=[],
            context=resolved.context[:max_tokens * 4],  # rough trim
            token_estimate=max_tokens,
            scope_chain=resolved.scope_chain,
            truncated=True,
        )

    # Rank and score files (utility data flows through when available)
    scored_files = _rank_files(resolved.files, task, utility_scores)

    # Required files get infinite utility — selected first, unconditionally
    required = required_files or set()
    if required:
        scored_files = _boost_required(scored_files, required)

    # Fill within budget
    selected_files: List[str] = []
    total_file_tokens = 0

    for path, _score in scored_files:
        file_tokens = estimate_file_tokens(path)
        if total_file_tokens + file_tokens <= remaining:
            selected_files.append(path)
            total_file_tokens += file_tokens
        elif path in required:
            # Required file doesn't fit — hard error
            from ..assertions import ContextExhaustionError
            raise ContextExhaustionError(
                assertion_type="ensure_includes",
                detail=f"Budget ({max_tokens}) cannot fit required file: {path} ({file_tokens} tokens)",
                file=path,
                file_tokens=file_tokens,
                budget=max_tokens,
                tokens_used=context_tokens + total_file_tokens,
                suggestion=f"Increase budget to at least {context_tokens + total_file_tokens + file_tokens}",
            )
        # Don't break early — a smaller file later might still fit

    # Build the lookup set once instead of once per file in resolved.files.
    selected_lookup = set(selected_files)

    return ResolvedScope(
        files=selected_files,
        context=resolved.context,
        token_estimate=context_tokens + total_file_tokens,
        scope_chain=resolved.scope_chain,
        truncated=len(selected_files) < len(resolved.files),
        excluded_files=[f for f in resolved.files if f not in selected_lookup],
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _rank_files(
    files: List[str],
    task: Optional[str] = None,
    utility_scores: Optional[dict] = None,
) -> List[tuple]:
    """Order files from most to least relevant.

    Every file starts at 1.0 and is multiplied by static factors: 0.5 when a
    path segment is a test/fixture/migration/cache directory, 0.3 for
    generated, lock or minified file names, ``1 + 0.5 * overlap`` for words
    shared between ``task`` and the file name, and 1.2 / 0.8 for files under
    200 / over 2000 estimated tokens. The result is then passed through
    ``effective_score`` together with the file's entry in ``utility_scores``.

    Args:
        files: Paths to rank.
        task: Optional task description; words of three or more characters
            are compared against the file name.
        utility_scores: Optional mapping from path to utility data.

    Returns:
        ``(path, score)`` pairs sorted by descending score, with the smaller
        token estimate first among equal scores.
    """
    import os
    from ..utility import effective_score as _effective_score

    deprioritized_dirs = ("tests", "test", "fixtures", "migrations", "__pycache__")
    generated_suffixes = (".generated.py", ".generated.ts", ".lock", ".min.js")

    keywords = {word.lower() for word in task.split() if len(word) > 2} if task else set()

    ranked = []
    for path in files:
        name = os.path.basename(path).lower()
        segments = path.lower().split(os.sep)
        weight = 1.0

        if not set(deprioritized_dirs).isdisjoint(segments):
            weight *= 0.5

        if name.endswith(generated_suffixes):
            weight *= 0.3

        if keywords:
            # Split the file name on "_", "-" and "." and keep longer pieces.
            name_pieces = {
                piece
                for piece in name.replace("_", " ").replace("-", " ").replace(".", " ").split()
                if len(piece) > 2
            }
            shared = len(keywords & name_pieces)
            if shared:
                weight *= 1.0 + (shared * 0.5)

        size = estimate_file_tokens(path)
        if 0 < size < 200:
            weight *= 1.2
        elif size > 2000:
            weight *= 0.8

        # Layer utility data on top of the static heuristics.
        observed = utility_scores.get(path) if utility_scores else None
        weight = _effective_score(weight, observed, is_explicit_include=True)

        ranked.append((path, weight, size))

    ranked.sort(key=lambda item: (-item[1], item[2]))
    return [(path, weight) for path, weight, _ in ranked]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _boost_required(
    scored_files: List[tuple],
    required: set,
) -> List[tuple]:
    """Move required files to the front of a scored file list.

    Args:
        scored_files: ``(path, score)`` pairs.
        required: Paths whose score is replaced by positive infinity.

    Returns:
        A new list sorted by descending score. The sort is stable, so entries
        with equal scores keep their incoming relative order.
    """
    top = float("inf")
    boosted = [
        (path, top if path in required else score)
        for path, score in scored_files
    ]
    return sorted(boosted, key=lambda entry: -entry[1])
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Convention compliance: track how well conventions are followed."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List
|
|
4
|
+
|
|
5
|
+
from ..models import ConventionNode, ConventionRule, FileAnalysis
|
|
6
|
+
from .convention_parser import matches_convention
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def compute_compliance(
    convention: ConventionRule,
    nodes: List[ConventionNode],
    ast_data: Dict[str, FileAnalysis],
) -> float:
    """Return the ratio of violation-free nodes to files matching a convention.

    Args:
        convention: Rule whose ``match_criteria`` selects files and whose
            ``name`` selects nodes.
        nodes: Convention nodes; those carrying ``convention.name`` and no
            violations count as compliant.
        ast_data: Mapping from file path to its analysis.

    Returns:
        ``compliant / matching``, or 1.0 when no file matches the criteria.
    """
    matching = sum(
        1
        for path, analysis in ast_data.items()
        if matches_convention(analysis, path, convention.match_criteria)
    )
    if matching == 0:
        return 1.0

    clean_nodes = [
        node for node in nodes
        if node.name == convention.name and not node.violations
    ]
    return len(clean_nodes) / matching
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def convention_severity(compliance: float) -> str:
    """Translate a compliance ratio into an enforcement severity.

    At or above 0.80 the result is ``"nudge"`` (course correction), at or
    above 0.50 it is ``"note"`` (informational), and anything lower is
    ``"retired"`` (not enforced).
    """
    thresholds = ((0.80, "nudge"), (0.50, "note"))
    for floor, label in thresholds:
        if compliance >= floor:
            return label
    return "retired"
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""Convention discovery: mine structural patterns from AST data."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
7
|
+
|
|
8
|
+
from ..models import ConventionRule, DependencyGraph, FileAnalysis, HistoryAnalysis
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def discover_conventions(
    ast_data: Dict[str, FileAnalysis],
    graph: DependencyGraph,
    history: Optional[HistoryAnalysis] = None,
) -> List[ConventionRule]:
    """Mine structural patterns that repeat across files.

    Files are grouped in three passes, strongest signal first:
      Pass 1: shared decorators (e.g., all @app.route files)
      Pass 2: shared base classes (e.g., all BaseModel subclasses)
      Pass 3: shared file-name suffix (e.g., *_repo.py)

    A group produces a convention only when it holds at least three files.
    Each file is counted once per group, however often the decorator or
    base class occurs inside it. Files that were part of a convention in an
    earlier pass are skipped by the later passes.

    Args:
        ast_data: Mapping from file path to its analysis.
        graph: Dependency graph handed to the rule derivation.
        history: Accepted for interface compatibility; currently unused.

    Returns:
        The discovered conventions, in pass order.
    """
    conventions: List[ConventionRule] = []
    claimed_files: Set[str] = set()

    # Pass 1: Shared decorators (strongest signal, survives refactors)
    decorator_groups: Dict[str, List[Tuple[str, FileAnalysis]]] = defaultdict(list)
    for path, analysis in ast_data.items():
        normalized = (
            _normalize_decorator(dec) for dec in (analysis.decorators_used or [])
        )
        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # file with several "@app.route(...)" uses joins the group once.
        for key in dict.fromkeys(normalized):
            decorator_groups[key].append((path, analysis))

    found, newly_claimed = _collect_group_conventions(
        decorator_groups, graph, "decorator"
    )
    conventions.extend(found)
    claimed_files.update(newly_claimed)

    # Pass 2: Shared base classes
    base_groups: Dict[str, List[Tuple[str, FileAnalysis]]] = defaultdict(list)
    for path, analysis in ast_data.items():
        if path in claimed_files:
            continue
        bases = (
            base
            for cls in (analysis.classes or [])
            for base in (cls.bases or [])
        )
        # Several classes in one file may share a base; count the file once.
        for base in dict.fromkeys(bases):
            base_groups[base].append((path, analysis))

    found, newly_claimed = _collect_group_conventions(
        base_groups, graph, "base_class"
    )
    conventions.extend(found)
    claimed_files.update(newly_claimed)

    # Pass 3: Shared suffix/prefix (weakest signal, path-dependent)
    suffix_groups: Dict[str, List[Tuple[str, FileAnalysis]]] = defaultdict(list)
    for path, analysis in ast_data.items():
        if path in claimed_files:
            continue
        stem = os.path.splitext(os.path.basename(path))[0]
        for suffix in _extract_suffixes(stem):
            suffix_groups[suffix].append((path, analysis))

    found, _ = _collect_group_conventions(suffix_groups, graph, "suffix")
    conventions.extend(found)

    return conventions


def _collect_group_conventions(
    groups: Dict[str, List[Tuple[str, FileAnalysis]]],
    graph: DependencyGraph,
    signal_type: str,
    min_files: int = 3,
) -> Tuple[List[ConventionRule], Set[str]]:
    """Build a convention for every group of at least ``min_files`` files.

    Returns the conventions in group order together with the paths of all
    files that belong to a group which produced one.
    """
    found: List[ConventionRule] = []
    claimed: Set[str] = set()
    for signal_value, members in groups.items():
        if len(members) < min_files:
            continue
        conv = _build_convention_from_group(
            members, graph, signal_type=signal_type, signal_value=signal_value
        )
        if conv:
            found.append(conv)
            claimed.update(path for path, _ in members)
    return found, claimed
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _normalize_decorator(dec: str) -> str:
    """Reduce a decorator string to its bare name for grouping.

    Leading ``@`` characters are removed, everything from the first ``(``
    onwards is dropped, and surrounding whitespace is stripped.

        '@app.route("/users")'  ->  'app.route'
        '@router.get'           ->  'router.get'
    """
    head, _, _ = dec.lstrip("@").partition("(")
    return head.strip()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _extract_suffixes(stem: str) -> List[str]:
    """Return the known role suffixes that a file-name stem ends with.

        "user_controller"  ->  ["_controller"]
        "billing_repo"     ->  ["_repo"]

    Suffixes are reported in the order of the built-in list; a stem that
    ends with none of them yields an empty list.
    """
    known_suffixes = (
        "_controller", "_service", "_repo", "_repository",
        "_handler", "_manager", "_factory", "_helper",
        "_view", "_model", "_test", "_middleware",
    )
    return [candidate for candidate in known_suffixes if stem.endswith(candidate)]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _build_convention_from_group(
    files: List[Tuple[str, FileAnalysis]],
    graph: DependencyGraph,
    signal_type: str,
    signal_value: str,
) -> Optional[ConventionRule]:
    """Turn a group of files sharing one structural signal into a rule.

    Args:
        files: ``(path, analysis)`` pairs belonging to the group.
        graph: Dependency graph forwarded to ``_derive_rules``.
        signal_type: Kind of signal the group was formed on.
        signal_value: The shared decorator, base class or suffix.

    Returns:
        A ``ConventionRule`` with source ``"discovered"`` and compliance
        1.0, or ``None`` when no match criteria could be derived.
    """
    group_paths = [path for path, _ in files]
    group_analyses = [analysis for _, analysis in files]

    criteria = _derive_match_criteria(
        group_paths, group_analyses, signal_type, signal_value
    )
    if not criteria:
        return None

    derived_rules = _derive_rules(group_paths, group_analyses, graph)
    label = _derive_name(signal_type, signal_value)
    summary = f"Discovered from {len(files)} files sharing {signal_type}: {signal_value}"

    return ConventionRule(
        name=label,
        source="discovered",
        match_criteria=criteria,
        rules=derived_rules,
        description=summary,
        compliance=1.0,
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _derive_name(signal_type: str, signal_value: str) -> str:
    """Build a display name for a convention from its signal.

    * ``decorator``: the part after the last dot, title-cased with
      underscores turned into spaces ("app.route" -> "Route").
    * ``base_class``: the value followed by " Subclass".
    * ``suffix``: the value without leading underscores, title-cased
      ("_controller" -> "Controller").
    * anything else: the value unchanged.
    """
    if signal_type == "base_class":
        return f"{signal_value} Subclass"

    if signal_type == "decorator":
        raw = signal_value.rpartition(".")[2]
    elif signal_type == "suffix":
        raw = signal_value.lstrip("_")
    else:
        return signal_value

    return raw.replace("_", " ").title()
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _derive_match_criteria(
    paths: List[str],
    analyses: List[FileAnalysis],
    signal_type: str,
    signal_value: str,
) -> dict:
    """Build the match criteria for a group of files sharing one signal.

    The primary signal becomes the first ``any_of`` entry: a regex-escaped
    decorator name, a base-class name, or a ``.*<suffix>\\.py`` path
    pattern. When every path lies in the same directory, a pattern for the
    ``.py`` files of that directory is appended as a second entry.

    Args:
        paths: Paths of the files in the group.
        analyses: Accepted for interface compatibility; not used here.
        signal_type: ``"decorator"``, ``"base_class"`` or ``"suffix"``.
        signal_value: The shared decorator, base class or suffix.

    Returns:
        ``{"any_of": [...]}``, or an empty dict when nothing was derived.
    """
    alternatives = []

    if signal_type == "decorator":
        alternatives.append({"has_decorator": re.escape(signal_value)})
    elif signal_type == "base_class":
        alternatives.append({"base_class": signal_value})
    elif signal_type == "suffix":
        alternatives.append({"file_path": f".*{re.escape(signal_value)}\\.py"})

    # A directory shared by the whole group serves as a secondary hint.
    parent_dirs = {os.path.dirname(p) for p in paths}
    if len(parent_dirs) == 1:
        (shared_dir,) = parent_dirs
        alternatives.append({"file_path": re.escape(shared_dir) + "/.*\\.py"})

    return {"any_of": alternatives} if alternatives else {}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _derive_rules(
    paths: List[str],
    analyses: List[FileAnalysis],
    graph: DependencyGraph,
) -> dict:
    """Find universal behavioral patterns (potential rules) in a file group.

    Two kinds of rule are derived:

    * ``required_methods``: public methods present on the first class of
      every file, produced only when every file defines a class.
    * ``prohibited_imports``: up to five modules that no file in the group
      imports although at least three files in the graph do. The most
      frequently imported candidates are kept, ties broken by name, so the
      result does not depend on set iteration order.

    Args:
        paths: Accepted for interface compatibility; not used here.
        analyses: Analyses of the files in the group.
        graph: Dependency graph describing imports across the codebase.

    Returns:
        A dict with zero, one or both of the keys above; both values are
        alphabetically sorted lists.
    """
    rules = {}

    # Universal methods (all files implement these). Only the first class of
    # each file is inspected.
    if analyses and all(a.classes for a in analyses):
        shared_methods = set.intersection(
            *(set(a.classes[0].methods) for a in analyses)
        )
        required = sorted(m for m in shared_methods if not m.startswith("_"))
        if required:
            rules["required_methods"] = required

    # Universal non-imports (no file in the group imports these)
    group_imports: Set[str] = {
        imp.module
        for a in analyses
        for imp in (a.imports or [])
        if imp.module
    }

    # Check against all imports in codebase to find conspicuous absences.
    # NOTE(review): group imports are `imp.module` values while codebase
    # imports are file stems taken from paths; confirm both use the same
    # naming, otherwise a dotted module name never cancels its stem.
    codebase_imports: Set[str] = set()
    for node in graph.files.values():
        for imp_path in (node.imports or []):
            stem = os.path.splitext(os.path.basename(imp_path))[0] if imp_path else ""
            if stem:
                codebase_imports.add(stem)

    frequency = {
        name: _import_frequency(name, graph)
        for name in codebase_imports - group_imports
    }
    frequent_elsewhere = [name for name, count in frequency.items() if count >= 3]
    if frequent_elsewhere:
        # Rank before truncating so the same five names are chosen every run.
        top = sorted(frequent_elsewhere, key=lambda name: (-frequency[name], name))[:5]
        rules["prohibited_imports"] = sorted(top)

    return rules
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _import_frequency(module: str, graph: DependencyGraph) -> int:
    """Count how many files import a given module.

    An import path counts when its file name without extension equals
    ``module``. This is the same stem extraction ``_derive_rules`` applies
    to import paths, and it avoids the over-counting of a substring test
    (``db`` would otherwise also match ``dbutils.py``).

    Args:
        module: File stem of the imported module.
        graph: Dependency graph whose ``files`` values expose ``imports``.

    Returns:
        The number of files with at least one matching import; each file is
        counted once.
    """
    return sum(
        1
        for node in graph.files.values()
        if any(
            imp_path and os.path.splitext(os.path.basename(imp_path))[0] == module
            for imp_path in (node.imports or [])
        )
    )
|