dotscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. dotscope/.scope +63 -0
  2. dotscope/__init__.py +3 -0
  3. dotscope/absorber.py +390 -0
  4. dotscope/assertions.py +128 -0
  5. dotscope/ast_analyzer.py +2 -0
  6. dotscope/backtest.py +2 -0
  7. dotscope/bench.py +141 -0
  8. dotscope/budget.py +3 -0
  9. dotscope/cache.py +2 -0
  10. dotscope/check/__init__.py +1 -0
  11. dotscope/check/acknowledge.py +2 -0
  12. dotscope/check/checker.py +3 -0
  13. dotscope/check/checks/__init__.py +1 -0
  14. dotscope/check/checks/antipattern.py +2 -0
  15. dotscope/check/checks/boundary.py +2 -0
  16. dotscope/check/checks/contracts.py +3 -0
  17. dotscope/check/checks/direction.py +2 -0
  18. dotscope/check/checks/intent.py +2 -0
  19. dotscope/check/checks/stability.py +2 -0
  20. dotscope/check/constraints.py +2 -0
  21. dotscope/check/models.py +15 -0
  22. dotscope/cli.py +1447 -0
  23. dotscope/composer.py +147 -0
  24. dotscope/constants.py +45 -0
  25. dotscope/context.py +60 -0
  26. dotscope/counterfactual.py +180 -0
  27. dotscope/debug.py +220 -0
  28. dotscope/discovery.py +104 -0
  29. dotscope/formatter.py +157 -0
  30. dotscope/graph.py +3 -0
  31. dotscope/health.py +212 -0
  32. dotscope/help.py +204 -0
  33. dotscope/history.py +6 -0
  34. dotscope/hooks.py +2 -0
  35. dotscope/ingest.py +858 -0
  36. dotscope/intent.py +618 -0
  37. dotscope/lessons.py +223 -0
  38. dotscope/matcher.py +104 -0
  39. dotscope/mcp_server.py +1081 -0
  40. dotscope/models/.scope +45 -0
  41. dotscope/models/__init__.py +7 -0
  42. dotscope/models/core.py +288 -0
  43. dotscope/models/history.py +73 -0
  44. dotscope/models/intent.py +213 -0
  45. dotscope/models/passes.py +58 -0
  46. dotscope/models/state.py +250 -0
  47. dotscope/models.py +9 -0
  48. dotscope/near_miss.py +3 -0
  49. dotscope/onboarding.py +2 -0
  50. dotscope/parser.py +387 -0
  51. dotscope/passes/.scope +105 -0
  52. dotscope/passes/__init__.py +1 -0
  53. dotscope/passes/ast_analyzer.py +508 -0
  54. dotscope/passes/backtest.py +198 -0
  55. dotscope/passes/budget_allocator.py +164 -0
  56. dotscope/passes/convention_compliance.py +40 -0
  57. dotscope/passes/convention_discovery.py +247 -0
  58. dotscope/passes/convention_parser.py +223 -0
  59. dotscope/passes/graph_builder.py +299 -0
  60. dotscope/passes/history_miner.py +336 -0
  61. dotscope/passes/incremental.py +149 -0
  62. dotscope/passes/lang/__init__.py +38 -0
  63. dotscope/passes/lang/_base.py +20 -0
  64. dotscope/passes/lang/_treesitter.py +93 -0
  65. dotscope/passes/lang/go.py +333 -0
  66. dotscope/passes/lang/javascript.py +348 -0
  67. dotscope/passes/lazy.py +152 -0
  68. dotscope/passes/semantic_diff.py +160 -0
  69. dotscope/passes/sentinel/__init__.py +1 -0
  70. dotscope/passes/sentinel/acknowledge.py +222 -0
  71. dotscope/passes/sentinel/checker.py +383 -0
  72. dotscope/passes/sentinel/checks/__init__.py +1 -0
  73. dotscope/passes/sentinel/checks/antipattern.py +84 -0
  74. dotscope/passes/sentinel/checks/boundary.py +46 -0
  75. dotscope/passes/sentinel/checks/contracts.py +148 -0
  76. dotscope/passes/sentinel/checks/convention.py +54 -0
  77. dotscope/passes/sentinel/checks/direction.py +71 -0
  78. dotscope/passes/sentinel/checks/intent.py +207 -0
  79. dotscope/passes/sentinel/checks/stability.py +66 -0
  80. dotscope/passes/sentinel/checks/voice.py +108 -0
  81. dotscope/passes/sentinel/constraints.py +472 -0
  82. dotscope/passes/sentinel/line_filter.py +88 -0
  83. dotscope/passes/sentinel/models.py +15 -0
  84. dotscope/passes/virtual.py +239 -0
  85. dotscope/passes/voice.py +162 -0
  86. dotscope/passes/voice_defaults.py +28 -0
  87. dotscope/passes/voice_discovery.py +245 -0
  88. dotscope/paths.py +32 -0
  89. dotscope/progress.py +44 -0
  90. dotscope/regression.py +147 -0
  91. dotscope/resolver.py +203 -0
  92. dotscope/scanner.py +246 -0
  93. dotscope/sessions.py +2 -0
  94. dotscope/storage/.scope +64 -0
  95. dotscope/storage/__init__.py +1 -0
  96. dotscope/storage/cache.py +114 -0
  97. dotscope/storage/claude_hooks.py +119 -0
  98. dotscope/storage/git_hooks.py +277 -0
  99. dotscope/storage/incremental_state.py +61 -0
  100. dotscope/storage/mcp_config.py +98 -0
  101. dotscope/storage/near_miss.py +183 -0
  102. dotscope/storage/onboarding.py +150 -0
  103. dotscope/storage/session_manager.py +195 -0
  104. dotscope/storage/timing.py +84 -0
  105. dotscope/timing.py +2 -0
  106. dotscope/tokens.py +53 -0
  107. dotscope/utility.py +123 -0
  108. dotscope/virtual.py +3 -0
  109. dotscope/visibility.py +664 -0
  110. dotscope-0.1.0.dist-info/METADATA +50 -0
  111. dotscope-0.1.0.dist-info/RECORD +114 -0
  112. dotscope-0.1.0.dist-info/WHEEL +4 -0
  113. dotscope-0.1.0.dist-info/entry_points.txt +3 -0
  114. dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,198 @@
1
+ """Scope backtesting: validate generated scopes against actual git history.
2
+
3
+ Replays recent commits and measures whether each scope's includes would have
4
+ covered the files that were actually changed. Self-corrects by suggesting
5
+ missing includes.
6
+ """
7
+
8
+ import os
9
+ import subprocess
10
+ from collections import defaultdict
11
+ from typing import Dict, List, Set
12
+
13
+ from ..models import (
14
+ BacktestReport,
15
+ BacktestResult,
16
+ MissingSuggestion,
17
+ ScopeConfig,
18
+ )
19
+ from ..resolver import resolve
20
+
21
+
22
def backtest_scopes(
    root: str,
    scopes: List[ScopeConfig],
    n_commits: int = 50,
) -> BacktestReport:
    """Replay recent commits against each scope's resolved file set.

    A commit is attributed to every scope returned by
    ``_match_commit_to_scopes``. For such a scope the commit counts as
    "fully covered" only when every file changed in the commit is present in
    the scope's resolved file list.

    Args:
        root: Repository root; changed paths from git are joined onto it.
        scopes: Scope configurations to evaluate.
        n_commits: How many recent commits to request from git.

    Returns:
        A BacktestReport with one BacktestResult per scope. An empty report
        is returned when no commit history could be read.
    """
    history = _get_recent_commits(root, n_commits)
    if not history:
        return BacktestReport()

    # Resolved file set and config for each scope, keyed by the scope
    # directory relative to root.
    files_by_scope: Dict[str, Set[str]] = {}
    config_by_dir: Dict[str, ScopeConfig] = {}
    for cfg in scopes:
        resolution = resolve(cfg, follow_related=False, root=root)
        key = os.path.relpath(cfg.directory, root)
        files_by_scope[key] = set(resolution.files)
        config_by_dir[key] = cfg

    # Per-scope tallies: commits seen, commits fully covered, and how often
    # each uncovered file showed up.
    seen: Dict[str, int] = defaultdict(int)
    clean: Dict[str, int] = defaultdict(int)
    miss_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for changed in history:
        for key in _match_commit_to_scopes(changed, config_by_dir, root):
            seen[key] += 1
            known = files_by_scope.get(key, set())
            uncovered = [
                name for name in changed
                if os.path.join(root, name) not in known
            ]
            for name in uncovered:
                miss_counts[key][name] += 1
            if not uncovered:
                clean[key] += 1

    results = []
    for cfg in scopes:
        key = os.path.relpath(cfg.directory, root)
        matched = seen.get(key, 0)
        covered = clean.get(key, 0)
        # A scope that no commit touched is reported with perfect recall.
        ratio = covered / matched if matched > 0 else 1.0

        # Most frequent misses first; one-off misses are not suggested.
        ranked = sorted(miss_counts.get(key, {}).items(), key=lambda kv: -kv[1])
        suggestions = [
            MissingSuggestion(
                path=missed_path,
                appearances=hits,
                would_improve_recall=True,
            )
            for missed_path, hits in ranked
            if hits >= 2
        ]

        results.append(BacktestResult(
            scope_path=cfg.path,
            total_commits=matched,
            fully_covered=covered,
            recall=round(ratio, 3),
            missing_includes=suggestions[:10],
        ))

    covered_sum = sum(r.fully_covered for r in results)
    matched_sum = sum(r.total_commits for r in results)
    overall = covered_sum / matched_sum if matched_sum > 0 else 1.0

    return BacktestReport(
        results=results,
        total_commits=len(history),
        overall_recall=round(overall, 3),
    )
106
+
107
+
108
def auto_correct_scope(
    scope: ScopeConfig,
    result: BacktestResult,
    root: str,
    min_appearances: int = 3,
) -> tuple[ScopeConfig, bool]:
    """Append frequently-missed files from a backtest to a scope's includes.

    The scope is modified in place: each suggestion that reached
    ``min_appearances``, is flagged ``would_improve_recall`` and is not
    already listed gets appended to ``scope.includes``.

    Args:
        scope: Scope to update (mutated).
        result: Backtest result whose ``missing_includes`` are considered.
        root: Accepted for interface compatibility; not used here.
        min_appearances: Minimum appearance count for a suggestion to apply.

    Returns:
        ``(scope, changed)`` where ``changed`` tells whether anything was added.
    """
    added_any = False
    for candidate in result.missing_includes:
        qualifies = (
            candidate.appearances >= min_appearances
            and candidate.would_improve_recall
        )
        if not qualifies:
            continue
        if candidate.path in scope.includes:
            continue
        scope.includes.append(candidate.path)
        added_any = True
    return scope, added_any
125
+
126
+
127
def format_backtest_report(report: BacktestReport) -> str:
    """Render a backtest report as plain text.

    Output is a two-line header and a blank line, then one summary line per
    scope (named after the directory containing the scope file, with a
    ten-cell recall bar) followed by up to five "missing" lines for that scope.
    """
    out = [
        f"Backtest: {report.total_commits} commits analyzed",
        f"Overall recall: {report.overall_recall:.0%}",
        "",
    ]

    for entry in report.results:
        parent_dir = os.path.dirname(entry.scope_path)
        label = os.path.basename(parent_dir)

        # Ten-cell bar: filled cells for each full tenth of recall.
        filled = int(entry.recall * 10)
        bar = "█" * filled + "░" * (10 - filled)

        out.append(
            f" {label}/.scope — recall: {bar} {entry.recall:.0%} "
            f"({entry.fully_covered}/{entry.total_commits} commits)"
        )
        out.extend(
            f" missing: {item.path} (appeared in {item.appearances} commits)"
            for item in entry.missing_includes[:5]
        )

    return "\n".join(out)
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # Internals
151
+ # ---------------------------------------------------------------------------
152
+
153
+ def _get_recent_commits(root: str, n: int) -> List[List[str]]:
154
+ """Get file lists from recent commits."""
155
+ if not os.path.isdir(os.path.join(root, ".git")):
156
+ return []
157
+
158
+ try:
159
+ result = subprocess.run(
160
+ ["git", "log", f"--max-count={n}", "--pretty=format:%H", "--name-only"],
161
+ cwd=root, capture_output=True, text=True, timeout=15,
162
+ )
163
+ if result.returncode != 0:
164
+ return []
165
+ except (subprocess.TimeoutExpired, FileNotFoundError):
166
+ return []
167
+
168
+ commits = []
169
+ current_files = []
170
+
171
+ for line in result.stdout.splitlines():
172
+ if len(line) == 40 and " " not in line: # Commit hash
173
+ if current_files:
174
+ commits.append(current_files)
175
+ current_files = []
176
+ elif line.strip():
177
+ current_files.append(line.strip())
178
+
179
+ if current_files:
180
+ commits.append(current_files)
181
+
182
+ return commits
183
+
184
+
185
+ def _match_commit_to_scopes(
186
+ commit_files: List[str],
187
+ scope_dirs: Dict[str, ScopeConfig],
188
+ root: str,
189
+ ) -> Set[str]:
190
+ """Match a commit's changed files to relevant scopes."""
191
+ matched = set()
192
+ for changed_file in commit_files:
193
+ parts = changed_file.split("/")
194
+ if len(parts) > 1:
195
+ top_dir = parts[0]
196
+ if top_dir in scope_dirs:
197
+ matched.add(top_dir)
198
+ return matched
@@ -0,0 +1,164 @@
1
+ """Token budgeting: rank files, fill to budget, progressive loading.
2
+
3
+ Context is always included first. Then files are ranked and loaded
4
+ until the budget is exhausted.
5
+ """
6
+
7
+
8
+ from typing import List, Optional
9
+
10
+ from ..models import ResolvedScope
11
+ from ..tokens import estimate_file_tokens, estimate_context_tokens
12
+
13
+
14
def apply_budget(
    resolved: ResolvedScope,
    max_tokens: int,
    task: Optional[str] = None,
    utility_scores: Optional[dict] = None,
    required_files: Optional[set] = None,
) -> ResolvedScope:
    """Apply a token budget to a resolved scope.

    Algorithm:
    1. Reserve tokens for context (always included)
    2. Rank files by relevance tier and size, weighted by utility
    3. Fill files until budget is exhausted
    4. Set truncated=True if files were dropped

    Args:
        resolved: The fully resolved scope
        max_tokens: Maximum total tokens (context + files)
        task: Optional task description for relevance ranking
        utility_scores: Historical file utility data from observations
        required_files: Paths that must be selected; they are moved to the
            front of the ranking before the budget is filled

    Returns:
        A new ResolvedScope holding the selected files. When the budget is
        non-positive or is used up by the context alone, no files are
        selected and ``truncated`` is True.

    Raises:
        ContextExhaustionError: A required file does not fit in the tokens
            still available when it is reached.
    """
    if max_tokens <= 0:
        return ResolvedScope(
            files=[],
            context=resolved.context,
            token_estimate=estimate_context_tokens(resolved.context),
            scope_chain=resolved.scope_chain,
            truncated=True,
        )

    # Context always goes first
    context_tokens = estimate_context_tokens(resolved.context)
    remaining = max_tokens - context_tokens

    if remaining <= 0:
        # Budget only fits context (or not even that)
        return ResolvedScope(
            files=[],
            # rough trim; presumably assumes ~4 characters per token
            context=resolved.context[:max_tokens * 4],
            token_estimate=max_tokens,
            scope_chain=resolved.scope_chain,
            truncated=True,
        )

    # Rank and score files (utility data flows through when available)
    scored_files = _rank_files(resolved.files, task, utility_scores)

    # Required files get infinite utility — selected first, unconditionally
    required = required_files or set()
    if required:
        scored_files = _boost_required(scored_files, required)

    # Fill within budget
    selected_files: List[str] = []
    total_file_tokens = 0

    for path, _score in scored_files:
        file_tokens = estimate_file_tokens(path)
        if total_file_tokens + file_tokens <= remaining:
            selected_files.append(path)
            total_file_tokens += file_tokens
        elif path in required:
            # Required file doesn't fit — hard error
            from ..assertions import ContextExhaustionError
            raise ContextExhaustionError(
                assertion_type="ensure_includes",
                detail=f"Budget ({max_tokens}) cannot fit required file: {path} ({file_tokens} tokens)",
                file=path,
                file_tokens=file_tokens,
                budget=max_tokens,
                tokens_used=context_tokens + total_file_tokens,
                suggestion=f"Increase budget to at least {context_tokens + total_file_tokens + file_tokens}",
            )
        # Don't break early — a smaller file later might still fit

    truncated = len(selected_files) < len(resolved.files)

    # Build the membership set once instead of once per candidate file.
    selected_lookup = set(selected_files)
    excluded_files = [f for f in resolved.files if f not in selected_lookup]

    return ResolvedScope(
        files=selected_files,
        context=resolved.context,
        token_estimate=context_tokens + total_file_tokens,
        scope_chain=resolved.scope_chain,
        truncated=truncated,
        excluded_files=excluded_files,
    )
99
+
100
+
101
def _rank_files(
    files: List[str],
    task: Optional[str] = None,
    utility_scores: Optional[dict] = None,
) -> List[tuple]:
    """Score and order files, combining static heuristics with utility data.

    Every file starts at 1.0 and is multiplied by: 0.5 for test/fixture-like
    path components, 0.3 for generated/lock/minified names, a bonus per word
    shared between the task text and the file name, and a size factor
    (1.2 below 200 estimated tokens, 0.8 above 2000). The result is then
    passed through ``effective_score`` together with any utility entry.

    Returns:
        ``(path, score)`` pairs, highest score first; equal scores are ordered
        by ascending token estimate.
    """
    import os
    from ..utility import effective_score as _effective_score

    low_priority_dirs = ("tests", "test", "fixtures", "migrations", "__pycache__")
    low_value_endings = (".generated.py", ".generated.ts", ".lock", ".min.js")

    wanted = {word.lower() for word in task.split() if len(word) > 2} if task else set()

    ranked = []
    for path in files:
        file_name = os.path.basename(path).lower()
        components = path.lower().split(os.sep)
        weight = 1.0

        if any(part in low_priority_dirs for part in components):
            weight *= 0.5

        if file_name.endswith(low_value_endings):
            weight *= 0.3

        if wanted:
            # Split the file name on "_", "-" and "." and keep words > 2 chars.
            spaced = file_name.replace("_", " ").replace("-", " ").replace(".", " ")
            name_words = {word for word in spaced.split() if len(word) > 2}
            shared = len(wanted & name_words)
            if shared:
                weight *= 1.0 + (shared * 0.5)

        size = estimate_file_tokens(path)
        if 0 < size < 200:
            weight *= 1.2
        elif size > 2000:
            weight *= 0.8

        # Layer utility data on top of heuristics
        observed = utility_scores.get(path) if utility_scores else None
        weight = _effective_score(weight, observed, is_explicit_include=True)

        ranked.append((path, weight, size))

    ranked.sort(key=lambda entry: (-entry[1], entry[2]))
    return [(entry[0], entry[1]) for entry in ranked]
150
+
151
+
152
+ def _boost_required(
153
+ scored_files: List[tuple],
154
+ required: set,
155
+ ) -> List[tuple]:
156
+ """Boost required files to infinite utility so they're selected first."""
157
+ boosted = []
158
+ for path, score in scored_files:
159
+ if path in required:
160
+ boosted.append((path, float("inf")))
161
+ else:
162
+ boosted.append((path, score))
163
+ boosted.sort(key=lambda x: -x[1])
164
+ return boosted
@@ -0,0 +1,40 @@
1
+ """Convention compliance: track how well conventions are followed."""
2
+
3
+ from typing import Dict, List
4
+
5
+ from ..models import ConventionNode, ConventionRule, FileAnalysis
6
+ from .convention_parser import matches_convention
7
+
8
+
9
def compute_compliance(
    convention: ConventionRule,
    nodes: List[ConventionNode],
    ast_data: Dict[str, FileAnalysis],
) -> float:
    """Return the share of matching files that follow the convention.

    The denominator is the number of files in ``ast_data`` that satisfy the
    convention's match criteria; the numerator is the number of ``nodes``
    carrying the convention's name with no violations. When no file matches,
    the convention is reported as fully complied with (1.0).
    """
    applicable = sum(
        1
        for path, analysis in ast_data.items()
        if matches_convention(analysis, path, convention.match_criteria)
    )
    if applicable == 0:
        return 1.0

    clean_nodes = [
        node for node in nodes
        if node.name == convention.name and not node.violations
    ]
    return len(clean_nodes) / applicable
27
+
28
+
29
def convention_severity(compliance: float) -> str:
    """Translate a compliance ratio into an enforcement level.

    >= 0.80: "nudge" (course correction)
    >= 0.50: "note" (informational)
    below 0.50: "retired" (not enforced)
    """
    thresholds = ((0.80, "nudge"), (0.50, "note"))
    for floor, level in thresholds:
        if compliance >= floor:
            return level
    return "retired"
@@ -0,0 +1,247 @@
1
+ """Convention discovery: mine structural patterns from AST data."""
2
+
3
+ import os
4
+ import re
5
+ from collections import defaultdict
6
+ from typing import Dict, List, Optional, Set, Tuple
7
+
8
+ from ..models import ConventionRule, DependencyGraph, FileAnalysis, HistoryAnalysis
9
+
10
+
11
def discover_conventions(
    ast_data: Dict[str, FileAnalysis],
    graph: DependencyGraph,
    history: Optional[HistoryAnalysis] = None,
) -> List[ConventionRule]:
    """Mine structural patterns that repeat across files.

    Uses multi-pass clustering to avoid grouping only by directory:
    Pass 1: Group by shared decorators (e.g., all @app.route files)
    Pass 2: Group by shared base classes (e.g., all BaseModel subclasses)
    Pass 3: Group by shared suffix (e.g., *_repo.py)

    A group becomes a convention only when it spans at least three distinct
    files; a single file that repeats a decorator or base class counts once.
    Files placed in a decorator convention are skipped by passes 2 and 3, and
    files placed in a base-class convention are skipped by pass 3, so the
    cross-cutting signals take precedence over the path-dependent one.

    Args:
        ast_data: Per-file analysis keyed by path.
        graph: Dependency graph handed to the rule derivation step.
        history: Accepted for interface compatibility; not consulted here.

    Returns:
        The discovered conventions, in pass order.
    """
    min_group_size = 3
    conventions: List[ConventionRule] = []
    claimed_files: Set[str] = set()

    # Groups map signal -> {path: analysis}. Keying members by path keeps one
    # entry per file however often the file repeats the signal.

    # Pass 1: Shared decorators (strongest signal, survives refactors)
    decorator_groups: Dict[str, Dict[str, FileAnalysis]] = defaultdict(dict)
    for path, analysis in ast_data.items():
        for dec in (analysis.decorators_used or []):
            decorator_groups[_normalize_decorator(dec)][path] = analysis

    for dec, members in decorator_groups.items():
        if len(members) >= min_group_size:
            conv = _build_convention_from_group(
                list(members.items()), graph,
                signal_type="decorator", signal_value=dec,
            )
            if conv:
                conventions.append(conv)
                claimed_files.update(members)

    # Pass 2: Shared base classes
    base_groups: Dict[str, Dict[str, FileAnalysis]] = defaultdict(dict)
    for path, analysis in ast_data.items():
        if path in claimed_files:
            continue
        for cls in (analysis.classes or []):
            for base in (cls.bases or []):
                base_groups[base][path] = analysis

    for base, members in base_groups.items():
        if len(members) >= min_group_size:
            conv = _build_convention_from_group(
                list(members.items()), graph,
                signal_type="base_class", signal_value=base,
            )
            if conv:
                conventions.append(conv)
                claimed_files.update(members)

    # Pass 3: Shared suffix (weakest signal, path-dependent)
    suffix_groups: Dict[str, Dict[str, FileAnalysis]] = defaultdict(dict)
    for path, analysis in ast_data.items():
        if path in claimed_files:
            continue
        stem = os.path.splitext(os.path.basename(path))[0]
        for suffix in _extract_suffixes(stem):
            suffix_groups[suffix][path] = analysis

    for suffix, members in suffix_groups.items():
        if len(members) >= min_group_size:
            conv = _build_convention_from_group(
                list(members.items()), graph,
                signal_type="suffix", signal_value=suffix,
            )
            if conv:
                conventions.append(conv)

    return conventions
82
+
83
+
84
+ def _normalize_decorator(dec: str) -> str:
85
+ """Normalize a decorator string for grouping.
86
+
87
+ '@app.route("/users")' -> 'app.route'
88
+ '@router.get' -> 'router.get'
89
+ """
90
+ dec = dec.lstrip("@")
91
+ # Strip arguments
92
+ paren = dec.find("(")
93
+ if paren != -1:
94
+ dec = dec[:paren]
95
+ return dec.strip()
96
+
97
+
98
+ def _extract_suffixes(stem: str) -> List[str]:
99
+ """Extract meaningful suffixes from a filename stem.
100
+
101
+ "user_controller" -> ["_controller"]
102
+ "billing_repo" -> ["_repo"]
103
+ """
104
+ known = (
105
+ "_controller", "_service", "_repo", "_repository",
106
+ "_handler", "_manager", "_factory", "_helper",
107
+ "_view", "_model", "_test", "_middleware",
108
+ )
109
+ result = []
110
+ for suffix in known:
111
+ if stem.endswith(suffix):
112
+ result.append(suffix)
113
+ return result
114
+
115
+
116
def _build_convention_from_group(
    files: List[Tuple[str, FileAnalysis]],
    graph: DependencyGraph,
    signal_type: str,
    signal_value: str,
) -> Optional[ConventionRule]:
    """Turn a group of files sharing one structural signal into a rule.

    Args:
        files: ``(path, analysis)`` pairs belonging to the group.
        graph: Dependency graph forwarded to the rule derivation.
        signal_type: Kind of shared trait ("decorator", "base_class", "suffix").
        signal_value: The shared decorator, base class or suffix itself.

    Returns:
        A ConventionRule marked as discovered with compliance 1.0, or None
        when no match criteria could be derived for the group.
    """
    group_paths = [path for path, _analysis in files]
    group_analyses = [analysis for _path, analysis in files]

    criteria = _derive_match_criteria(
        group_paths, group_analyses, signal_type, signal_value
    )
    if not criteria:
        return None

    derived_rules = _derive_rules(group_paths, group_analyses, graph)
    label = _derive_name(signal_type, signal_value)
    summary = f"Discovered from {len(files)} files sharing {signal_type}: {signal_value}"

    return ConventionRule(
        name=label,
        source="discovered",
        match_criteria=criteria,
        rules=derived_rules,
        description=summary,
        compliance=1.0,
    )
141
+
142
+
143
+ def _derive_name(signal_type: str, signal_value: str) -> str:
144
+ """Generate a human-readable convention name."""
145
+ if signal_type == "decorator":
146
+ # "@app.route" -> "Route Handler"
147
+ parts = signal_value.split(".")
148
+ name = parts[-1] if parts else signal_value
149
+ return name.replace("_", " ").title()
150
+ elif signal_type == "base_class":
151
+ return f"{signal_value} Subclass"
152
+ elif signal_type == "suffix":
153
+ # "_controller" -> "Controller"
154
+ return signal_value.lstrip("_").replace("_", " ").title()
155
+ return signal_value
156
+
157
+
158
+ def _derive_match_criteria(
159
+ paths: List[str],
160
+ analyses: List[FileAnalysis],
161
+ signal_type: str,
162
+ signal_value: str,
163
+ ) -> dict:
164
+ """Find common structural traits across files sharing a fingerprint."""
165
+ any_of = []
166
+ all_of = []
167
+
168
+ # Primary signal goes into any_of
169
+ if signal_type == "decorator":
170
+ any_of.append({"has_decorator": re.escape(signal_value)})
171
+ elif signal_type == "base_class":
172
+ any_of.append({"base_class": signal_value})
173
+ elif signal_type == "suffix":
174
+ pattern = f".*{re.escape(signal_value)}\\.py"
175
+ any_of.append({"file_path": pattern})
176
+
177
+ # Common directory as secondary hint
178
+ dirs = set(os.path.dirname(p) for p in paths)
179
+ if len(dirs) == 1:
180
+ dir_pattern = re.escape(dirs.pop()) + "/.*\\.py"
181
+ any_of.append({"file_path": dir_pattern})
182
+
183
+ criteria = {}
184
+ if any_of:
185
+ criteria["any_of"] = any_of
186
+ if all_of:
187
+ criteria["all_of"] = all_of
188
+ return criteria
189
+
190
+
191
+ def _derive_rules(
192
+ paths: List[str],
193
+ analyses: List[FileAnalysis],
194
+ graph: DependencyGraph,
195
+ ) -> dict:
196
+ """Find universal behavioral patterns (potential rules)."""
197
+ rules = {}
198
+
199
+ # Universal methods (all files implement these)
200
+ if all(a.classes for a in analyses):
201
+ all_methods = [
202
+ set(a.classes[0].methods)
203
+ for a in analyses if a.classes
204
+ ]
205
+ if all_methods:
206
+ common_methods = set.intersection(*all_methods)
207
+ required = sorted(m for m in common_methods if not m.startswith("_"))
208
+ if required:
209
+ rules["required_methods"] = required
210
+
211
+ # Universal non-imports (no file imports these)
212
+ all_imports: Set[str] = set()
213
+ for a in analyses:
214
+ for imp in (a.imports or []):
215
+ if imp.module:
216
+ all_imports.add(imp.module)
217
+
218
+ # Check against all imports in codebase to find conspicuous absences
219
+ all_codebase_imports: Set[str] = set()
220
+ for node in graph.files.values():
221
+ for imp_path in (node.imports or []):
222
+ # Extract module name from path
223
+ module = os.path.splitext(os.path.basename(imp_path))[0] if imp_path else ""
224
+ if module:
225
+ all_codebase_imports.add(module)
226
+
227
+ common_absences = all_codebase_imports - all_imports
228
+ if common_absences:
229
+ frequent_elsewhere = [
230
+ imp for imp in common_absences
231
+ if _import_frequency(imp, graph) >= 3
232
+ ]
233
+ if frequent_elsewhere:
234
+ rules["prohibited_imports"] = sorted(frequent_elsewhere[:5])
235
+
236
+ return rules
237
+
238
+
239
+ def _import_frequency(module: str, graph: DependencyGraph) -> int:
240
+ """Count how many files import a given module."""
241
+ count = 0
242
+ for node in graph.files.values():
243
+ for imp_path in (node.imports or []):
244
+ if module in imp_path:
245
+ count += 1
246
+ break
247
+ return count