dotscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dotscope/.scope +63 -0
- dotscope/__init__.py +3 -0
- dotscope/absorber.py +390 -0
- dotscope/assertions.py +128 -0
- dotscope/ast_analyzer.py +2 -0
- dotscope/backtest.py +2 -0
- dotscope/bench.py +141 -0
- dotscope/budget.py +3 -0
- dotscope/cache.py +2 -0
- dotscope/check/__init__.py +1 -0
- dotscope/check/acknowledge.py +2 -0
- dotscope/check/checker.py +3 -0
- dotscope/check/checks/__init__.py +1 -0
- dotscope/check/checks/antipattern.py +2 -0
- dotscope/check/checks/boundary.py +2 -0
- dotscope/check/checks/contracts.py +3 -0
- dotscope/check/checks/direction.py +2 -0
- dotscope/check/checks/intent.py +2 -0
- dotscope/check/checks/stability.py +2 -0
- dotscope/check/constraints.py +2 -0
- dotscope/check/models.py +15 -0
- dotscope/cli.py +1447 -0
- dotscope/composer.py +147 -0
- dotscope/constants.py +45 -0
- dotscope/context.py +60 -0
- dotscope/counterfactual.py +180 -0
- dotscope/debug.py +220 -0
- dotscope/discovery.py +104 -0
- dotscope/formatter.py +157 -0
- dotscope/graph.py +3 -0
- dotscope/health.py +212 -0
- dotscope/help.py +204 -0
- dotscope/history.py +6 -0
- dotscope/hooks.py +2 -0
- dotscope/ingest.py +858 -0
- dotscope/intent.py +618 -0
- dotscope/lessons.py +223 -0
- dotscope/matcher.py +104 -0
- dotscope/mcp_server.py +1081 -0
- dotscope/models/.scope +45 -0
- dotscope/models/__init__.py +7 -0
- dotscope/models/core.py +288 -0
- dotscope/models/history.py +73 -0
- dotscope/models/intent.py +213 -0
- dotscope/models/passes.py +58 -0
- dotscope/models/state.py +250 -0
- dotscope/models.py +9 -0
- dotscope/near_miss.py +3 -0
- dotscope/onboarding.py +2 -0
- dotscope/parser.py +387 -0
- dotscope/passes/.scope +105 -0
- dotscope/passes/__init__.py +1 -0
- dotscope/passes/ast_analyzer.py +508 -0
- dotscope/passes/backtest.py +198 -0
- dotscope/passes/budget_allocator.py +164 -0
- dotscope/passes/convention_compliance.py +40 -0
- dotscope/passes/convention_discovery.py +247 -0
- dotscope/passes/convention_parser.py +223 -0
- dotscope/passes/graph_builder.py +299 -0
- dotscope/passes/history_miner.py +336 -0
- dotscope/passes/incremental.py +149 -0
- dotscope/passes/lang/__init__.py +38 -0
- dotscope/passes/lang/_base.py +20 -0
- dotscope/passes/lang/_treesitter.py +93 -0
- dotscope/passes/lang/go.py +333 -0
- dotscope/passes/lang/javascript.py +348 -0
- dotscope/passes/lazy.py +152 -0
- dotscope/passes/semantic_diff.py +160 -0
- dotscope/passes/sentinel/__init__.py +1 -0
- dotscope/passes/sentinel/acknowledge.py +222 -0
- dotscope/passes/sentinel/checker.py +383 -0
- dotscope/passes/sentinel/checks/__init__.py +1 -0
- dotscope/passes/sentinel/checks/antipattern.py +84 -0
- dotscope/passes/sentinel/checks/boundary.py +46 -0
- dotscope/passes/sentinel/checks/contracts.py +148 -0
- dotscope/passes/sentinel/checks/convention.py +54 -0
- dotscope/passes/sentinel/checks/direction.py +71 -0
- dotscope/passes/sentinel/checks/intent.py +207 -0
- dotscope/passes/sentinel/checks/stability.py +66 -0
- dotscope/passes/sentinel/checks/voice.py +108 -0
- dotscope/passes/sentinel/constraints.py +472 -0
- dotscope/passes/sentinel/line_filter.py +88 -0
- dotscope/passes/sentinel/models.py +15 -0
- dotscope/passes/virtual.py +239 -0
- dotscope/passes/voice.py +162 -0
- dotscope/passes/voice_defaults.py +28 -0
- dotscope/passes/voice_discovery.py +245 -0
- dotscope/paths.py +32 -0
- dotscope/progress.py +44 -0
- dotscope/regression.py +147 -0
- dotscope/resolver.py +203 -0
- dotscope/scanner.py +246 -0
- dotscope/sessions.py +2 -0
- dotscope/storage/.scope +64 -0
- dotscope/storage/__init__.py +1 -0
- dotscope/storage/cache.py +114 -0
- dotscope/storage/claude_hooks.py +119 -0
- dotscope/storage/git_hooks.py +277 -0
- dotscope/storage/incremental_state.py +61 -0
- dotscope/storage/mcp_config.py +98 -0
- dotscope/storage/near_miss.py +183 -0
- dotscope/storage/onboarding.py +150 -0
- dotscope/storage/session_manager.py +195 -0
- dotscope/storage/timing.py +84 -0
- dotscope/timing.py +2 -0
- dotscope/tokens.py +53 -0
- dotscope/utility.py +123 -0
- dotscope/virtual.py +3 -0
- dotscope/visibility.py +664 -0
- dotscope-0.1.0.dist-info/METADATA +50 -0
- dotscope-0.1.0.dist-info/RECORD +114 -0
- dotscope-0.1.0.dist-info/WHEEL +4 -0
- dotscope-0.1.0.dist-info/entry_points.txt +3 -0
- dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""Virtual scopes: cross-cutting concern detection from import graph hubs.
|
|
2
|
+
|
|
3
|
+
Directory scopes capture physical structure. Virtual scopes capture logical
|
|
4
|
+
architecture — a User lifecycle spanning models/, auth/, validators/, serializers/.
|
|
5
|
+
|
|
6
|
+
Detection algorithm:
|
|
7
|
+
1. Find hub files (imported by 3+ files from 2+ directories)
|
|
8
|
+
2. Collect cluster (hub + importers + shared imports within 1 hop)
|
|
9
|
+
3. Filter by cohesion (internal edges / all edges >= min_cohesion, default 0.3)
|
|
10
|
+
4. Name from the hub file's basename (parent directory for __init__/index hubs)
|
|
11
|
+
5. Deduplicate overlapping clusters (>70% overlap → keep the larger cluster)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Dict, List, Optional, Set
|
|
18
|
+
|
|
19
|
+
from ..context import parse_context
|
|
20
|
+
from ..graph import DependencyGraph
|
|
21
|
+
from ..models.core import ScopeConfig
|
|
22
|
+
from ..models.passes import VirtualScope # noqa: F401
|
|
23
|
+
from ..tokens import estimate_scope_tokens
|
|
24
|
+
|
|
25
|
+
# Utility directories whose files connect everything (not meaningful clusters)
|
|
26
|
+
_UTILITY_DIRS = {"utils", "helpers", "common", "shared", "lib", "core"}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def detect_virtual_scopes(
    graph: DependencyGraph,
    min_importers: int = 3,
    min_directories: int = 2,
    min_cohesion: float = 0.3,
) -> List[ScopeConfig]:
    """Turn import-graph hubs into virtual ScopeConfig objects.

    Pipeline: find hub files, build one cluster per hub, drop clusters
    whose cohesion is below ``min_cohesion``, collapse heavily overlapping
    clusters, then convert each survivor into a ScopeConfig.

    Args:
        graph: Dependency graph; ``graph.root`` is read here and the graph
            is handed to the hub/cluster helpers.
        min_importers: Minimum importer count for a file to be a hub.
        min_directories: Minimum number of distinct importer directories.
        min_cohesion: Lowest cohesion a cluster may have and still be kept.

    Returns:
        One ScopeConfig per surviving cluster. Clusters for which
        ``_cluster_to_scope`` returns a falsy value are skipped.
    """
    root = graph.root

    cohesive = []
    for hub in _find_hubs(graph, min_importers, min_directories):
        candidate = _build_cluster(hub, graph)
        if candidate.cohesion >= min_cohesion:
            cohesive.append(candidate)

    configs = (_cluster_to_scope(cluster, root) for cluster in _deduplicate(cohesive))
    return [config for config in configs if config]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _find_hubs(
|
|
56
|
+
graph: DependencyGraph, min_importers: int, min_dirs: int
|
|
57
|
+
) -> List[str]:
|
|
58
|
+
"""Find files imported by 3+ files from 2+ different directories."""
|
|
59
|
+
hubs = []
|
|
60
|
+
for path, node in graph.files.items():
|
|
61
|
+
if not node.imported_by:
|
|
62
|
+
continue
|
|
63
|
+
|
|
64
|
+
# Skip utility directories
|
|
65
|
+
parts = Path(path).parts
|
|
66
|
+
if len(parts) > 1 and parts[0].lower() in _UTILITY_DIRS:
|
|
67
|
+
continue
|
|
68
|
+
|
|
69
|
+
importer_dirs = set()
|
|
70
|
+
for imp_by in node.imported_by:
|
|
71
|
+
imp_parts = Path(imp_by).parts
|
|
72
|
+
if len(imp_parts) > 1:
|
|
73
|
+
importer_dirs.add(imp_parts[0])
|
|
74
|
+
|
|
75
|
+
if len(node.imported_by) >= min_importers and len(importer_dirs) >= min_dirs:
|
|
76
|
+
hubs.append(path)
|
|
77
|
+
|
|
78
|
+
return hubs
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _build_cluster(hub: str, graph: DependencyGraph) -> VirtualScope:
    """Assemble the cluster of files centred on ``hub``.

    Members are the hub, every file that imports it, and any other file
    imported by at least two of those importers.

    Cohesion is the share of import edges leaving cluster members that
    land on another member (0.0 when the members have no imports at all),
    rounded to three decimals. ``directories_spanned`` counts distinct
    first path components among members with a multi-component path.

    A hub missing from ``graph.files`` yields an empty, unnamed scope.
    """
    hub_node = graph.files.get(hub)
    if not hub_node:
        return VirtualScope(name="", hub_file=hub, files=[], cohesion=0, directories_spanned=0)

    members: Set[str] = {hub, *hub_node.imported_by}

    # Tally how many importers pull in each file that is not already a member.
    shared_counts: Dict[str, int] = {}
    for importer in hub_node.imported_by:
        importer_node = graph.files.get(importer)
        if not importer_node:
            continue
        for dep in importer_node.imports:
            if dep not in members:
                shared_counts[dep] = shared_counts.get(dep, 0) + 1

    members.update(dep for dep, hits in shared_counts.items() if hits >= 2)

    # tally[True] counts edges that stay inside the cluster, tally[False] the rest.
    tally = {True: 0, False: 0}
    for member in members:
        member_node = graph.files.get(member)
        if not member_node:
            continue
        for target in member_node.imports:
            tally[target in members] += 1

    edge_total = tally[True] + tally[False]
    cohesion = tally[True] / edge_total if edge_total > 0 else 0.0

    top_dirs = {
        parts[0]
        for parts in (Path(member).parts for member in members)
        if len(parts) > 1
    }

    return VirtualScope(
        name=_infer_name(hub, graph),
        hub_file=hub,
        files=sorted(members),
        cohesion=round(cohesion, 3),
        directories_spanned=len(top_dirs),
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _infer_name(hub: str, graph: DependencyGraph) -> str:
|
|
143
|
+
"""Infer a name for the virtual scope from the hub file."""
|
|
144
|
+
basename = os.path.splitext(os.path.basename(hub))[0]
|
|
145
|
+
# e.g., "models/user.py" → "user_lifecycle"
|
|
146
|
+
if basename in ("__init__", "index"):
|
|
147
|
+
parts = Path(hub).parts
|
|
148
|
+
if len(parts) > 1:
|
|
149
|
+
basename = parts[-2]
|
|
150
|
+
return f"{basename}_lifecycle"
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _deduplicate(clusters: List[VirtualScope]) -> List[VirtualScope]:
|
|
154
|
+
"""Merge clusters with >70% file overlap."""
|
|
155
|
+
if len(clusters) <= 1:
|
|
156
|
+
return clusters
|
|
157
|
+
|
|
158
|
+
result = []
|
|
159
|
+
merged = set()
|
|
160
|
+
|
|
161
|
+
for i, a in enumerate(clusters):
|
|
162
|
+
if i in merged:
|
|
163
|
+
continue
|
|
164
|
+
best = a
|
|
165
|
+
for j, b in enumerate(clusters):
|
|
166
|
+
if j <= i or j in merged:
|
|
167
|
+
continue
|
|
168
|
+
a_set = set(a.files)
|
|
169
|
+
b_set = set(b.files)
|
|
170
|
+
overlap = len(a_set & b_set) / len(a_set | b_set) if (a_set | b_set) else 0
|
|
171
|
+
if overlap > 0.7:
|
|
172
|
+
# Keep the one with more files
|
|
173
|
+
if len(b.files) > len(best.files):
|
|
174
|
+
best = b
|
|
175
|
+
merged.add(j)
|
|
176
|
+
result.append(best)
|
|
177
|
+
|
|
178
|
+
return result
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _cluster_to_scope(cluster: VirtualScope, root: str) -> Optional[ScopeConfig]:
    """Build the ScopeConfig that represents ``cluster``.

    Returns None when the cluster has no files or no name. Otherwise the
    config is placed at ``<root>/virtual/<name>.scope``, includes the
    cluster's files, and lists ``<dir>/.scope`` as related for every first
    path component found among multi-component file paths.
    """
    if not (cluster.files and cluster.name):
        return None

    top_dirs = sorted({
        parts[0]
        for parts in (Path(member).parts for member in cluster.files)
        if len(parts) > 1
    })

    summary = "".join([
        f"Virtual scope: {cluster.name} ",
        f"(spans {cluster.directories_spanned} modules, ",
        f"hub: {cluster.hub_file})",
    ])

    # NOTE(review): the count below is every cluster member except the hub.
    # _build_cluster also adds shared imports to the cluster, so this can
    # exceed the number of files that actually import the hub - confirm intent.
    non_hub_members = len(cluster.files) - 1
    context_text = "\n".join([
        "Cross-cutting concern detected from import graph.",
        f"Hub file: {cluster.hub_file} "
        f"(imported by {non_hub_members} files across "
        f"{cluster.directories_spanned} modules)",
        "",
        f"Directories spanned: {', '.join(top_dirs)}",
        f"Cohesion: {cluster.cohesion:.0%}",
    ])
    context = parse_context(context_text)

    absolute_files = [os.path.join(root, member) for member in cluster.files]
    token_est = estimate_scope_tokens(absolute_files)

    short_name = cluster.name.replace("_lifecycle", "")
    return ScopeConfig(
        path=os.path.join(root, "virtual", f"{cluster.name}.scope"),
        description=summary,
        includes=cluster.files,
        excludes=[],
        context=context,
        related=[f"{directory}/.scope" for directory in top_dirs],
        tags=["virtual", "cross-cutting", short_name],
        tokens_estimate=token_est,
    )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def format_virtual_scopes(scopes: List[ScopeConfig], root: str) -> str:
    """Render detected virtual scopes as a plain-text report.

    Each scope contributes its path relative to ``root``, its description,
    a file/token summary, an optional "related" line, and a trailing blank
    line. With no scopes, a fixed "nothing detected" sentence is returned.
    """
    if not scopes:
        return "No cross-cutting virtual scopes detected."

    report = [f"Detected {len(scopes)} virtual scope(s):", ""]
    for scope in scopes:
        entry = [
            f" {os.path.relpath(scope.path, root)}",
            f" {scope.description}",
            f" files: {len(scope.includes)}, ~{scope.tokens_estimate:,} tokens",
        ]
        if scope.related:
            entry.append(f" related: {', '.join(scope.related)}")
        entry.append("")
        report.extend(entry)

    return "\n".join(report)
|
dotscope/passes/voice.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Voice injection into resolve responses and canonical snippet extraction."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import os
|
|
5
|
+
from typing import Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from ..models.intent import CanonicalExample
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_voice_response(
    voice_config: dict,
    root: str,
    scope_files: List[str],
    conventions: Optional[list] = None,
) -> dict:
    """Assemble the ``voice`` payload for a resolve_scope response.

    The result always carries ``mode`` (default "adaptive") and ``global``
    (the serialized rules). A ``convention`` entry is added for the first
    convention whose voice config is a dict naming a ``canonical_example``
    from which a snippet can actually be extracted. Conventions may be
    objects (attribute access) or dicts (key access).

    ``scope_files`` is accepted but not used here.
    """
    response = {
        "mode": voice_config.get("mode", "adaptive"),
        "global": _serialize_global(voice_config),
    }

    for conv in conventions or ():
        conv_is_dict = isinstance(conv, dict)

        voice = getattr(conv, "voice", None)
        if not voice and conv_is_dict:
            voice = conv.get("voice")
        # Only a non-empty dict can name a canonical example.
        if not voice or not isinstance(voice, dict):
            continue

        example_path = voice.get("canonical_example")
        if not example_path:
            continue

        snippet = extract_canonical_snippet(example_path, root)
        if not snippet:
            continue

        conv_name = conv.get("name", "") if conv_is_dict else getattr(conv, "name", "")
        response["convention"] = {
            "name": conv_name,
            "style_notes": voice.get("style_notes", ""),
            "canonical_snippet": snippet,
        }
        break

    return response
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _serialize_global(voice_config: dict) -> str:
|
|
53
|
+
"""Serialize voice rules as compact prose for the agent."""
|
|
54
|
+
rules = voice_config.get("rules", {})
|
|
55
|
+
if not rules:
|
|
56
|
+
return ""
|
|
57
|
+
|
|
58
|
+
parts = []
|
|
59
|
+
for key in ("typing", "docstrings", "error_handling", "structure", "density", "comments", "imports"):
|
|
60
|
+
val = rules.get(key)
|
|
61
|
+
if val:
|
|
62
|
+
parts.append(val.strip())
|
|
63
|
+
|
|
64
|
+
return " ".join(parts)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def extract_canonical_snippet(
    file_path: str,
    repo_root: str,
    max_lines: int = 40,
) -> Optional[str]:
    """Extract the first top-level class or function as a canonical snippet.

    The file is parsed with ``ast`` so that imports, the module docstring
    and other leading statements are skipped. Decorators on the chosen
    definition are included, since they are part of how it is written.

    Args:
        file_path: Path to the file; joined onto ``repo_root`` unless absolute.
        repo_root: Directory that relative ``file_path`` values are resolved
            against.
        max_lines: Maximum number of snippet lines. Longer snippets are cut
            and a trailing "..." line is appended.

    Returns:
        The snippet text, or None when the file is missing, cannot be read
        or parsed, or has no top-level class/function definition.
    """
    full_path = file_path if os.path.isabs(file_path) else os.path.join(repo_root, file_path)
    if not os.path.isfile(full_path):
        return None

    try:
        with open(full_path, "r", encoding="utf-8") as f:
            source = f.read()
        tree = ast.parse(source)
    except (SyntaxError, OSError, ValueError):
        # ValueError covers UnicodeDecodeError and the ValueError that some
        # interpreter versions raise for source containing NUL bytes.
        return None

    definition_types = (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
    target = next(
        (node for node in ast.iter_child_nodes(tree) if isinstance(node, definition_types)),
        None,
    )
    if target is None:
        return None

    snippet = ast.get_source_segment(source, target)
    if not snippet:
        # Fallback: slice by line numbers when no segment is available.
        lines = source.splitlines()
        start = target.lineno - 1
        end_line = getattr(target, "end_lineno", None) or (start + max_lines)
        snippet = "\n".join(lines[start:min(end_line, start + max_lines)])

    # The definition's own span starts at the def/class line, so decorator
    # lines have to be recovered separately and put back on top.
    decorator_lines = [
        "@" + text
        for text in (ast.get_source_segment(source, dec) for dec in target.decorator_list)
        if text
    ]
    if decorator_lines:
        snippet = "\n".join(decorator_lines + [snippet])

    snippet_lines = snippet.splitlines()
    if len(snippet_lines) > max_lines:
        snippet = "\n".join(snippet_lines[:max_lines]) + "\n ..."

    return snippet
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def select_canonical(
    convention: object,
    nodes: list,
    history: Optional[dict],
    repo_root: str,
) -> Optional[CanonicalExample]:
    """Choose a representative file and extract its first class/function.

    Selection steps:
      1. Keep nodes with no (or empty) ``violations``; None if none remain.
      2. If ``history["file_histories"]`` is available, order them by
         ``last_modified`` descending (missing entries sort as "").
      3. Among the first ten, pick the one at index ``len // 2`` after
         ordering by line count; unreadable files count as length 0.

    ``convention`` is accepted but not consulted. The returned example
    carries whatever ``extract_canonical_snippet`` returns for the chosen
    file, which may be None.
    """
    candidates = [node for node in nodes if not getattr(node, "violations", None)]
    if not candidates:
        return None

    if history and history.get("file_histories"):
        def _last_modified(node):
            entry = history["file_histories"].get(getattr(node, "file_path", ""), {})
            return entry.get("last_modified", "")

        candidates = sorted(candidates, key=_last_modified, reverse=True)

    def _line_count(node):
        target = os.path.join(repo_root, getattr(node, "file_path", ""))
        try:
            with open(target, "r", encoding="utf-8") as handle:
                return len(handle.readlines())
        except (IOError, UnicodeDecodeError):
            return 0

    # The sort is stable, so files of equal length keep their recency order.
    by_length = sorted(candidates[:10], key=_line_count)
    chosen = by_length[len(by_length) // 2]
    chosen_path = getattr(chosen, "file_path", "")

    return CanonicalExample(
        file_path=chosen_path,
        snippet=extract_canonical_snippet(chosen_path, repo_root),
    )
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Prescriptive voice defaults for new codebases.
|
|
2
|
+
|
|
3
|
+
Applied when detect_codebase_maturity returns "new" (<10 files or
|
|
4
|
+
<20 commits). Opinionated starting point that the developer can relax.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ..models.intent import DiscoveredVoice
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def prescriptive_defaults() -> DiscoveredVoice:
    """Build the strict, opinionated voice used for greenfield projects.

    The result is in "prescriptive" mode with a rule for every voice
    category, no measured stats, and enforcement set to "hold" for bare
    excepts and "note" for missing type hints.
    """
    strict_rules = {
        "typing": "Type hints on all function signatures. Return types always specified.",
        "docstrings": "Google style. Imperative mood. One-line if the name explains it.",
        "error_handling": "Domain exceptions. No bare excepts. Let unexpected errors propagate.",
        "structure": "Early returns over nested conditionals. Guard clauses at the top.",
        "density": "Concise. Comprehensions where readable. No filler variables.",
        "comments": "Comments explain why, not what.",
        "imports": "stdlib first, third-party second, local third. One import per line.",
    }
    enforcement = {
        "bare_excepts": "hold",
        "missing_type_hints": "note",
    }
    return DiscoveredVoice(
        mode="prescriptive",
        rules=strict_rules,
        stats={},
        enforce=enforcement,
    )
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""Voice discovery: scan codebase for coding style patterns.
|
|
2
|
+
|
|
3
|
+
Analyzes type hint adoption, docstring style, error handling,
|
|
4
|
+
structural preferences, and comprehension density. On new codebases,
|
|
5
|
+
returns prescriptive defaults. On existing codebases, codifies
|
|
6
|
+
what's already there.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import ast
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
from ..models.intent import DiscoveredVoice
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class VoiceStats:
    """Counters accumulated while scanning a codebase for style signals."""

    # Function signatures: how many were seen and how many carry type hints.
    total_functions: int = 0
    typed_functions: int = 0

    # Docstrings found, plus a per-style tally (fresh dict per instance).
    total_docstrings: int = 0
    docstring_styles: Dict[str, int] = field(
        default_factory=lambda: dict.fromkeys(("google", "sphinx", "numpy", "other"), 0)
    )

    # Exception handlers seen, and how many were bare ``except:``.
    total_excepts: int = 0
    bare_excepts: int = 0

    # Function definitions inspected for early returns, and how many had one.
    total_return_functions: int = 0
    early_return_functions: int = 0

    # Comprehension expressions versus ``for`` statements.
    comprehensions: int = 0
    for_loops: int = 0

    # Files that were successfully parsed.
    files_analyzed: int = 0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def detect_codebase_maturity(
    ast_data: Dict[str, object],
    history: Optional[object] = None,
    override: Optional[str] = None,
) -> str:
    """Classify the codebase as "new" or "existing".

    Without an override, the codebase is "new" when it has fewer than 10
    analyzed files or fewer than 20 analyzed commits; otherwise it is
    "existing".

    Args:
        ast_data: Per-file analysis mapping; only its size is used.
        history: Object exposing ``commits_analyzed``. When it is falsy or
            lacks the attribute, the commit count is taken as 0.
        override: "prescriptive" forces "new", "adaptive" forces
            "existing"; any other value is ignored.
    """
    forced = {"prescriptive": "new", "adaptive": "existing"}.get(override)
    if forced is not None:
        return forced

    files_seen = len(ast_data)
    # NOTE(review): with no history the count is 0, so the result is always
    # "new" regardless of file count - confirm this is intended.
    commits_seen = getattr(history, "commits_analyzed", 0) if history else 0

    looks_new = files_seen < 10 or commits_seen < 20
    return "new" if looks_new else "existing"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def discover_voice(
    ast_data: Dict[str, object],
    repo_root: str,
) -> DiscoveredVoice:
    """Measure the codebase's existing style and synthesize its voice.

    For every ``.py`` entry in ``ast_data`` that exists under ``repo_root``:
    type-hint adoption is counted from the entry's pre-computed
    ``functions``, then the file is parsed and walked once to count
    docstring styles, exception handlers (bare versus typed), early
    returns, and comprehensions versus ``for`` loops.

    Files that cannot be read or parsed are skipped for the AST-based
    counters. Their type-hint counts, taken from ``ast_data``, are kept.

    Args:
        ast_data: Mapping of repo-relative path to a per-file analysis
            object. Its ``functions`` attribute is read if present; each
            function is expected to expose ``return_type`` and ``params``.
        repo_root: Directory the paths in ``ast_data`` are relative to.

    Returns:
        The voice produced by ``_synthesize_voice`` from the collected stats.
    """
    stats = VoiceStats()
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    comprehension_types = (ast.ListComp, ast.SetComp, ast.DictComp, ast.GeneratorExp)

    for path, analysis in ast_data.items():
        # Cheap suffix test first; only stat files we would actually scan.
        if not path.endswith(".py"):
            continue
        full_path = os.path.join(repo_root, path)
        if not os.path.isfile(full_path):
            continue

        # Type hints come from the existing analysis, not from the re-parse.
        for fn in getattr(analysis, "functions", []):
            stats.total_functions += 1
            if fn.return_type or any(":" in str(param) for param in fn.params):
                stats.typed_functions += 1

        try:
            with open(full_path, "r", encoding="utf-8") as f:
                source = f.read()
            tree = ast.parse(source)
        except (SyntaxError, OSError, ValueError):
            # ValueError covers UnicodeDecodeError and the ValueError some
            # interpreter versions raise for NUL bytes. One bad file must
            # not abort the whole scan.
            continue

        stats.files_analyzed += 1

        # Single traversal: the node categories below are mutually exclusive,
        # apart from functions, which feed two counters.
        for node in ast.walk(tree):
            if isinstance(node, function_types):
                stats.total_return_functions += 1
                if _has_early_return(node):
                    stats.early_return_functions += 1

            if isinstance(node, function_types) or isinstance(node, ast.ClassDef):
                docstring = ast.get_docstring(node)
                if docstring:
                    stats.total_docstrings += 1
                    stats.docstring_styles[_detect_docstring_style(docstring)] += 1
            elif isinstance(node, ast.ExceptHandler):
                stats.total_excepts += 1
                if node.type is None:
                    stats.bare_excepts += 1
            elif isinstance(node, comprehension_types):
                stats.comprehensions += 1
            elif isinstance(node, ast.For):
                stats.for_loops += 1

    return _synthesize_voice(stats)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _detect_docstring_style(docstring: str) -> str:
|
|
132
|
+
"""Classify a docstring as Google, Sphinx, Numpy, or other."""
|
|
133
|
+
if re.search(r"^\s*(Args|Returns|Raises|Yields|Examples):", docstring, re.MULTILINE):
|
|
134
|
+
return "google"
|
|
135
|
+
if re.search(r"^\s*:(param|type|returns?|rtype|raises)\s*", docstring, re.MULTILINE):
|
|
136
|
+
return "sphinx"
|
|
137
|
+
if re.search(r"^\s*(Parameters|Returns|Raises)\s*\n\s*-{3,}", docstring, re.MULTILINE):
|
|
138
|
+
return "numpy"
|
|
139
|
+
return "other"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _has_early_return(node: ast.FunctionDef) -> bool:
|
|
143
|
+
"""Check if a function has a return before its final statement."""
|
|
144
|
+
body = node.body
|
|
145
|
+
if len(body) <= 1:
|
|
146
|
+
return False
|
|
147
|
+
for stmt in body[:-1]:
|
|
148
|
+
if isinstance(stmt, ast.Return):
|
|
149
|
+
return True
|
|
150
|
+
if isinstance(stmt, ast.If):
|
|
151
|
+
for sub in ast.walk(stmt):
|
|
152
|
+
if isinstance(sub, ast.Return):
|
|
153
|
+
return True
|
|
154
|
+
return False
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _synthesize_voice(stats: VoiceStats) -> DiscoveredVoice:
    """Translate raw scan counters into an "adaptive" voice description.

    Rates are computed with a denominator floor of 1, so an empty scan
    yields 0.0 rather than dividing by zero. Each rate is mapped onto a
    prose rule by fixed thresholds. The rounded rates are also reported in
    ``stats``, and the type-hint and bare-except rates feed
    ``compute_enforcement``.
    """
    hint_rate = stats.typed_functions / max(stats.total_functions, 1)
    bare_rate = stats.bare_excepts / max(stats.total_excepts, 1)
    early_rate = stats.early_return_functions / max(stats.total_return_functions, 1)

    # The dominant style only means something once a docstring has been seen.
    dominant_style = (
        max(stats.docstring_styles, key=stats.docstring_styles.get)
        if stats.total_docstrings > 0
        else None
    )

    if hint_rate > 0.8:
        typing_rule = "Strict type hints on all function signatures."
    elif hint_rate > 0.4:
        typing_rule = "Type hints used on most functions. Follow existing patterns."
    else:
        typing_rule = "Type hints encouraged on new code but not required."

    if dominant_style in (None, "other"):
        docstring_rule = "Minimal docstrings. Add only when behavior is non-obvious."
    else:
        docstring_rule = f"{dominant_style.title()} style. Match existing docstrings."

    if bare_rate < 0.1:
        error_rule = "No bare excepts. Catch specific exception types."
    elif bare_rate < 0.3:
        error_rule = "Avoid bare excepts in new code."
    else:
        error_rule = "Match existing error handling patterns."

    structure_rule = (
        "Early returns preferred. Guard clauses at the top."
        if early_rate > 0.6
        else "Match the pattern of the file being modified."
    )

    favours_comprehensions = (
        stats.comprehensions > stats.for_loops * 0.5 and stats.comprehensions > 3
    )
    density_rule = (
        "Comprehensions preferred where readable."
        if favours_comprehensions
        else "Explicit loops. Comprehensions for simple cases only."
    )

    rules = {
        "typing": typing_rule,
        "docstrings": docstring_rule,
        "error_handling": error_rule,
        "structure": structure_rule,
        "density": density_rule,
    }

    rounded_rates = {
        "type_hint_rate": round(hint_rate, 2),
        "bare_except_rate": round(bare_rate, 2),
    }
    enforce = compute_enforcement(dict(rounded_rates))

    return DiscoveredVoice(
        mode="adaptive",
        rules=rules,
        stats={
            **rounded_rates,
            "early_return_rate": round(early_rate, 2),
            "docstring_count": stats.total_docstrings,
            "dominant_docstring_style": dominant_style,
            "files_analyzed": stats.files_analyzed,
        },
        enforce=enforce,
    )
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def compute_enforcement(stats: dict) -> dict:
    """Pick enforcement levels that match what the codebase already does.

    ``bare_excepts`` is "hold" below a 10% bare-except rate, "note" below
    30%, otherwise False. ``missing_type_hints`` is "note" above an 80%
    type-hint rate, otherwise False. Missing rates default to the
    non-enforcing side (1.0 and 0.0 respectively).
    """
    bare_rate = stats.get("bare_except_rate", 1.0)
    if bare_rate < 0.10:
        bare_level = "hold"
    elif bare_rate < 0.30:
        bare_level = "note"
    else:
        bare_level = False

    hint_rate = stats.get("type_hint_rate", 0.0)
    hint_level = "note" if hint_rate > 0.80 else False

    return {
        "bare_excepts": bare_level,
        "missing_type_hints": hint_level,
    }
|