dotscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. dotscope/.scope +63 -0
  2. dotscope/__init__.py +3 -0
  3. dotscope/absorber.py +390 -0
  4. dotscope/assertions.py +128 -0
  5. dotscope/ast_analyzer.py +2 -0
  6. dotscope/backtest.py +2 -0
  7. dotscope/bench.py +141 -0
  8. dotscope/budget.py +3 -0
  9. dotscope/cache.py +2 -0
  10. dotscope/check/__init__.py +1 -0
  11. dotscope/check/acknowledge.py +2 -0
  12. dotscope/check/checker.py +3 -0
  13. dotscope/check/checks/__init__.py +1 -0
  14. dotscope/check/checks/antipattern.py +2 -0
  15. dotscope/check/checks/boundary.py +2 -0
  16. dotscope/check/checks/contracts.py +3 -0
  17. dotscope/check/checks/direction.py +2 -0
  18. dotscope/check/checks/intent.py +2 -0
  19. dotscope/check/checks/stability.py +2 -0
  20. dotscope/check/constraints.py +2 -0
  21. dotscope/check/models.py +15 -0
  22. dotscope/cli.py +1447 -0
  23. dotscope/composer.py +147 -0
  24. dotscope/constants.py +45 -0
  25. dotscope/context.py +60 -0
  26. dotscope/counterfactual.py +180 -0
  27. dotscope/debug.py +220 -0
  28. dotscope/discovery.py +104 -0
  29. dotscope/formatter.py +157 -0
  30. dotscope/graph.py +3 -0
  31. dotscope/health.py +212 -0
  32. dotscope/help.py +204 -0
  33. dotscope/history.py +6 -0
  34. dotscope/hooks.py +2 -0
  35. dotscope/ingest.py +858 -0
  36. dotscope/intent.py +618 -0
  37. dotscope/lessons.py +223 -0
  38. dotscope/matcher.py +104 -0
  39. dotscope/mcp_server.py +1081 -0
  40. dotscope/models/.scope +45 -0
  41. dotscope/models/__init__.py +7 -0
  42. dotscope/models/core.py +288 -0
  43. dotscope/models/history.py +73 -0
  44. dotscope/models/intent.py +213 -0
  45. dotscope/models/passes.py +58 -0
  46. dotscope/models/state.py +250 -0
  47. dotscope/models.py +9 -0
  48. dotscope/near_miss.py +3 -0
  49. dotscope/onboarding.py +2 -0
  50. dotscope/parser.py +387 -0
  51. dotscope/passes/.scope +105 -0
  52. dotscope/passes/__init__.py +1 -0
  53. dotscope/passes/ast_analyzer.py +508 -0
  54. dotscope/passes/backtest.py +198 -0
  55. dotscope/passes/budget_allocator.py +164 -0
  56. dotscope/passes/convention_compliance.py +40 -0
  57. dotscope/passes/convention_discovery.py +247 -0
  58. dotscope/passes/convention_parser.py +223 -0
  59. dotscope/passes/graph_builder.py +299 -0
  60. dotscope/passes/history_miner.py +336 -0
  61. dotscope/passes/incremental.py +149 -0
  62. dotscope/passes/lang/__init__.py +38 -0
  63. dotscope/passes/lang/_base.py +20 -0
  64. dotscope/passes/lang/_treesitter.py +93 -0
  65. dotscope/passes/lang/go.py +333 -0
  66. dotscope/passes/lang/javascript.py +348 -0
  67. dotscope/passes/lazy.py +152 -0
  68. dotscope/passes/semantic_diff.py +160 -0
  69. dotscope/passes/sentinel/__init__.py +1 -0
  70. dotscope/passes/sentinel/acknowledge.py +222 -0
  71. dotscope/passes/sentinel/checker.py +383 -0
  72. dotscope/passes/sentinel/checks/__init__.py +1 -0
  73. dotscope/passes/sentinel/checks/antipattern.py +84 -0
  74. dotscope/passes/sentinel/checks/boundary.py +46 -0
  75. dotscope/passes/sentinel/checks/contracts.py +148 -0
  76. dotscope/passes/sentinel/checks/convention.py +54 -0
  77. dotscope/passes/sentinel/checks/direction.py +71 -0
  78. dotscope/passes/sentinel/checks/intent.py +207 -0
  79. dotscope/passes/sentinel/checks/stability.py +66 -0
  80. dotscope/passes/sentinel/checks/voice.py +108 -0
  81. dotscope/passes/sentinel/constraints.py +472 -0
  82. dotscope/passes/sentinel/line_filter.py +88 -0
  83. dotscope/passes/sentinel/models.py +15 -0
  84. dotscope/passes/virtual.py +239 -0
  85. dotscope/passes/voice.py +162 -0
  86. dotscope/passes/voice_defaults.py +28 -0
  87. dotscope/passes/voice_discovery.py +245 -0
  88. dotscope/paths.py +32 -0
  89. dotscope/progress.py +44 -0
  90. dotscope/regression.py +147 -0
  91. dotscope/resolver.py +203 -0
  92. dotscope/scanner.py +246 -0
  93. dotscope/sessions.py +2 -0
  94. dotscope/storage/.scope +64 -0
  95. dotscope/storage/__init__.py +1 -0
  96. dotscope/storage/cache.py +114 -0
  97. dotscope/storage/claude_hooks.py +119 -0
  98. dotscope/storage/git_hooks.py +277 -0
  99. dotscope/storage/incremental_state.py +61 -0
  100. dotscope/storage/mcp_config.py +98 -0
  101. dotscope/storage/near_miss.py +183 -0
  102. dotscope/storage/onboarding.py +150 -0
  103. dotscope/storage/session_manager.py +195 -0
  104. dotscope/storage/timing.py +84 -0
  105. dotscope/timing.py +2 -0
  106. dotscope/tokens.py +53 -0
  107. dotscope/utility.py +123 -0
  108. dotscope/virtual.py +3 -0
  109. dotscope/visibility.py +664 -0
  110. dotscope-0.1.0.dist-info/METADATA +50 -0
  111. dotscope-0.1.0.dist-info/RECORD +114 -0
  112. dotscope-0.1.0.dist-info/WHEEL +4 -0
  113. dotscope-0.1.0.dist-info/entry_points.txt +3 -0
  114. dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,239 @@
1
+ """Virtual scopes: cross-cutting concern detection from import graph hubs.
2
+
3
+ Directory scopes capture physical structure. Virtual scopes capture logical
4
+ architecture — a User lifecycle spanning models/, auth/, validators/, serializers/.
5
+
6
+ Detection algorithm:
7
+ 1. Find hub files (imported by 3+ files from 2+ directories)
8
+ 2. Collect cluster (hub + importers + shared imports within 1 hop)
9
+ 3. Filter by cohesion (more internal edges than external)
10
+ 4. Name by centrality (most-imported symbol)
11
+ 5. Deduplicate overlapping clusters (>70% overlap → merge)
12
+ """
13
+
14
+ import os
15
+ from collections import defaultdict
16
+ from pathlib import Path
17
+ from typing import Dict, List, Optional, Set
18
+
19
+ from ..context import parse_context
20
+ from ..graph import DependencyGraph
21
+ from ..models.core import ScopeConfig
22
+ from ..models.passes import VirtualScope # noqa: F401
23
+ from ..tokens import estimate_scope_tokens
24
+
25
# Top-level directory names treated as generic utility code. Files under
# these directories tend to be imported from everywhere, so they do not
# describe a meaningful cluster on their own.
_UTILITY_DIRS = {
    "common",
    "core",
    "helpers",
    "lib",
    "shared",
    "utils",
}
27
+
28
+
29
def detect_virtual_scopes(
    graph: DependencyGraph,
    min_importers: int = 3,
    min_directories: int = 2,
    min_cohesion: float = 0.3,
) -> List[ScopeConfig]:
    """Build virtual scope configs for cross-cutting concerns in the import graph.

    Pipeline: find hub files, build one cluster per hub, drop clusters whose
    cohesion is below ``min_cohesion``, merge heavily overlapping clusters,
    then convert each surviving cluster to a ScopeConfig.

    Args:
        graph: Dependency graph; its ``root`` is used to build scope paths.
        min_importers: Minimum number of importers for a file to be a hub.
        min_directories: Minimum number of distinct top-level directories
            among a hub's importers.
        min_cohesion: Clusters with cohesion below this value are discarded.

    Returns:
        One ScopeConfig per surviving cluster. Clusters whose conversion
        yields a falsy result are omitted.
    """
    repo_root = graph.root
    hub_paths = _find_hubs(graph, min_importers, min_directories)
    candidates = [_build_cluster(hub_path, graph) for hub_path in hub_paths]
    cohesive = [candidate for candidate in candidates if candidate.cohesion >= min_cohesion]

    converted = [
        _cluster_to_scope(survivor, repo_root) for survivor in _deduplicate(cohesive)
    ]
    return [scope_config for scope_config in converted if scope_config]
53
+
54
+
55
def _find_hubs(
    graph: DependencyGraph, min_importers: int, min_dirs: int
) -> List[str]:
    """Return paths of files that qualify as import hubs.

    A file is a hub when it has at least ``min_importers`` importers coming
    from at least ``min_dirs`` distinct top-level directories. Files whose
    own top-level directory is in ``_UTILITY_DIRS`` are never hubs.
    Importers that sit directly at the root (single path component) do not
    contribute a directory.
    """
    found: List[str] = []
    for file_path, file_node in graph.files.items():
        importers = file_node.imported_by
        if not importers:
            continue

        segments = Path(file_path).parts
        in_utility_dir = len(segments) > 1 and segments[0].lower() in _UTILITY_DIRS
        if in_utility_dir:
            continue

        # Distinct first path components of all importers that live in a directory.
        top_dirs = {
            Path(importer).parts[0]
            for importer in importers
            if len(Path(importer).parts) > 1
        }

        if len(importers) >= min_importers and len(top_dirs) >= min_dirs:
            found.append(file_path)

    return found
79
+
80
+
81
def _build_cluster(hub: str, graph: DependencyGraph) -> VirtualScope:
    """Assemble the cluster of files surrounding ``hub``.

    Membership is the hub, every file that imports it, and every other file
    imported by at least two of those importers. Cohesion is the fraction of
    import edges leaving cluster members that land on another cluster member
    (0.0 when there are no edges), rounded to three decimals.

    Returns an empty, unnamed VirtualScope when the hub is not in the graph.
    """
    hub_node = graph.files.get(hub)
    if not hub_node:
        return VirtualScope(name="", hub_file=hub, files=[], cohesion=0, directories_spanned=0)

    members: Set[str] = {hub, *hub_node.imported_by}

    # Tally dependencies of the importers that are not already members.
    # The hub itself is a member, so it is excluded by the same test.
    shared_counts: Dict[str, int] = {}
    for importer in hub_node.imported_by:
        importer_node = graph.files.get(importer)
        if not importer_node:
            continue
        for dependency in importer_node.imports:
            if dependency not in members:
                shared_counts[dependency] = shared_counts.get(dependency, 0) + 1

    # Membership is extended only after counting, so the tally above is
    # taken against the original hub + importers set.
    members.update(dep for dep, seen in shared_counts.items() if seen >= 2)

    inside = 0
    outside = 0
    for member in members:
        member_node = graph.files.get(member)
        if not member_node:
            continue
        hits = [target in members for target in member_node.imports]
        stays_inside = sum(hits)
        inside += stays_inside
        outside += len(hits) - stays_inside

    edge_total = inside + outside
    cohesion = inside / edge_total if edge_total > 0 else 0.0

    top_dirs = {
        Path(member).parts[0] for member in members if len(Path(member).parts) > 1
    }

    return VirtualScope(
        name=_infer_name(hub, graph),
        hub_file=hub,
        files=sorted(members),
        cohesion=round(cohesion, 3),
        directories_spanned=len(top_dirs),
    )
140
+
141
+
142
def _infer_name(hub: str, graph: DependencyGraph) -> str:
    """Derive a scope name of the form ``<stem>_lifecycle`` from the hub path.

    The stem is the hub's file name without extension, e.g. "models/user.py"
    gives "user_lifecycle". For ``__init__`` and ``index`` files the parent
    directory name is used instead, when there is one. ``graph`` is not
    consulted.
    """
    stem, _extension = os.path.splitext(os.path.basename(hub))
    if stem in ("__init__", "index"):
        segments = Path(hub).parts
        if len(segments) > 1:
            stem = segments[-2]
    return f"{stem}_lifecycle"
151
+
152
+
153
def _deduplicate(clusters: List[VirtualScope]) -> List[VirtualScope]:
    """Collapse clusters whose file sets overlap by more than 70%.

    Overlap is intersection-over-union of the two file lists. Each cluster
    not yet absorbed acts as an anchor; every later cluster overlapping the
    anchor is absorbed, and the group is represented by whichever of its
    clusters has the most files (the earliest one wins ties). Overlap is
    always measured against the anchor, not the current representative.

    With zero or one cluster, the input list itself is returned.
    """
    if len(clusters) <= 1:
        return clusters

    absorbed = set()
    survivors = []

    for anchor_idx, anchor in enumerate(clusters):
        if anchor_idx in absorbed:
            continue

        anchor_files = set(anchor.files)
        keeper = anchor

        for other_idx, other in enumerate(clusters[anchor_idx + 1:], start=anchor_idx + 1):
            if other_idx in absorbed:
                continue

            other_files = set(other.files)
            union = anchor_files | other_files
            overlap = len(anchor_files & other_files) / len(union) if union else 0
            if not overlap > 0.7:
                continue

            absorbed.add(other_idx)
            if len(other.files) > len(keeper.files):
                keeper = other

        survivors.append(keeper)

    return survivors
179
+
180
+
181
def _cluster_to_scope(cluster: VirtualScope, root: str) -> Optional[ScopeConfig]:
    """Turn a detected cluster into a ScopeConfig under ``<root>/virtual/``.

    Returns None when the cluster has no files or no name. The config's
    includes are the cluster's files (repo-relative as stored on the
    cluster); the token estimate is computed from those files joined onto
    ``root``. Related scopes are the ``.scope`` files of every top-level
    directory the cluster touches.
    """
    if not (cluster.files and cluster.name):
        return None

    top_dirs = sorted(
        {Path(member).parts[0] for member in cluster.files if len(Path(member).parts) > 1}
    )

    summary = (
        f"Virtual scope: {cluster.name} "
        f"(spans {cluster.directories_spanned} modules, "
        f"hub: {cluster.hub_file})"
    )

    context_lines = [
        "Cross-cutting concern detected from import graph.",
        f"Hub file: {cluster.hub_file} "
        f"(imported by {len(cluster.files) - 1} files across "
        f"{cluster.directories_spanned} modules)",
        "",
        f"Directories spanned: {', '.join(top_dirs)}",
        f"Cohesion: {cluster.cohesion:.0%}",
    ]
    parsed_context = parse_context("\n".join(context_lines))

    absolute_members = [os.path.join(root, member) for member in cluster.files]
    estimated_tokens = estimate_scope_tokens(absolute_members)

    return ScopeConfig(
        path=os.path.join(root, "virtual", f"{cluster.name}.scope"),
        description=summary,
        includes=cluster.files,
        excludes=[],
        context=parsed_context,
        related=[f"{directory}/.scope" for directory in top_dirs],
        tags=["virtual", "cross-cutting", cluster.name.replace("_lifecycle", "")],
        tokens_estimate=estimated_tokens,
    )
223
+
224
+
225
def format_virtual_scopes(scopes: List[ScopeConfig], root: str) -> str:
    """Render detected virtual scopes as a plain-text report.

    Each scope contributes its path relative to ``root``, its description,
    file and token counts, an optional list of related scopes, and a
    trailing blank line. A fixed message is returned when ``scopes`` is empty.
    """
    if not scopes:
        return "No cross-cutting virtual scopes detected."

    report = [f"Detected {len(scopes)} virtual scope(s):", ""]
    for entry in scopes:
        report += [
            f" {os.path.relpath(entry.path, root)}",
            f" {entry.description}",
            f" files: {len(entry.includes)}, ~{entry.tokens_estimate:,} tokens",
        ]
        if entry.related:
            report.append(f" related: {', '.join(entry.related)}")
        report.append("")

    return "\n".join(report)
@@ -0,0 +1,162 @@
1
+ """Voice injection into resolve responses and canonical snippet extraction."""
2
+
3
+ import ast
4
+ import os
5
+ from typing import Dict, List, Optional
6
+
7
+ from ..models.intent import CanonicalExample
8
+
9
+
10
def build_voice_response(
    voice_config: dict,
    root: str,
    scope_files: List[str],
    conventions: Optional[list] = None,
) -> dict:
    """Assemble the ``voice`` payload for a resolve_scope response.

    The result always carries ``mode`` (default "adaptive") and ``global``
    (the serialized rules). A ``convention`` entry is added for the first
    convention whose voice config is a dict naming a ``canonical_example``
    from which a snippet can be extracted.

    Args:
        voice_config: Voice settings; ``mode`` and ``rules`` are read.
        root: Repository root used to resolve the canonical example path.
        scope_files: Accepted for interface compatibility; not consulted here.
        conventions: Convention objects or dicts, each optionally exposing
            ``voice`` and ``name``.
    """
    response = {
        "mode": voice_config.get("mode", "adaptive"),
        "global": _serialize_global(voice_config),
    }

    for conv in conventions or ():
        is_mapping = isinstance(conv, dict)

        # Attribute access first; dict-style conventions fall back to the key.
        conv_voice = getattr(conv, "voice", None)
        if not conv_voice and is_mapping:
            conv_voice = conv.get("voice")
        if not conv_voice:
            continue

        # Only dict-shaped voice configs can name a canonical example.
        if not isinstance(conv_voice, dict):
            continue
        canonical = conv_voice.get("canonical_example")
        if not canonical:
            continue

        snippet = extract_canonical_snippet(canonical, root)
        if not snippet:
            continue

        response["convention"] = {
            "name": conv.get("name", "") if is_mapping else getattr(conv, "name", ""),
            "style_notes": conv_voice.get("style_notes", ""),
            "canonical_snippet": snippet,
        }
        break

    return response
50
+
51
+
52
def _serialize_global(voice_config: dict) -> str:
    """Flatten the configured voice rules into one space-separated string.

    Rules are emitted in a fixed category order; missing or empty categories
    are skipped and each rule is stripped of surrounding whitespace. Returns
    "" when there are no rules.
    """
    rules = voice_config.get("rules", {})
    if not rules:
        return ""

    category_order = (
        "typing",
        "docstrings",
        "error_handling",
        "structure",
        "density",
        "comments",
        "imports",
    )
    candidates = (rules.get(category) for category in category_order)
    return " ".join(text.strip() for text in candidates if text)
65
+
66
+
67
def extract_canonical_snippet(
    file_path: str,
    repo_root: str,
    max_lines: int = 40,
) -> Optional[str]:
    """Extract the first top-level class or function as a canonical snippet.

    Uses AST node locations, so imports, module docstrings, and other
    statements preceding the first definition are skipped.

    Args:
        file_path: Path to the source file; joined onto ``repo_root`` unless
            it is already absolute.
        repo_root: Repository root used to resolve relative paths.
        max_lines: Maximum number of source lines to keep. Longer snippets
            are cut and a trailing " ..." line is appended.

    Returns:
        The snippet text, or None when the file is missing, unreadable,
        cannot be parsed, or contains no top-level class or function.
    """
    full_path = file_path if os.path.isabs(file_path) else os.path.join(repo_root, file_path)
    if not os.path.isfile(full_path):
        return None

    try:
        with open(full_path, "r", encoding="utf-8") as f:
            source = f.read()
        tree = ast.parse(source)
    except (SyntaxError, ValueError, OSError):
        # ValueError covers UnicodeDecodeError and the "source contains null
        # bytes" error raised by the compiler on some Python versions; this
        # helper is best-effort, so every such failure means "no snippet".
        return None

    definition_types = (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
    target = next(
        (node for node in ast.iter_child_nodes(tree) if isinstance(node, definition_types)),
        None,
    )
    if not target:
        return None

    snippet = ast.get_source_segment(source, target)
    if not snippet:
        # Location info was incomplete; slice the raw lines instead, capped
        # at max_lines from the definition's first line.
        lines = source.splitlines()
        start = target.lineno - 1
        end_line = getattr(target, "end_lineno", None) or (start + max_lines)
        end = min(end_line, start + max_lines)
        snippet = "\n".join(lines[start:end])

    snippet_lines = snippet.splitlines()
    if len(snippet_lines) > max_lines:
        snippet = "\n".join(snippet_lines[:max_lines]) + "\n ..."

    return snippet
116
+
117
+
118
def select_canonical(
    convention: object,
    nodes: list,
    history: Optional[dict],
    repo_root: str,
) -> Optional[CanonicalExample]:
    """Choose a representative file for a convention and extract its snippet.

    Candidates are the nodes without violations. When history carries
    ``file_histories``, candidates are ordered most recently modified first.
    Of the first ten, the one with the median line count is chosen; files
    that cannot be read count as zero lines.

    Returns None when no node is violation-free; otherwise a
    CanonicalExample whose snippet may be None if extraction fails.
    """
    candidates = [node for node in nodes if not getattr(node, "violations", None)]
    if not candidates:
        return None

    def _modified_stamp(node):
        per_file = history["file_histories"]
        return per_file.get(getattr(node, "file_path", ""), {}).get("last_modified", "")

    def _line_count(node):
        target = os.path.join(repo_root, getattr(node, "file_path", ""))
        try:
            with open(target, "r", encoding="utf-8") as handle:
                return sum(1 for _line in handle)
        except (IOError, UnicodeDecodeError):
            return 0

    if history and history.get("file_histories"):
        candidates = sorted(candidates, key=_modified_stamp, reverse=True)

    sized = [(node, _line_count(node)) for node in candidates[:10]]
    sized.sort(key=lambda pair: pair[1])
    median_node, _median_length = sized[len(sized) // 2]

    chosen_path = getattr(median_node, "file_path", "")
    return CanonicalExample(
        file_path=chosen_path,
        snippet=extract_canonical_snippet(chosen_path, repo_root),
    )
@@ -0,0 +1,28 @@
1
+ """Prescriptive voice defaults for new codebases.
2
+
3
+ Applied when detect_codebase_maturity returns "new" (<10 files or
4
+ <20 commits). Opinionated starting point that the developer can relax.
5
+ """
6
+
7
+ from ..models.intent import DiscoveredVoice
8
+
9
+
10
def prescriptive_defaults() -> DiscoveredVoice:
    """Return the strict, opinionated voice config used for greenfield projects.

    The config is in "prescriptive" mode, carries no measured stats, holds
    on bare excepts, and notes missing type hints.
    """
    strict_rules = {
        "typing": "Type hints on all function signatures. Return types always specified.",
        "docstrings": "Google style. Imperative mood. One-line if the name explains it.",
        "error_handling": "Domain exceptions. No bare excepts. Let unexpected errors propagate.",
        "structure": "Early returns over nested conditionals. Guard clauses at the top.",
        "density": "Concise. Comprehensions where readable. No filler variables.",
        "comments": "Comments explain why, not what.",
        "imports": "stdlib first, third-party second, local third. One import per line.",
    }
    enforcement = {
        "bare_excepts": "hold",
        "missing_type_hints": "note",
    }
    return DiscoveredVoice(
        mode="prescriptive",
        rules=strict_rules,
        stats={},
        enforce=enforcement,
    )
@@ -0,0 +1,245 @@
1
+ """Voice discovery: scan codebase for coding style patterns.
2
+
3
+ Analyzes type hint adoption, docstring style, error handling,
4
+ structural preferences, and comprehension density. On new codebases,
5
+ returns prescriptive defaults. On existing codebases, codifies
6
+ what's already there.
7
+ """
8
+
9
+ import ast
10
+ import os
11
+ import re
12
+ from dataclasses import dataclass, field
13
+ from typing import Dict, List, Optional
14
+
15
+ from ..models.intent import DiscoveredVoice
16
+
17
+
18
@dataclass
class VoiceStats:
    """Counters accumulated while scanning a codebase for style patterns."""

    # Function signatures seen, and how many of them counted as typed.
    total_functions: int = 0
    typed_functions: int = 0

    # Docstrings found, plus a per-style tally (fresh dict per instance).
    total_docstrings: int = 0
    docstring_styles: Dict[str, int] = field(
        default_factory=lambda: dict.fromkeys(("google", "sphinx", "numpy", "other"), 0)
    )

    # Exception handlers seen, and how many had no exception type.
    total_excepts: int = 0
    bare_excepts: int = 0

    # Functions inspected for early returns, and how many had one.
    total_return_functions: int = 0
    early_return_functions: int = 0

    # Comprehension/generator expressions versus for-loops.
    comprehensions: int = 0
    for_loops: int = 0

    # Files that were successfully parsed.
    files_analyzed: int = 0
34
+
35
+
36
def detect_codebase_maturity(
    ast_data: Dict[str, object],
    history: Optional[object] = None,
    override: Optional[str] = None,
) -> str:
    """Classify the codebase as "new" or "existing".

    A codebase is "existing" only when it has at least 10 analyzed files
    and at least 20 analyzed commits. A missing ``history`` counts as zero
    commits and therefore yields "new".

    Args:
        ast_data: Per-file analysis mapping; only its size is used.
        history: Optional object exposing ``commits_analyzed``.
        override: "prescriptive" forces "new", "adaptive" forces "existing";
            any other value is ignored.
    """
    if override in ("prescriptive", "adaptive"):
        return "new" if override == "prescriptive" else "existing"

    commits = getattr(history, "commits_analyzed", 0) if history else 0
    established = len(ast_data) >= 10 and commits >= 20
    return "existing" if established else "new"
59
+
60
+
61
def discover_voice(
    ast_data: Dict[str, object],
    repo_root: str,
) -> DiscoveredVoice:
    """Measure the coding style already present in the codebase.

    For every existing ``.py`` file in ``ast_data`` this counts typed
    functions (from the supplied analysis), then re-parses the file to tally
    docstring styles, bare excepts, early returns, and comprehensions versus
    for-loops. Files that cannot be read or parsed still contribute their
    function counts but nothing else.

    Args:
        ast_data: Mapping of repo-relative path to a per-file analysis
            object; its ``functions`` attribute is read when present.
        repo_root: Directory the paths are relative to.

    Returns:
        The voice synthesized from the accumulated statistics.
    """
    tally = VoiceStats()
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    documented_nodes = function_nodes + (ast.ClassDef,)
    comprehension_nodes = (ast.ListComp, ast.SetComp, ast.DictComp, ast.GeneratorExp)

    for rel_path, analysis in ast_data.items():
        abs_path = os.path.join(repo_root, rel_path)
        if not (os.path.isfile(abs_path) and rel_path.endswith(".py")):
            continue

        # A function counts as typed with a return type or any parameter
        # whose string form contains an annotation separator.
        for fn in getattr(analysis, "functions", []):
            tally.total_functions += 1
            is_typed = bool(fn.return_type) or any(
                param for param in fn.params if ":" in str(param)
            )
            if is_typed:
                tally.typed_functions += 1

        try:
            with open(abs_path, "r", encoding="utf-8") as handle:
                tree = ast.parse(handle.read())
        except (SyntaxError, IOError, UnicodeDecodeError):
            continue

        tally.files_analyzed += 1

        # One traversal feeds every counter; the node categories below are
        # mutually exclusive apart from functions, which feed two counters.
        for node in ast.walk(tree):
            if isinstance(node, function_nodes):
                tally.total_return_functions += 1
                if _has_early_return(node):
                    tally.early_return_functions += 1

            if isinstance(node, documented_nodes):
                doc = ast.get_docstring(node)
                if doc:
                    tally.total_docstrings += 1
                    tally.docstring_styles[_detect_docstring_style(doc)] += 1
            elif isinstance(node, ast.ExceptHandler):
                tally.total_excepts += 1
                if node.type is None:
                    tally.bare_excepts += 1
            elif isinstance(node, comprehension_nodes):
                tally.comprehensions += 1
            elif isinstance(node, ast.For):
                tally.for_loops += 1

    return _synthesize_voice(tally)
129
+
130
+
131
def _detect_docstring_style(docstring: str) -> str:
    """Label a docstring "google", "sphinx", "numpy", or "other".

    Styles are probed in that order and the first match wins: Google by a
    section header ending in a colon, Sphinx by a ``:field`` line, NumPy by
    a section header underlined with dashes.
    """
    probes = (
        ("google", r"^\s*(Args|Returns|Raises|Yields|Examples):"),
        ("sphinx", r"^\s*:(param|type|returns?|rtype|raises)\s*"),
        ("numpy", r"^\s*(Parameters|Returns|Raises)\s*\n\s*-{3,}"),
    )
    return next(
        (style for style, pattern in probes if re.search(pattern, docstring, re.MULTILINE)),
        "other",
    )
140
+
141
+
142
def _has_early_return(node: ast.FunctionDef) -> bool:
    """Report whether the function returns before its last top-level statement.

    Only the statements preceding the final one are examined: a direct
    ``return`` counts, as does an ``if`` statement containing a ``return``
    anywhere inside it. Returns nested only in other constructs (loops,
    ``try``, ``with``) are not detected.
    """
    # With zero or one statement the slice is empty, so the result is False.
    for statement in node.body[:-1]:
        if isinstance(statement, ast.Return):
            return True
        if isinstance(statement, ast.If) and any(
            isinstance(inner, ast.Return) for inner in ast.walk(statement)
        ):
            return True
    return False
155
+
156
+
157
def _synthesize_voice(stats: VoiceStats) -> DiscoveredVoice:
    """Translate raw scan counters into an adaptive voice description.

    Rates are computed with a denominator floor of 1, so a codebase with no
    functions or no except handlers yields a rate of 0. Each rule is picked
    by thresholding one rate; enforcement levels are derived from the
    rounded type-hint and bare-except rates.
    """
    hint_rate = stats.typed_functions / max(stats.total_functions, 1)
    bare_rate = stats.bare_excepts / max(stats.total_excepts, 1)
    early_rate = stats.early_return_functions / max(stats.total_return_functions, 1)

    # Most frequent docstring style, only meaningful when any were found.
    dominant_style = (
        max(stats.docstring_styles, key=stats.docstring_styles.get)
        if stats.total_docstrings
        else None
    )

    if hint_rate > 0.8:
        typing_rule = "Strict type hints on all function signatures."
    elif hint_rate > 0.4:
        typing_rule = "Type hints used on most functions. Follow existing patterns."
    else:
        typing_rule = "Type hints encouraged on new code but not required."

    if stats.total_docstrings > 0 and dominant_style != "other":
        docstring_rule = f"{dominant_style.title()} style. Match existing docstrings."
    else:
        docstring_rule = "Minimal docstrings. Add only when behavior is non-obvious."

    if bare_rate < 0.1:
        error_rule = "No bare excepts. Catch specific exception types."
    elif bare_rate < 0.3:
        error_rule = "Avoid bare excepts in new code."
    else:
        error_rule = "Match existing error handling patterns."

    structure_rule = (
        "Early returns preferred. Guard clauses at the top."
        if early_rate > 0.6
        else "Match the pattern of the file being modified."
    )

    favors_comprehensions = (
        stats.comprehensions > stats.for_loops * 0.5 and stats.comprehensions > 3
    )
    density_rule = (
        "Comprehensions preferred where readable."
        if favors_comprehensions
        else "Explicit loops. Comprehensions for simple cases only."
    )

    rounded_hint = round(hint_rate, 2)
    rounded_bare = round(bare_rate, 2)
    enforcement = compute_enforcement({
        "type_hint_rate": rounded_hint,
        "bare_except_rate": rounded_bare,
    })

    return DiscoveredVoice(
        mode="adaptive",
        rules={
            "typing": typing_rule,
            "docstrings": docstring_rule,
            "error_handling": error_rule,
            "structure": structure_rule,
            "density": density_rule,
        },
        stats={
            "type_hint_rate": rounded_hint,
            "bare_except_rate": rounded_bare,
            "early_return_rate": round(early_rate, 2),
            "docstring_count": stats.total_docstrings,
            "dominant_docstring_style": dominant_style,
            "files_analyzed": stats.files_analyzed,
        },
        enforce=enforcement,
    )
222
+
223
+
224
def compute_enforcement(stats: dict) -> dict:
    """Map measured rates to enforcement levels.

    Enforcement follows what the codebase already does: bare excepts are
    held below a 10% rate, noted below 30%, and otherwise not enforced;
    missing type hints are noted only above an 80% hint rate. Absent rates
    default to the non-enforcing side (1.0 bare-except rate, 0.0 hint rate).

    Returns:
        Dict with ``bare_excepts`` ("hold", "note", or False) and
        ``missing_type_hints`` ("note" or False).
    """
    bare_rate = stats.get("bare_except_rate", 1.0)
    if bare_rate < 0.10:
        bare_level = "hold"
    elif bare_rate < 0.30:
        bare_level = "note"
    else:
        bare_level = False

    hint_rate = stats.get("type_hint_rate", 0.0)
    hint_level = "note" if hint_rate > 0.80 else False

    return {
        "bare_excepts": bare_level,
        "missing_type_hints": hint_level,
    }