dotscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dotscope/.scope +63 -0
- dotscope/__init__.py +3 -0
- dotscope/absorber.py +390 -0
- dotscope/assertions.py +128 -0
- dotscope/ast_analyzer.py +2 -0
- dotscope/backtest.py +2 -0
- dotscope/bench.py +141 -0
- dotscope/budget.py +3 -0
- dotscope/cache.py +2 -0
- dotscope/check/__init__.py +1 -0
- dotscope/check/acknowledge.py +2 -0
- dotscope/check/checker.py +3 -0
- dotscope/check/checks/__init__.py +1 -0
- dotscope/check/checks/antipattern.py +2 -0
- dotscope/check/checks/boundary.py +2 -0
- dotscope/check/checks/contracts.py +3 -0
- dotscope/check/checks/direction.py +2 -0
- dotscope/check/checks/intent.py +2 -0
- dotscope/check/checks/stability.py +2 -0
- dotscope/check/constraints.py +2 -0
- dotscope/check/models.py +15 -0
- dotscope/cli.py +1447 -0
- dotscope/composer.py +147 -0
- dotscope/constants.py +45 -0
- dotscope/context.py +60 -0
- dotscope/counterfactual.py +180 -0
- dotscope/debug.py +220 -0
- dotscope/discovery.py +104 -0
- dotscope/formatter.py +157 -0
- dotscope/graph.py +3 -0
- dotscope/health.py +212 -0
- dotscope/help.py +204 -0
- dotscope/history.py +6 -0
- dotscope/hooks.py +2 -0
- dotscope/ingest.py +858 -0
- dotscope/intent.py +618 -0
- dotscope/lessons.py +223 -0
- dotscope/matcher.py +104 -0
- dotscope/mcp_server.py +1081 -0
- dotscope/models/.scope +45 -0
- dotscope/models/__init__.py +7 -0
- dotscope/models/core.py +288 -0
- dotscope/models/history.py +73 -0
- dotscope/models/intent.py +213 -0
- dotscope/models/passes.py +58 -0
- dotscope/models/state.py +250 -0
- dotscope/models.py +9 -0
- dotscope/near_miss.py +3 -0
- dotscope/onboarding.py +2 -0
- dotscope/parser.py +387 -0
- dotscope/passes/.scope +105 -0
- dotscope/passes/__init__.py +1 -0
- dotscope/passes/ast_analyzer.py +508 -0
- dotscope/passes/backtest.py +198 -0
- dotscope/passes/budget_allocator.py +164 -0
- dotscope/passes/convention_compliance.py +40 -0
- dotscope/passes/convention_discovery.py +247 -0
- dotscope/passes/convention_parser.py +223 -0
- dotscope/passes/graph_builder.py +299 -0
- dotscope/passes/history_miner.py +336 -0
- dotscope/passes/incremental.py +149 -0
- dotscope/passes/lang/__init__.py +38 -0
- dotscope/passes/lang/_base.py +20 -0
- dotscope/passes/lang/_treesitter.py +93 -0
- dotscope/passes/lang/go.py +333 -0
- dotscope/passes/lang/javascript.py +348 -0
- dotscope/passes/lazy.py +152 -0
- dotscope/passes/semantic_diff.py +160 -0
- dotscope/passes/sentinel/__init__.py +1 -0
- dotscope/passes/sentinel/acknowledge.py +222 -0
- dotscope/passes/sentinel/checker.py +383 -0
- dotscope/passes/sentinel/checks/__init__.py +1 -0
- dotscope/passes/sentinel/checks/antipattern.py +84 -0
- dotscope/passes/sentinel/checks/boundary.py +46 -0
- dotscope/passes/sentinel/checks/contracts.py +148 -0
- dotscope/passes/sentinel/checks/convention.py +54 -0
- dotscope/passes/sentinel/checks/direction.py +71 -0
- dotscope/passes/sentinel/checks/intent.py +207 -0
- dotscope/passes/sentinel/checks/stability.py +66 -0
- dotscope/passes/sentinel/checks/voice.py +108 -0
- dotscope/passes/sentinel/constraints.py +472 -0
- dotscope/passes/sentinel/line_filter.py +88 -0
- dotscope/passes/sentinel/models.py +15 -0
- dotscope/passes/virtual.py +239 -0
- dotscope/passes/voice.py +162 -0
- dotscope/passes/voice_defaults.py +28 -0
- dotscope/passes/voice_discovery.py +245 -0
- dotscope/paths.py +32 -0
- dotscope/progress.py +44 -0
- dotscope/regression.py +147 -0
- dotscope/resolver.py +203 -0
- dotscope/scanner.py +246 -0
- dotscope/sessions.py +2 -0
- dotscope/storage/.scope +64 -0
- dotscope/storage/__init__.py +1 -0
- dotscope/storage/cache.py +114 -0
- dotscope/storage/claude_hooks.py +119 -0
- dotscope/storage/git_hooks.py +277 -0
- dotscope/storage/incremental_state.py +61 -0
- dotscope/storage/mcp_config.py +98 -0
- dotscope/storage/near_miss.py +183 -0
- dotscope/storage/onboarding.py +150 -0
- dotscope/storage/session_manager.py +195 -0
- dotscope/storage/timing.py +84 -0
- dotscope/timing.py +2 -0
- dotscope/tokens.py +53 -0
- dotscope/utility.py +123 -0
- dotscope/virtual.py +3 -0
- dotscope/visibility.py +664 -0
- dotscope-0.1.0.dist-info/METADATA +50 -0
- dotscope-0.1.0.dist-info/RECORD +114 -0
- dotscope-0.1.0.dist-info/WHEEL +4 -0
- dotscope-0.1.0.dist-info/entry_points.txt +3 -0
- dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
dotscope/ingest.py
ADDED
|
@@ -0,0 +1,858 @@
|
|
|
1
|
+
"""Ingest: reverse-engineer .scope files from an existing codebase.
|
|
2
|
+
|
|
3
|
+
Orchestrates graph analysis, git history mining, and doc absorption
|
|
4
|
+
to produce complete .scope files for every detected module boundary.
|
|
5
|
+
|
|
6
|
+
This is how dotscope enters any codebase — not by asking humans to write
|
|
7
|
+
.scope files, but by inferring them from signals already in the code.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Optional, Set, Tuple
|
|
15
|
+
|
|
16
|
+
from .absorber import AbsorptionResult, absorb_docs
|
|
17
|
+
from .context import parse_context
|
|
18
|
+
from .graph import DependencyGraph, ModuleBoundary, build_graph, transitive_dependents
|
|
19
|
+
from .history import HistoryAnalysis, analyze_history
|
|
20
|
+
from .models.core import ScopeConfig, ScopesIndex, ScopeEntry
|
|
21
|
+
from .models.passes import IngestPlan, PlannedScope # noqa: F401
|
|
22
|
+
from .models.state import BacktestReport
|
|
23
|
+
from .parser import serialize_scope
|
|
24
|
+
from .tokens import estimate_scope_tokens
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def ingest(
    root: str,
    mine_history: bool = True,
    absorb: bool = True,
    synthesize: bool = True,
    max_commits: int = 500,
    dry_run: bool = False,
    quiet: bool = False,
    voice_override: Optional[str] = None,
) -> IngestPlan:
    """Ingest a codebase and produce .scope files.

    Pipeline:
    1. Build dependency graph → detect module boundaries
    2. Mine git history → change coupling, hotspots, implicit contracts
    3. Absorb existing docs → README, docstrings, signal comments
       (3b/3c: discover conventions and voice when the graph exposes APIs)
    4. Synthesize .scope files → combine all signals into scope configs,
       then add virtual (cross-cutting) scopes
    5. Backtest the scopes against git history and auto-correct misses
    6. Optionally write files and caches to disk

    Args:
        root: Repository root
        mine_history: Whether to analyze git history (also gates backtesting)
        absorb: Whether to absorb existing documentation
        synthesize: Whether to use LLM for context synthesis (falls back to
            template); forwarded to ``synthesize_scope`` as ``use_llm``
        max_commits: Max git commits to analyze; backtesting uses at most 50
        dry_run: If True, plan but don't write files
        quiet: Forwarded to ``ProgressEmitter``
        voice_override: Forwarded to ``detect_codebase_maturity``

    Returns:
        The populated ``IngestPlan``.
    """
    root = os.path.abspath(root)
    plan = IngestPlan(root=root)

    from .progress import ProgressEmitter
    progress = ProgressEmitter(quiet=quiet)

    # Step 1: Dependency graph
    progress.start("building dependency graph")
    graph = build_graph(root)
    plan.graph = graph
    plan.total_repo_files = len(graph.files)
    plan.total_repo_tokens = sum(
        estimate_scope_tokens([os.path.join(root, p)])
        for p in graph.files
    )
    from .graph import format_graph_summary
    plan.graph_summary = format_graph_summary(graph)
    edge_count = sum(len(n.imports) for n in graph.files.values())
    progress.finish(f"{len(graph.files)} files, {edge_count} edges, {len(graph.modules)} modules")

    # Step 2: Git history
    history = HistoryAnalysis()
    if mine_history:
        progress.start(f"mining git history ({max_commits} commits)")
        history = analyze_history(root, max_commits=max_commits)
        from .history import format_history_summary
        plan.history_summary = format_history_summary(history)
        contracts = len(history.implicit_contracts)
        progress.finish(f"{history.commits_analyzed} commits, {contracts} contracts")
    else:
        progress.skip("mining git history", "disabled")
    plan.history = history

    # Step 3: Doc absorption (with AST data if available)
    docs = AbsorptionResult()
    if absorb:
        progress.start("absorbing documentation")
        docs = absorb_docs(root, apis=graph.apis or None)
        progress.finish(f"{len(docs.fragments)} fragments")
    else:
        progress.skip("absorbing documentation", "disabled")

    # Step 3b: Discover conventions from structural patterns
    if graph.apis:
        progress.start("discovering conventions")
        from .passes.convention_discovery import discover_conventions
        from .passes.convention_parser import parse_conventions
        from .passes.convention_compliance import compute_compliance
        discovered = discover_conventions(graph.apis, graph, history)
        if discovered:
            nodes = parse_conventions(graph.apis, discovered)
            for conv in discovered:
                conv.compliance = compute_compliance(conv, nodes, graph.apis)
            # Only conventions that at least half of the candidates follow are kept.
            viable = [c for c in discovered if c.compliance >= 0.5]
            plan.discovered_conventions = viable
            if viable and not dry_run:
                from .intent import save_conventions
                save_conventions(root, viable)
        # `viable` only exists when something was discovered, hence the guard.
        progress.finish(f"{len(viable) if discovered else 0} patterns")
    else:
        # NOTE(review): attachment of this else (to `if graph.apis`) was
        # reconstructed from a flattened listing -- confirm against upstream.
        progress.finish("0 patterns")

    # Step 3c: Voice discovery
    if graph.apis:
        progress.start("discovering voice")
        from .passes.voice_discovery import detect_codebase_maturity, discover_voice
        from .passes.voice_defaults import prescriptive_defaults
        maturity = detect_codebase_maturity(graph.apis, history, voice_override)
        if maturity == "new":
            discovered_voice = prescriptive_defaults()
        else:
            discovered_voice = discover_voice(graph.apis, root)
        if not dry_run:
            from .intent import save_voice_config
            save_voice_config(root, discovered_voice)
        progress.finish(f"{maturity} mode")

    # Step 4: Synthesize scope files
    progress.start("generating scopes")
    for module in graph.modules:
        planned = synthesize_scope(module, graph, history, docs, root, synthesize)
        if planned:
            plan.scopes.append(planned)

    # Step 4b: Detect virtual (cross-cutting) scopes
    from .virtual import detect_virtual_scopes
    virtual_scopes = detect_virtual_scopes(graph)
    plan.virtual_scopes = virtual_scopes
    for vs in virtual_scopes:
        plan.scopes.append(PlannedScope(
            # Name = description text before any "(", after the last ":".
            directory=f"virtual/{vs.description.split('(')[0].strip().split(':')[-1].strip()}",
            config=vs,
            confidence=0.7,
            signals=["graph: cross-cutting hub detection"],
        ))
    real_count = len([s for s in plan.scopes if not s.directory.startswith("virtual/")])
    virtual_count = len(virtual_scopes)
    progress.finish(f"{real_count} scopes, {virtual_count} virtual")

    # Step 5: Backtest against git history and auto-correct
    if mine_history and plan.scopes:
        # Backtesting is capped at 50 commits regardless of max_commits.
        backtest_commits = min(max_commits, 50)
        progress.start(f"backtesting ({backtest_commits} commits)")
        from .backtest import backtest_scopes, auto_correct_scope, format_backtest_report

        configs = [ps.config for ps in plan.scopes]
        report = backtest_scopes(root, configs, n_commits=backtest_commits)

        # Auto-correct: up to 2 rounds
        for _ in range(2):
            any_corrected = False
            # presumably report.results is index-aligned with plan.scopes;
            # verify against backtest_scopes
            for i, result in enumerate(report.results):
                if result.recall < 1.0 and result.missing_includes:
                    updated, changed = auto_correct_scope(
                        plan.scopes[i].config, result, root
                    )
                    if changed:
                        plan.scopes[i].config = updated
                        plan.scopes[i].signals.append(
                            f"backtest: auto-corrected {len(result.missing_includes)} missing include(s)"
                        )
                        any_corrected = True

            if not any_corrected:
                break

            # Re-run backtest after corrections
            configs = [ps.config for ps in plan.scopes]
            report = backtest_scopes(root, configs, n_commits=backtest_commits)

        plan.backtest_summary = format_backtest_report(report)
        plan.backtest_report = report
        progress.finish(f"{report.overall_recall:.0%} recall")
    elif not mine_history:
        progress.skip("backtesting", "no history")

    # Build .scopes index
    plan.index = _build_index(plan.scopes, plan.total_repo_tokens)

    # Step 6: Write to disk
    if not dry_run:
        _write_scopes(plan)
        # Cache structured data for MCP server
        from .cache import cache_ingest_data
        cache_ingest_data(root, history=plan.history, graph=plan.graph)
        # Cache invariants for enforcement
        _cache_invariants(root, plan.history)
        # Reset incremental state + remove needs_full_ingest marker
        try:
            from .storage.incremental_state import reset_incremental_state
            reset_incremental_state(root)
            marker = os.path.join(root, ".dotscope", "needs_full_ingest")
            if os.path.exists(marker):
                os.remove(marker)
        except Exception:
            # Best-effort cleanup: a failure here must not fail the ingest.
            pass

    return plan
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def synthesize_scope(
    module: ModuleBoundary,
    graph: DependencyGraph,
    history: HistoryAnalysis,
    docs: AbsorptionResult,
    root: str,
    use_llm: bool,
) -> Optional[PlannedScope]:
    """Synthesize a single .scope file from all available signals.

    Args:
        module: Module boundary to describe (directory, files, cohesion,
            external_deps, depended_on_by are read).
        graph: Dependency graph used for import and transitive lookups.
        history: Git history analysis (couplings, contracts, file histories,
            recent summaries).
        docs: Absorbed documentation to fold into the context.
        root: Repository root; scope paths are built relative to it.
        use_llm: Accepted for interface compatibility; not referenced in
            this function body.

    Returns:
        A ``PlannedScope`` for the module, or ``None`` when
        ``<root>/<directory>/.scope`` already exists.
    """
    directory = module.directory
    scope_path = os.path.join(root, directory, ".scope")

    # Skip if .scope already exists
    if os.path.exists(scope_path):
        return None

    # Single prefix used for every "is this path inside the module?" test.
    own_prefix = directory + "/"
    signals = []

    # --- Description ---
    file_count = len(module.files)
    # Detect primary language (most frequent file extension)
    langs = {}
    for f in module.files:
        ext = os.path.splitext(f)[1]
        langs[ext] = langs.get(ext, 0) + 1
    primary_ext = max(langs, key=langs.get) if langs else ""
    lang_names = {
        ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
        ".go": "Go", ".rs": "Rust", ".rb": "Ruby", ".java": "Java",
    }
    lang = lang_names.get(primary_ext, "")

    description = f"{directory} module"
    if lang:
        description = f"{directory} -- {lang} module ({file_count} files)"

    signals.append(f"graph: {file_count} files, cohesion {module.cohesion:.0%}")

    # --- Includes ---
    includes = [f"{directory}/"]

    # Add cross-module dependencies detected from imports
    for dep in module.external_deps:
        dep_dir = os.path.join(root, dep)
        if os.path.isdir(dep_dir):
            # Find specific files imported, not the whole directory
            for imp_file in _find_cross_module_imports(module, dep, graph):
                if imp_file not in includes:
                    includes.append(imp_file)

    # Add change-coupled files from other modules
    for coupling in history.change_couplings:
        for f in [coupling.file_a, coupling.file_b]:
            if f.startswith(own_prefix):
                other = coupling.file_b if f == coupling.file_a else coupling.file_a
                if not other.startswith(own_prefix) and coupling.coupling_strength >= 0.7:
                    if other not in includes:
                        includes.append(other)
                        signals.append(f"history: {other} coupled at {coupling.coupling_strength:.0%}")

    # --- Excludes ---
    excludes = _default_excludes(directory, module.files)

    # --- Context (priority: contracts → stability → docs → deps → recent → transitive) ---
    context_parts = []

    # 1. Implicit contracts FIRST — the thing nobody documented
    relevant_contracts = [
        ic for ic in history.implicit_contracts
        if ic.trigger_file.startswith(own_prefix)
        or ic.coupled_file.startswith(own_prefix)
    ]
    if relevant_contracts:
        context_parts.append("## Implicit Contracts (from git history)")
        for ic in relevant_contracts[:5]:
            context_parts.append(f"- {ic.description}")
        signals.append(f"history: {len(relevant_contracts)} implicit contracts")

    # 2. Stability profiles — which files are fragile
    stability_lines = []
    for f in module.files:
        fh = history.file_histories.get(f)
        # Files with fewer than 3 commits are left out.
        if fh and fh.stability and fh.commit_count >= 3:
            lines_info = f", {fh.total_lines_changed} lines" if fh.total_lines_changed else ""
            stability_lines.append(
                f"- {os.path.basename(f)}: {fh.stability} ({fh.commit_count} commits{lines_info})"
            )
    if stability_lines:
        context_parts.append("## Stability")
        context_parts.extend(stability_lines[:10])

    # 3. Absorbed docs — READMEs, docstrings, signal comments
    doc_context = docs.synthesize_context(directory, max_chars=1500)
    if doc_context:
        context_parts.append(doc_context)
        signals.append(f"docs: absorbed {len(docs.for_module(directory))} fragments")

    # 4. Dependencies + structural
    if module.external_deps:
        context_parts.append("## Dependencies")
        context_parts.append(f"This module imports from: {', '.join(module.external_deps)}")
    if module.depended_on_by:
        context_parts.append(f"This module is used by: {', '.join(module.depended_on_by)}")
        context_parts.append("Changes here may affect downstream consumers.")

    # 5. Recent changes
    recent = history.recent_summaries.get(directory, [])
    if recent:
        context_parts.append("## Recent Changes")
        for msg in recent[:5]:
            context_parts.append(f"- {msg}")

    # 6. Transitive dependency chain (if deeper than 1 hop)
    from .graph import transitive_deps as _transitive_deps
    deep_deps: Set[str] = set()
    for f in module.files:
        for dep in _transitive_deps(graph, f):
            # Root-level files (no "/") are ignored, and so are the module's
            # own files. The prefix test (rather than comparing only the
            # first path component) keeps this correct for nested module
            # directories such as "src/pkg".
            if "/" in dep and not dep.startswith(own_prefix):
                deep_deps.add(dep)
    if deep_deps:
        # Anything under a directly imported module is not "indirect".
        direct: Set[str] = set()
        for dep in module.external_deps:
            direct.update(d for d in deep_deps if d.startswith(dep + "/"))
        transitive_only = deep_deps - direct
        if transitive_only:
            context_parts.append("## Transitive Dependencies")
            for dep in sorted(transitive_only)[:5]:
                context_parts.append(f"- {dep} (indirect)")

    # 7. NEVER TODO. If empty, synthesize from graph structure.
    if not context_parts:
        context_parts.append(
            f"{directory} module -- {file_count} files, "
            f"cohesion {module.cohesion:.0%}, "
            f"{len(module.external_deps)} external dependencies."
        )

    context_str = "\n".join(context_parts)
    context = parse_context(context_str)

    # --- Related ---
    related = []
    for dep in module.external_deps:
        scope_candidate = f"{dep}/.scope"
        related.append(scope_candidate)
    for dep_by in module.depended_on_by:
        scope_candidate = f"{dep_by}/.scope"
        if scope_candidate not in related:
            related.append(scope_candidate)

    # --- Tags ---
    tags = [directory.lower()]
    if module.external_deps:
        tags.extend(d.lower() for d in module.external_deps[:3])

    # --- Token estimate ---
    full_paths = [os.path.join(root, f) for f in module.files]
    token_est = estimate_scope_tokens(full_paths)

    # --- Confidence ---
    # Cohesion is the baseline; docs and contracts each add 0.1, capped at 1.0.
    confidence = module.cohesion
    if doc_context:
        confidence = min(confidence + 0.1, 1.0)
    if relevant_contracts:
        confidence = min(confidence + 0.1, 1.0)

    config = ScopeConfig(
        path=scope_path,
        description=description,
        includes=includes,
        excludes=excludes,
        context=context,
        related=related,
        owners=[],
        tags=tags,
        tokens_estimate=token_est,
    )

    return PlannedScope(
        directory=directory,
        config=config,
        confidence=confidence,
        signals=signals,
    )
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _find_cross_module_imports(
    module: ModuleBoundary, dep_module: str, graph: DependencyGraph
) -> List[str]:
    """Return the sorted, de-duplicated imports of ``module`` that live under ``dep_module``.

    Files of ``module`` without a (truthy) entry in ``graph.files`` are ignored.
    """
    dep_prefix = dep_module + "/"
    known_nodes = (graph.files.get(path) for path in module.files)
    hits = {
        target
        for node in known_nodes
        if node
        for target in node.imports
        if target.startswith(dep_prefix)
    }
    return sorted(hits)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def _default_excludes(directory: str, files: List[str]) -> List[str]:
    """Generate sensible excludes for a module.

    Args:
        directory: Module directory, "/"-separated; may be nested
            (e.g. ``src/app``).
        files: "/"-separated file paths. The first-level sub-directory of
            each file is read from the path component that follows the
            components of ``directory``.

    Returns:
        The ``__pycache__`` and ``*.pyc`` patterns, followed by one
        ``{directory}/{subdir}/`` entry for every fixture- or
        migration-style sub-directory, in sorted (deterministic) order.
    """
    fixture_dirs = ("fixtures", "fixture", "testdata", "test_data", "mocks")
    migration_dirs = ("migrations", "migrate")

    # Common patterns
    excludes = [f"{directory}/__pycache__/", "*.pyc"]

    # Index of the path component naming the first-level sub-directory.
    # Counting the directory's own components (instead of assuming exactly
    # one) keeps nested module directories working; an empty directory
    # string falls back to index 1.
    depth = len([part for part in directory.split("/") if part]) or 1

    # Detect test/fixture/migration directories
    subdirs = set()
    for f in files:
        parts = f.split("/")
        if len(parts) > depth + 1:  # directory/subdir/file
            subdirs.add(parts[depth])

    # Sorted so the generated excludes do not depend on set iteration order.
    for subdir in sorted(subdirs):
        if subdir.lower() in fixture_dirs or subdir.lower() in migration_dirs:
            excludes.append(f"{directory}/{subdir}/")

    return excludes
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _build_index(
    scopes: List[PlannedScope], total_repo_tokens: int = 0,
) -> ScopesIndex:
    """Assemble the .scopes index, one entry per planned scope keyed by its directory.

    Keywords are the scope's tags followed by the distinct description words
    (lower-cased, punctuation-trimmed, longer than two characters), capped at 15.
    """
    index_entries = {}
    for planned in scopes:
        cfg = planned.config
        terms = list(cfg.tags)
        # Add words from description
        normalized = (raw.lower().strip("—()-,.") for raw in cfg.description.split())
        for term in normalized:
            if len(term) <= 2 or term in terms:
                continue
            terms.append(term)

        index_entries[planned.directory] = ScopeEntry(
            name=planned.directory,
            path=f"{planned.directory}/.scope",
            keywords=terms[:15],  # Cap at 15
            description=cfg.description,
        )

    return ScopesIndex(
        version=1,
        scopes=index_entries,
        defaults={"max_tokens": 8000, "include_related": False},
        total_repo_tokens=total_repo_tokens,
    )
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def append_to_index(root: str, planned: PlannedScope) -> None:
    """Add (or replace) one scope entry in ``<root>/.scopes`` and rewrite the file.

    A fresh index with the default settings is created when ``load_index``
    returns ``None``.
    """
    from .discovery import load_index

    index = load_index(root)
    if index is None:
        index = ScopesIndex(version=1, scopes={}, defaults={"max_tokens": 8000, "include_related": False})

    cfg = planned.config
    terms = list(cfg.tags)
    for raw in cfg.description.split():
        term = raw.lower().strip("—()-,.")
        if len(term) <= 2 or term in terms:
            continue
        terms.append(term)

    index.scopes[planned.directory] = ScopeEntry(
        name=planned.directory,
        path=f"{planned.directory}/.scope",
        keywords=terms[:15],
        description=cfg.description,
    )

    # Serialize before opening so a serialization error cannot truncate the file.
    serialized = _serialize_index(index)
    target = os.path.join(root, ".scopes")
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(serialized)
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def _write_scopes(plan: IngestPlan) -> None:
    """Write all planned .scope files and the .scopes index to disk.

    Existing files are never overwritten: a scope whose ``.scope`` file is
    already present is skipped, and the ``.scopes`` index is only written
    when ``plan.index`` is set and no index file exists yet. Progress
    reporting is handled by the caller.
    """
    for ps in plan.scopes:
        scope_path = os.path.join(plan.root, ps.directory, ".scope")
        # Don't overwrite existing
        if os.path.exists(scope_path):
            continue

        os.makedirs(os.path.dirname(scope_path), exist_ok=True)
        content = serialize_scope(ps.config)
        with open(scope_path, "w", encoding="utf-8") as f:
            f.write(content)

    # Write .scopes index (only if it doesn't exist)
    index_path = os.path.join(plan.root, ".scopes")
    if plan.index and not os.path.exists(index_path):
        content = _serialize_index(plan.index)
        with open(index_path, "w", encoding="utf-8") as f:
            f.write(content)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _serialize_index(index: ScopesIndex) -> str:
    """Serialize a ScopesIndex to .scopes YAML format.

    Layout: ``version`` (plus ``total_repo_tokens`` when non-zero), a
    ``scopes:`` mapping with one entry per scope name in sorted order, and a
    ``defaults:`` mapping. Each nesting level is indented by two spaces so
    that ``path`` and ``keywords`` belong to their scope entry rather than
    sitting at the same level as the scope name.

    Returns:
        The document text, terminated by a newline.
    """
    lines = [f"version: {index.version}"]
    if index.total_repo_tokens:
        lines.append(f"total_repo_tokens: {index.total_repo_tokens}")
    lines.extend(["", "scopes:"])

    for name, entry in sorted(index.scopes.items()):
        lines.append(f"  {name}:")
        lines.append(f"    path: {entry.path}")
        kw_str = ", ".join(entry.keywords)
        lines.append(f"    keywords: [{kw_str}]")

    lines.append("")
    lines.append("defaults:")
    for k, v in index.defaults.items():
        # bool is tested explicitly so it is written as YAML true/false
        # instead of Python's True/False.
        if isinstance(v, bool):
            lines.append(f"  {k}: {'true' if v else 'false'}")
        else:
            lines.append(f"  {k}: {v}")

    return "\n".join(lines) + "\n"
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _use_unicode() -> bool:
    """Check if stdout can handle Unicode (emoji, box-drawing).

    Returns:
        True when ``sys.stdout`` reports a UTF-8/16/32 encoding (including
        aliases such as ``UTF8`` or ``utf_8``); False when stdout is missing,
        has no usable ``encoding`` attribute, or reports anything else.
    """
    import codecs

    enc = getattr(sys.stdout, "encoding", None)
    if not isinstance(enc, str) or not enc:
        return False

    # Resolve aliases to the codec's canonical name; unknown names are
    # compared as given.
    try:
        canonical = codecs.lookup(enc).name
    except (LookupError, ValueError):
        canonical = enc.lower()

    normalized = canonical.replace("-", "").replace("_", "")
    return normalized in ("utf8", "utf16", "utf32", "utf8sig")
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
# Report glyphs come in two flavours sharing the same keys: a Unicode set and
# an ASCII-only fallback.
_GLYPHS_UNICODE = dict(
    discoveries="\u26a1 Discoveries",
    validation="\U0001f4ca Validation",
    created="\U0001f4c1 Created",
    bar_full="\u2588",
    bar_empty="\u2591",
    arrow="\u2192",
    dash="\u2014",
    attention="\u2190 needs attention",
)
_GLYPHS_ASCII = dict(
    discoveries=">> Discoveries",
    validation=">> Validation",
    created=">> Created",
    bar_full="#",
    bar_empty=".",
    arrow="->",
    dash="--",
    attention="<- needs attention",
)
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _glyphs() -> dict:
    """Pick the glyph table that matches what stdout can render."""
    if _use_unicode():
        return _GLYPHS_UNICODE
    return _GLYPHS_ASCII
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def format_ingest_report(plan: IngestPlan) -> str:
    """Render the ingest summary: header, discoveries, validation, then created files.

    The discoveries and validation sections are omitted when their extractor
    returns nothing. Scopes under ``virtual/`` are not counted as modules.
    """
    glyphs = _glyphs()

    # --- Header ---
    module_scopes = [
        scope for scope in plan.scopes
        if not scope.directory.startswith("virtual/")
    ]
    report = [
        f"dotscope scanned {plan.total_repo_files} files "
        f"across {len(module_scopes)} modules.",
        "",
    ]

    # --- Sections 1 and 2: Discoveries, then Validation ---
    sections = (
        ("discoveries", _extract_discoveries),
        ("validation", _extract_validation),
    )
    for heading_key, extract in sections:
        body = extract(plan, glyphs)
        if not body:
            continue
        report.append(glyphs[heading_key])
        report.append("")
        report.extend(body)

    # --- Section 3: Files created ---
    report.extend([
        f"{glyphs['created']} {len(module_scopes)} .scope files + .scopes index",
        "",
        " Try it: dotscope resolve <module>",
        " See it: dotscope resolve <module> --json --budget 4000",
        " Trust it: dotscope backtest --commits 500",
    ])

    return "\n".join(report)
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
# ---------------------------------------------------------------------------
|
|
624
|
+
# Discovery extraction helpers
|
|
625
|
+
# ---------------------------------------------------------------------------
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _is_cross_module(file_a: str, file_b: str) -> bool:
    """Report whether both paths sit under top-level directories and those directories differ.

    A path without a "/" (or with an empty first component) has no top-level
    directory, in which case the answer is False.
    """
    def _top_level(path: str) -> str:
        head, slash, _rest = path.partition("/")
        return head if slash else ""

    first = _top_level(file_a)
    second = _top_level(file_b)
    if not first or not second:
        return False
    return first != second
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _find_hub_discoveries(
    graph: DependencyGraph,
) -> List[Tuple[str, int, int, int]]:
    """Locate files that are imported widely and from several directories.

    A file qualifies when at least three files import it and those importers
    span at least two distinct top-level directories. Importers whose path
    has a single component contribute no directory.

    Returns:
        ``(path, importer_count, directory_count, blast_radius)`` tuples,
        ordered by ``importer_count`` descending. ``blast_radius`` is the
        number of entries returned by ``transitive_dependents`` plus one for
        the file itself.
    """
    hubs = []
    for file_path, file_node in graph.files.items():
        importers = file_node.imported_by
        if not importers:
            continue

        # First path component of every importer that lives inside a directory.
        importer_parts = (Path(importer).parts for importer in importers)
        top_level_dirs: Set[str] = {
            parts[0] for parts in importer_parts if len(parts) > 1
        }

        if len(importers) < 3 or len(top_level_dirs) < 2:
            continue

        dependents = transitive_dependents(graph, file_path)
        hubs.append(
            (
                file_path,
                len(importers),
                len(top_level_dirs),
                len(dependents) + 1,  # +1 for the file itself
            )
        )

    return sorted(hubs, key=lambda hub: -hub[1])
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
# Top-level directory names an engineer expects to change rarely.
# _find_volatility_surprises compares the lowercased first path component
# of a file against this set.
_EXPECTED_STABLE = {
    "config",
    "configs",
    "constants",
    "fixtures",
    "migrations",
    "settings",
    "static",
}
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def _find_volatility_surprises(
    history: HistoryAnalysis,
) -> List[Tuple[str, "FileHistory"]]:
    """Collect volatile files that sit where stability would be expected.

    A file is selected when its history is classified ``"volatile"`` and the
    lowercased first component of its ``/``-separated path is listed in
    ``_EXPECTED_STABLE``. The first entry of ``history.hotspots`` is added as
    well when it has a history record with at least 10 commits and is not
    already selected.

    Returns:
        ``(path, file_history)`` pairs ordered by ``total_lines_changed``,
        largest first.
    """
    from .history import FileHistory  # noqa: F811 — type hint only

    flagged: List[Tuple[str, FileHistory]] = [
        (file_path, record)
        for file_path, record in history.file_histories.items()
        if record.stability == "volatile"
        and "/" in file_path
        and file_path.split("/")[0].lower() in _EXPECTED_STABLE
    ]

    # The repo's most-changed file is reported too when its churn is high.
    if history.hotspots:
        hottest_path, _ = history.hotspots[0]
        hottest = history.file_histories.get(hottest_path)
        if (
            hottest
            and hottest.commit_count >= 10
            and all(listed != hottest_path for listed, _ in flagged)
        ):
            flagged.insert(0, (hottest_path, hottest))

    flagged.sort(key=lambda entry: -entry[1].total_lines_changed)
    return flagged
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
def _extract_discoveries(plan: IngestPlan, g: Optional[dict] = None) -> List[str]:
    """Build the report lines for the "discoveries" section.

    Three kinds of finding are emitted, in this order, each followed by an
    empty-string separator:

    * cross-module implicit contracts with confidence of at least 0.65
      (first five shown);
    * cross-cutting import hubs from ``_find_hub_discoveries`` (first three);
    * volatility surprises from ``_find_volatility_surprises`` (first three),
      plus a note when no scope directory is a prefix of the first one.

    Args:
        plan: Ingest plan; ``history``, ``graph`` and ``scopes`` are read.
        g: Glyph table providing ``"arrow"`` and ``"dash"``. When ``None`` it
            is obtained from ``_glyphs()``.

    Returns:
        The formatted lines; empty when nothing was found.
    """
    glyphs = _glyphs() if g is None else g
    out: List[str] = []
    history = plan.history
    graph = plan.graph

    # --- Hidden dependencies (cross-module implicit contracts) ---
    if history and history.implicit_contracts:
        hidden = []
        for contract in history.implicit_contracts:
            if not _is_cross_module(contract.trigger_file, contract.coupled_file):
                continue
            if contract.confidence >= 0.65:
                hidden.append(contract)

        if hidden:
            out.append(
                " Hidden dependencies "
                f"(from {history.commits_analyzed} commits of git history):"
            )
            for contract in hidden[:5]:
                left = os.path.basename(contract.trigger_file)
                right = os.path.basename(contract.coupled_file)
                if left == right:
                    # Identical file names: show the full paths instead.
                    left, right = contract.trigger_file, contract.coupled_file
                out.append(
                    f" {left} {glyphs['arrow']} {right}"
                    f" {contract.confidence:.0%} co-change, undocumented"
                )
            out.append("")

    # --- Cross-cutting hubs (from graph analysis) ---
    if graph:
        top_hubs = (_find_hub_discoveries(graph) or [])[:3]
        for hub_path, importer_count, dir_count, blast_radius in top_hubs:
            out.append(" Cross-cutting hub:")
            out.append(
                f" {hub_path} is imported by "
                f"{importer_count} files across {dir_count} modules"
            )
            if blast_radius > importer_count:
                out.append(
                    f" A change here affects {blast_radius} files transitively"
                )
            out.append("")

    # --- Volatility surprises ---
    if history and history.file_histories:
        surprises = _find_volatility_surprises(history)
        if surprises:
            out.append(" Volatility surprise:")
            out.extend(
                f" {path} {glyphs['dash']} {fh.commit_count} commits, "
                f"{fh.total_lines_changed} lines changed"
                for path, fh in surprises[:3]
            )

            # Annotate if top file has no scope covering it
            top_path = surprises[0][0]
            covered = False
            for scope in plan.scopes:
                if top_path.startswith(scope.directory + "/"):
                    covered = True
                    break
            if not covered:
                out.append(
                    " Most changed file in the repo. "
                    "No .scope context exists for it."
                )
            out.append("")

    return out
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _extract_validation(plan: IngestPlan, g: Optional[dict] = None) -> List[str]:
    """Build the report lines for the "validation" section.

    Emits the overall backtest recall, a token-reduction figure (average
    ``tokens_estimate`` of non-virtual scopes compared with
    ``plan.total_repo_tokens``), an empty separator, and one ten-cell recall
    bar per backtest result that has commits.

    Args:
        plan: Ingest plan; ``backtest_report``, ``scopes`` and
            ``total_repo_tokens`` are read.
        g: Glyph table providing ``"dash"``, ``"bar_full"``, ``"bar_empty"``
            and ``"attention"``. When ``None`` it is obtained from
            ``_glyphs()``.

    Returns:
        The formatted lines; empty when there is no report or the report
        covers zero commits.
    """
    if g is None:
        g = _glyphs()

    report = plan.backtest_report
    if not report or report.total_commits == 0:
        return []

    dash = g["dash"]
    out: List[str] = [
        f" Backtested against {report.total_commits} recent commits:",
        (
            f" Overall recall: {report.overall_recall:.0%} {dash} "
            "scopes would have given agents the right files"
        ),
    ]

    # Token reduction ratio — the single most compelling number
    real_scopes = []
    for scope in plan.scopes:
        if not scope.directory.startswith("virtual/"):
            real_scopes.append(scope)

    if plan.total_repo_tokens > 0 and real_scopes:
        token_sum = 0
        for scope in real_scopes:
            # A missing estimate counts as zero tokens.
            token_sum += scope.config.tokens_estimate or 0
        avg_scope_tokens = token_sum / len(real_scopes)
        reduction = (1 - avg_scope_tokens / plan.total_repo_tokens) * 100
        out.append(
            f" Token reduction: {reduction:.0f}% {dash} "
            f"from ~{plan.total_repo_tokens:,} to "
            f"~{int(avg_scope_tokens):,} average per resolution"
        )

    out.append("")

    # Per-scope recall bars
    for result in report.results:
        scope_name = os.path.basename(os.path.dirname(result.scope_path))
        if result.total_commits == 0:
            continue
        recall = result.recall
        filled = int(recall * 10)
        bar = g["bar_full"] * filled + g["bar_empty"] * (10 - filled)
        # Results under 80% recall get the attention glyph appended.
        suffix = ""
        if recall < 0.8:
            suffix = f" {g['attention']}"
        out.append(f" {scope_name:<12} {bar} {recall:.0%} recall{suffix}")

    return out
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def _cache_invariants(root: str, history: Optional[HistoryAnalysis]) -> None:
    """Write ``<root>/.dotscope/invariants.json`` from the history analysis.

    The JSON object has three keys: ``contracts`` (one record per implicit
    contract), ``function_co_changes`` (always written as an empty object
    here) and ``file_stabilities`` (classification and commit count per
    path). Nothing is written when ``history`` is falsy.

    Args:
        root: Directory under which the ``.dotscope`` folder is created if
            it does not exist.
        history: Analysis providing ``implicit_contracts`` and
            ``file_histories``; may be ``None``.
    """
    if not history:
        return

    import json

    cache_dir = os.path.join(root, ".dotscope")
    os.makedirs(cache_dir, exist_ok=True)

    payload = {
        "contracts": [
            {
                "trigger_file": contract.trigger_file,
                "coupled_file": contract.coupled_file,
                "confidence": contract.confidence,
                "description": contract.description,
            }
            for contract in history.implicit_contracts
        ],
        "function_co_changes": {},  # Populated when function-level data available
        "file_stabilities": {
            file_path: {
                "classification": record.stability,
                "commit_count": record.commit_count,
            }
            for file_path, record in history.file_histories.items()
        },
    }

    target = os.path.join(cache_dir, "invariants.json")
    with open(target, "w", encoding="utf-8") as out_file:
        json.dump(payload, out_file, indent=2)
|