dotscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dotscope/.scope +63 -0
- dotscope/__init__.py +3 -0
- dotscope/absorber.py +390 -0
- dotscope/assertions.py +128 -0
- dotscope/ast_analyzer.py +2 -0
- dotscope/backtest.py +2 -0
- dotscope/bench.py +141 -0
- dotscope/budget.py +3 -0
- dotscope/cache.py +2 -0
- dotscope/check/__init__.py +1 -0
- dotscope/check/acknowledge.py +2 -0
- dotscope/check/checker.py +3 -0
- dotscope/check/checks/__init__.py +1 -0
- dotscope/check/checks/antipattern.py +2 -0
- dotscope/check/checks/boundary.py +2 -0
- dotscope/check/checks/contracts.py +3 -0
- dotscope/check/checks/direction.py +2 -0
- dotscope/check/checks/intent.py +2 -0
- dotscope/check/checks/stability.py +2 -0
- dotscope/check/constraints.py +2 -0
- dotscope/check/models.py +15 -0
- dotscope/cli.py +1447 -0
- dotscope/composer.py +147 -0
- dotscope/constants.py +45 -0
- dotscope/context.py +60 -0
- dotscope/counterfactual.py +180 -0
- dotscope/debug.py +220 -0
- dotscope/discovery.py +104 -0
- dotscope/formatter.py +157 -0
- dotscope/graph.py +3 -0
- dotscope/health.py +212 -0
- dotscope/help.py +204 -0
- dotscope/history.py +6 -0
- dotscope/hooks.py +2 -0
- dotscope/ingest.py +858 -0
- dotscope/intent.py +618 -0
- dotscope/lessons.py +223 -0
- dotscope/matcher.py +104 -0
- dotscope/mcp_server.py +1081 -0
- dotscope/models/.scope +45 -0
- dotscope/models/__init__.py +7 -0
- dotscope/models/core.py +288 -0
- dotscope/models/history.py +73 -0
- dotscope/models/intent.py +213 -0
- dotscope/models/passes.py +58 -0
- dotscope/models/state.py +250 -0
- dotscope/models.py +9 -0
- dotscope/near_miss.py +3 -0
- dotscope/onboarding.py +2 -0
- dotscope/parser.py +387 -0
- dotscope/passes/.scope +105 -0
- dotscope/passes/__init__.py +1 -0
- dotscope/passes/ast_analyzer.py +508 -0
- dotscope/passes/backtest.py +198 -0
- dotscope/passes/budget_allocator.py +164 -0
- dotscope/passes/convention_compliance.py +40 -0
- dotscope/passes/convention_discovery.py +247 -0
- dotscope/passes/convention_parser.py +223 -0
- dotscope/passes/graph_builder.py +299 -0
- dotscope/passes/history_miner.py +336 -0
- dotscope/passes/incremental.py +149 -0
- dotscope/passes/lang/__init__.py +38 -0
- dotscope/passes/lang/_base.py +20 -0
- dotscope/passes/lang/_treesitter.py +93 -0
- dotscope/passes/lang/go.py +333 -0
- dotscope/passes/lang/javascript.py +348 -0
- dotscope/passes/lazy.py +152 -0
- dotscope/passes/semantic_diff.py +160 -0
- dotscope/passes/sentinel/__init__.py +1 -0
- dotscope/passes/sentinel/acknowledge.py +222 -0
- dotscope/passes/sentinel/checker.py +383 -0
- dotscope/passes/sentinel/checks/__init__.py +1 -0
- dotscope/passes/sentinel/checks/antipattern.py +84 -0
- dotscope/passes/sentinel/checks/boundary.py +46 -0
- dotscope/passes/sentinel/checks/contracts.py +148 -0
- dotscope/passes/sentinel/checks/convention.py +54 -0
- dotscope/passes/sentinel/checks/direction.py +71 -0
- dotscope/passes/sentinel/checks/intent.py +207 -0
- dotscope/passes/sentinel/checks/stability.py +66 -0
- dotscope/passes/sentinel/checks/voice.py +108 -0
- dotscope/passes/sentinel/constraints.py +472 -0
- dotscope/passes/sentinel/line_filter.py +88 -0
- dotscope/passes/sentinel/models.py +15 -0
- dotscope/passes/virtual.py +239 -0
- dotscope/passes/voice.py +162 -0
- dotscope/passes/voice_defaults.py +28 -0
- dotscope/passes/voice_discovery.py +245 -0
- dotscope/paths.py +32 -0
- dotscope/progress.py +44 -0
- dotscope/regression.py +147 -0
- dotscope/resolver.py +203 -0
- dotscope/scanner.py +246 -0
- dotscope/sessions.py +2 -0
- dotscope/storage/.scope +64 -0
- dotscope/storage/__init__.py +1 -0
- dotscope/storage/cache.py +114 -0
- dotscope/storage/claude_hooks.py +119 -0
- dotscope/storage/git_hooks.py +277 -0
- dotscope/storage/incremental_state.py +61 -0
- dotscope/storage/mcp_config.py +98 -0
- dotscope/storage/near_miss.py +183 -0
- dotscope/storage/onboarding.py +150 -0
- dotscope/storage/session_manager.py +195 -0
- dotscope/storage/timing.py +84 -0
- dotscope/timing.py +2 -0
- dotscope/tokens.py +53 -0
- dotscope/utility.py +123 -0
- dotscope/virtual.py +3 -0
- dotscope/visibility.py +664 -0
- dotscope-0.1.0.dist-info/METADATA +50 -0
- dotscope-0.1.0.dist-info/RECORD +114 -0
- dotscope-0.1.0.dist-info/WHEEL +4 -0
- dotscope-0.1.0.dist-info/entry_points.txt +3 -0
- dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
dotscope/visibility.py
ADDED
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
"""DX Visibility: making dotscope's value legible at point of use.
|
|
2
|
+
|
|
3
|
+
Five features that surface existing data through two channels:
|
|
4
|
+
1. Session Summary — aggregate stats for the MCP session
|
|
5
|
+
2. Attribution Hints — top context fragments + provenance
|
|
6
|
+
3. Post-Commit Delta — per-scope accuracy after observe
|
|
7
|
+
4. Health Nudges — warnings when scope accuracy degrades
|
|
8
|
+
5. Near-Miss Detection — disasters that didn't happen
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import time
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from typing import Dict, List, Optional, Set
|
|
16
|
+
|
|
17
|
+
from .models.state import ObservationLog, SessionLog, SessionStats # noqa: F401
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SessionTracker:
    """Accumulates stats across MCP tool calls within a session.

    State lives in a ``SessionStats`` instance (``self._stats``). The repo
    root is optional and only needed for counterfactuals and milestone
    messages; until ``set_repo_root`` is called those features are skipped.
    """

    # Class-level default so the attribute always exists, even on instances
    # where set_repo_root() has never been called.
    _repo_root: Optional[str] = None

    def __init__(self) -> None:
        self._stats = SessionStats()

    def record_resolve(self, scope_name: str, response: dict) -> None:
        """Fold one resolve_scope response into the running session stats.

        Args:
            scope_name: Name of the scope that was resolved.
            response: The response dict. Reads the keys ``token_count``,
                ``_repo_tokens``, ``attribution_hints``, ``context``,
                ``health_warnings`` and ``constraints``; missing or ``None``
                values are treated as zero / empty.
        """
        stats = self._stats
        now = datetime.now(timezone.utc).isoformat()
        if not stats.started_at:
            stats.started_at = now
        stats.last_activity = now

        stats.scopes_resolved += 1
        stats.unique_scopes.add(scope_name)
        # `or 0` / `or []` guard against keys that are present but None.
        stats.tokens_served += response.get("token_count") or 0
        stats.tokens_available = max(
            stats.tokens_available,
            response.get("_repo_tokens") or 0,
        )
        stats.attribution_hints_served += len(
            response.get("attribution_hints") or []
        )
        if response.get("context"):
            stats.context_fields_used += 1
        stats.health_warnings_surfaced += len(
            response.get("health_warnings") or []
        )
        stats.constraints_served.extend(response.get("constraints") or [])

    def set_repo_root(self, root: str) -> None:
        """Set repo root for counterfactual computation."""
        self._repo_root = root

    def _reduction_pct(self) -> float:
        """Return the unrounded token reduction percentage, never negative.

        ``tokens_served`` is a running sum while ``tokens_available`` is the
        largest ``_repo_tokens`` seen, so the raw ratio can drop below zero
        after several resolves; it is clamped to 0.
        """
        stats = self._stats
        if stats.tokens_available <= 0:
            return 0.0
        raw = (1 - stats.tokens_served / stats.tokens_available) * 100
        return max(raw, 0.0)

    def summary(self) -> dict:
        """Return session summary as dict for MCP response.

        A ``counterfactuals`` list is added only when
        ``_compute_counterfactuals`` returns something.
        """
        stats = self._stats
        result = {
            "scopes_resolved": stats.scopes_resolved,
            "unique_scopes": len(stats.unique_scopes),
            "tokens_served": stats.tokens_served,
            "tokens_available": stats.tokens_available,
            "reduction_pct": round(self._reduction_pct(), 1),
            "attribution_hints_served": stats.attribution_hints_served,
            "health_warnings_surfaced": stats.health_warnings_surfaced,
            "started_at": stats.started_at,
            "last_activity": stats.last_activity,
        }

        # Counterfactuals (gated by onboarding stage)
        counterfactuals = self._compute_counterfactuals()
        if counterfactuals:
            result["counterfactuals"] = [
                {
                    "type": cf.type,
                    "description": cf.description,
                    "source": cf.source,
                    "severity": cf.severity,
                }
                for cf in counterfactuals
            ]

        return result

    def format_terminal(self) -> str:
        """Format summary for stderr output.

        Returns an empty string when nothing was resolved in this session.
        """
        stats = self._stats
        if stats.scopes_resolved == 0:
            return ""

        # Whole-number percentage, clamped the same way summary() clamps it.
        reduction_pct = round(self._reduction_pct())

        scope_word = "scope" if stats.scopes_resolved == 1 else "scopes"
        lines = [
            "-- dotscope session " + "-" * 34,
            f" {stats.scopes_resolved} {scope_word} resolved"
            f" . {stats.tokens_served:,} tokens served"
            f" ({reduction_pct}% reduction)",
        ]

        # Counterfactuals section
        counterfactuals = self._compute_counterfactuals()
        if counterfactuals:
            from .counterfactual import format_counterfactuals_terminal
            cf_text = format_counterfactuals_terminal(counterfactuals)
            if cf_text:
                lines.append(cf_text)

        # Knowledge provided
        provided = []
        if stats.attribution_hints_served:
            provided.append(
                f"{stats.attribution_hints_served} attribution hints served"
            )
        if stats.health_warnings_surfaced:
            provided.append(
                f"{stats.health_warnings_surfaced} health warnings surfaced"
            )
        if stats.constraints_served:
            provided.append(
                f"{len(stats.constraints_served)} constraints applied"
            )
        if provided:
            lines.append("")
            lines.append(" What dotscope provided:")
            lines.extend(f" {item}" for item in provided)

        # Milestone message. Deliberately best-effort: a failure to load
        # onboarding state must not break the session footer.
        try:
            root = self._repo_root
            if root:
                from .onboarding import load_onboarding, milestone_message, next_step
                state = load_onboarding(root)
                msg = milestone_message(state)
                if msg:
                    lines.append(f"\n {msg}")
                ns = next_step(state)
                if ns:
                    lines.append(f"\n {ns}")
        except Exception:
            pass

        lines.append("-" * 55)
        return "\n".join(lines)

    def _compute_counterfactuals(self) -> list:
        """Compute counterfactuals from session data. Best-effort.

        Returns an empty list when no repo root is set, when onboarding
        state says counterfactuals should not be shown, or when anything
        raises along the way.
        """
        root = self._repo_root
        if not root:
            return []

        try:
            from .onboarding import load_onboarding, should_show_counterfactuals
            state = load_onboarding(root)
            if not should_show_counterfactuals(state):
                return []

            from .counterfactual import compute_counterfactuals
            from .near_miss import load_recent_near_misses
            import json

            # Near misses recorded for every scope touched this session
            near_misses = []
            for scope in self._stats.unique_scopes:
                near_misses.extend(load_recent_near_misses(root, scope))

            invariants = {}
            inv_path = os.path.join(root, ".dotscope", "invariants.json")
            if os.path.exists(inv_path):
                with open(inv_path, "r", encoding="utf-8") as f:
                    invariants = json.load(f)

            intents = []
            try:
                from .intent import load_intents
                intents = load_intents(root)
            except Exception:
                pass  # intents are optional input

            # Modified files from recent observations
            modified = set()
            try:
                from .sessions import SessionManager
                mgr = SessionManager(root)
                for obs in mgr.get_observations(limit=5):
                    modified.update(obs.actual_files_modified)
            except Exception:
                pass  # observations are optional input

            return compute_counterfactuals(
                constraints_served=self._stats.constraints_served,
                modified_files=modified,
                diff_text="",  # no diff is gathered here
                near_misses=near_misses,
                invariants=invariants,
                intents=intents,
            )
        except Exception:
            return []

    def reset(self) -> None:
        """Clear all session stats (the repo root is left as it was)."""
        self._stats = SessionStats()
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ---------------------------------------------------------------------------
|
|
216
|
+
# Feature 2: Attribution Hints
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
# Keywords that signal high-value context lines
|
|
220
|
+
_HINT_KEYWORDS = re.compile(
    r"\b(never|always|gotcha|fragile|important|careful|avoid|don't|do not|invariant|hack|warning)\b",
    re.IGNORECASE,
)


def _contract_field(contract, name: str) -> str:
    """Read a text field from a contract given as a dict or an object.

    Missing fields and ``None`` values both come back as an empty string.
    """
    if isinstance(contract, dict):
        value = contract.get(name)
    else:
        value = getattr(contract, name, None)
    return "" if value is None else str(value)


def extract_attribution_hints(
    context: str,
    max_hints: int = 3,
    implicit_contracts: Optional[List] = None,
    graph_hubs: Optional[Dict] = None,
    scope_directory: str = "",
) -> List[Dict[str, str]]:
    """Extract highest-value context fragments with provenance.

    Sources, collected in this order:
      1. Implicit contracts -> source ``git_history``
      2. Context lines containing a warning keyword -> source inferred from
         the nearest ``##`` header; lines mentioning "co-change" ->
         ``git_history``
      3. Graph hubs imported by at least 5 files -> source ``graph``

    Args:
        context: Scope context text, scanned line by line.
        max_hints: Maximum number of hints returned.
        implicit_contracts: Dicts or objects exposing ``description``,
            ``trigger_file`` and ``coupled_file``.
        graph_hubs: Mapping of file path to a dict with ``imported_by_count``.
        scope_directory: When set, contracts and hubs outside this directory
            are ignored. A trailing slash is tolerated.

    Returns:
        ``[{"hint": "...", "source": "..."}]`` sorted by source priority and
        truncated to ``max_hints``.
    """
    hints: List[Dict[str, str]] = []
    seen: set = set()
    # Normalise so "auth" and "auth/" both yield the prefix "auth/".
    scope_prefix = scope_directory.rstrip("/") + "/" if scope_directory else ""

    # 1. Implicit contracts (highest priority: things nobody documented)
    for contract in implicit_contracts or []:
        desc = _contract_field(contract, "description")
        trigger = _contract_field(contract, "trigger_file")
        coupled = _contract_field(contract, "coupled_file")

        if scope_prefix and not (
            trigger.startswith(scope_prefix) or coupled.startswith(scope_prefix)
        ):
            continue

        if desc and desc not in seen and len(desc) > 15:
            hints.append({"hint": desc, "source": "git_history"})
            seen.add(desc)

    # 2. Warning-keyword lines from context
    if context:
        for raw_line in context.split("\n"):
            line = raw_line.strip()
            if not line or line.startswith("#") or line.startswith("dotscope-session"):
                continue

            # Drop a leading list bullet before using the text as a hint.
            clean = re.sub(r"^[-*]\s+", "", line)
            if len(clean) <= 15 or clean in seen:
                continue

            if _HINT_KEYWORDS.search(line):
                hints.append({"hint": clean, "source": _infer_source(line, context)})
                seen.add(clean)
            elif "co-change" in line.lower():
                hints.append({"hint": clean, "source": "git_history"})
                seen.add(clean)

    # 3. Graph hubs (wide blast radius warnings)
    if graph_hubs and scope_prefix:
        for path, hub_info in graph_hubs.items():
            if not path.startswith(scope_prefix):
                continue
            count = hub_info.get("imported_by_count", 0)
            if count >= 5:
                hint = f"{path} is imported by {count} files, changes here have wide blast radius"
                if hint not in seen:
                    hints.append({"hint": hint, "source": "graph"})
                    seen.add(hint)

    # Priority sort: git_history > signal_comment > hand_authored > docstring > graph.
    # list.sort is stable, so collection order is kept within one priority.
    priority = {
        "git_history": 0, "implicit_contract": 0,
        "signal_comment": 1, "hand_authored": 2,
        "docstring": 3, "graph": 4,
    }
    hints.sort(key=lambda h: priority.get(h["source"], 5))

    return hints[:max_hints]
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _infer_source(line: str, full_context: str) -> str:
|
|
305
|
+
"""Infer provenance by walking backward to the nearest ## section header."""
|
|
306
|
+
lines = full_context.splitlines()
|
|
307
|
+
target_idx = None
|
|
308
|
+
for i, l in enumerate(lines):
|
|
309
|
+
if l.strip() == line.strip():
|
|
310
|
+
target_idx = i
|
|
311
|
+
break
|
|
312
|
+
|
|
313
|
+
if target_idx is None:
|
|
314
|
+
return _classify_line(line)
|
|
315
|
+
|
|
316
|
+
for i in range(target_idx, -1, -1):
|
|
317
|
+
header = lines[i].strip().lower()
|
|
318
|
+
if not header.startswith("##"):
|
|
319
|
+
continue
|
|
320
|
+
if "implicit contract" in header or "git history" in header:
|
|
321
|
+
return "git_history"
|
|
322
|
+
if "stability" in header:
|
|
323
|
+
return "git_history"
|
|
324
|
+
if "docstring" in header or "readme" in header:
|
|
325
|
+
return "docstring"
|
|
326
|
+
if "signal" in header or "comment" in header:
|
|
327
|
+
return "signal_comment"
|
|
328
|
+
# Any other ## header — treat content as hand_authored
|
|
329
|
+
return "hand_authored"
|
|
330
|
+
|
|
331
|
+
return _classify_line(line)
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _classify_line(line: str) -> str:
|
|
335
|
+
"""Fallback: classify a line's provenance from its own content."""
|
|
336
|
+
lower = line.lower()
|
|
337
|
+
if "co-change" in lower or "from git history" in lower or "commits" in lower:
|
|
338
|
+
return "git_history"
|
|
339
|
+
if "implicit contract" in lower:
|
|
340
|
+
return "git_history"
|
|
341
|
+
if any(kw in lower for kw in ("invariant:", "hack:", "warning:", "note:")):
|
|
342
|
+
return "signal_comment"
|
|
343
|
+
return "hand_authored"
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# ---------------------------------------------------------------------------
|
|
347
|
+
# Feature 3: Post-Commit Delta
|
|
348
|
+
# ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def format_observation_delta(
    observation: "ObservationLog",
    scope_expr: str,
) -> str:
    """Format a post-commit observation as a human-readable delta.

    Args:
        observation: Reads ``actual_files_modified``,
            ``touched_not_predicted`` and ``recall``.
        scope_expr: Scope expression; only the text before the first ``+``,
            ``-`` or ``@`` is used as the displayed scope name.
            NOTE(review): a scope name that itself contains ``-`` is cut at
            the hyphen; confirm against the expression grammar.

    Returns:
        A multi-line string: a header, the prediction score, the missed
        files (at most four names, then a "+N more" entry) and a closing line.
    """
    scope_name = scope_expr.split("+")[0].split("-")[0].split("@")[0]

    missed = observation.touched_not_predicted
    actual_count = len(observation.actual_files_modified)
    # Clamp so inconsistent input can never render a negative file count.
    predicted_correct = max(0, actual_count - len(missed))

    recall = observation.recall
    suffix = " <- degraded" if recall < 0.7 else ""

    lines = [
        f"dotscope: observation recorded for {scope_name}/",
        f" {scope_name}/ predicted {predicted_correct}/{actual_count}"
        f" files correctly ({recall:.0%}){suffix}",
    ]

    if missed:
        missing_names = [os.path.basename(path) for path in missed[:4]]
        if len(missed) > 4:
            missing_names.append(f"+{len(missed) - 4} more")
        lines.append(f" Missing: {', '.join(missing_names)}")
        # The diagnose hint is only shown alongside a list of missed files.
        if recall < 0.8:
            lines.append(f" Run `dotscope health {scope_name}` to diagnose")

    lines.append(" Utility scores updated")
    return "\n".join(lines)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def build_accuracy(
    observations: List["ObservationLog"],
    scope: str,
) -> Optional[dict]:
    """Summarise prediction accuracy over a list of observations.

    Returns ``None`` for an empty list. Otherwise the dict holds the
    observation count, mean recall and precision (3 decimals) and a trend:
    ``"improving"`` when the last five recalls average higher than all the
    earlier ones, ``"stable"`` in every other case. ``last_observation`` is
    added when the final observation has a positive timestamp, and
    ``lessons_applied`` when any observation missed files. ``scope`` is
    accepted but not read.
    """
    if not observations:
        return None

    now = time.time()
    total = len(observations)
    recall_series = [obs.recall for obs in observations]
    precision_series = [obs.precision for obs in observations]
    mean_recall = sum(recall_series) / total
    mean_precision = sum(precision_series) / total

    # Trend: the last five recalls against everything before them
    recent_window = recall_series[-5:]
    earlier_window = recall_series[:-5]
    trend = "stable"
    if earlier_window:
        recent_mean = sum(recent_window) / len(recent_window)
        earlier_mean = sum(earlier_window) / len(earlier_window)
        if recent_mean > earlier_mean:
            trend = "improving"

    result: dict = {
        "observations": total,
        "avg_recall": round(mean_recall, 3),
        "avg_precision": round(mean_precision, 3),
        "trend": trend,
    }

    # Age of the newest observation, in whole hours and never below 1
    newest = observations[-1]
    if newest.timestamp > 0:
        elapsed_hours = int((now - newest.timestamp) / 3600)
        result["last_observation"] = f"{max(1, elapsed_hours)}h ago"

    # Observations that touched at least one unpredicted file
    lesson_count = len([obs for obs in observations if obs.touched_not_predicted])
    if lesson_count:
        result["lessons_applied"] = lesson_count

    return result
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
# ---------------------------------------------------------------------------
|
|
439
|
+
# Feature 4: Health Nudges
|
|
440
|
+
# ---------------------------------------------------------------------------
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
ACCURACY_DROP_THRESHOLD = 0.15
STALENESS_DAYS = 30
UNCOVERED_FILES_MIN = 3


def check_health_nudges(
    observations: List["ObservationLog"],
    scope: str,
    repo_root: str = "",
    threshold_drop: float = ACCURACY_DROP_THRESHOLD,
) -> List[dict]:
    """Collect health warnings to attach to a scope's resolve response.

    Up to three warnings are produced, in this order:
      * ``accuracy_degraded``: with at least three observations, the mean
        recall of the newer half is lower than that of the older half by
        ``threshold_drop`` or more.
      * ``stale``: the ``.scope`` file is older than ``STALENESS_DAYS`` and
        commits have touched the scope directory since it was modified.
      * ``uncovered_files``: more than ``UNCOVERED_FILES_MIN`` source files
        are not covered by the scope's includes.

    The last two are only evaluated when ``repo_root`` is given.
    """
    nudges = []

    # 1. Accuracy degradation: older half against newer half
    if len(observations) >= 3:
        recall_series = [obs.recall for obs in observations]
        split_at = len(recall_series) // 2
        older_half = recall_series[:split_at]
        newer_half = recall_series[split_at:]
        older_avg = sum(older_half) / split_at
        recent_avg = sum(newer_half) / len(newer_half)
        if older_avg - recent_avg >= threshold_drop:
            message = (
                f"{scope}/ accuracy has dropped"
                f" from {older_avg:.0%} to {recent_avg:.0%}"
            )
            nudges.append({
                "scope": scope,
                "issue": "accuracy_degraded",
                "message": message,
                "suggestion": f"dotscope health {scope}",
            })

    if not repo_root:
        return nudges

    # 2. Staleness (scope file age + commits since)
    scope_path = os.path.join(repo_root, scope, ".scope")
    if os.path.exists(scope_path):
        modified_at = os.path.getmtime(scope_path)
        days_since = int((time.time() - modified_at) / 86400)
        if days_since > STALENESS_DAYS:
            commits = _count_commits_since(repo_root, scope, modified_at)
            if commits > 0:
                message = (
                    f"{scope}/ hasn't been updated in {days_since} days"
                    f" . {commits} commits have touched this module"
                )
                nudges.append({
                    "scope": scope,
                    "issue": "stale",
                    "message": message,
                    "suggestion": f"dotscope ingest {scope}/",
                })

    # 3. Uncovered files
    uncovered = _count_uncovered_files(repo_root, scope)
    if uncovered > UNCOVERED_FILES_MIN:
        message = (
            f"{scope}/ has {uncovered} files"
            f" not covered by scope includes"
        )
        nudges.append({
            "scope": scope,
            "issue": "uncovered_files",
            "message": message,
            "suggestion": f"dotscope ingest {scope}/",
        })

    return nudges
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _count_commits_since(repo_root: str, scope: str, since_ts: float) -> int:
|
|
512
|
+
"""Count commits touching files in this scope's directory since a timestamp."""
|
|
513
|
+
import subprocess
|
|
514
|
+
from datetime import datetime, timezone
|
|
515
|
+
since_dt = datetime.fromtimestamp(since_ts, tz=timezone.utc)
|
|
516
|
+
try:
|
|
517
|
+
result = subprocess.run(
|
|
518
|
+
["git", "log", "--oneline",
|
|
519
|
+
f"--since={since_dt.isoformat()}",
|
|
520
|
+
"--", f"{scope}/"],
|
|
521
|
+
capture_output=True, text=True, cwd=repo_root, timeout=10,
|
|
522
|
+
)
|
|
523
|
+
return len(result.stdout.strip().splitlines()) if result.stdout.strip() else 0
|
|
524
|
+
except Exception:
|
|
525
|
+
return 0
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _count_uncovered_files(repo_root: str, scope: str) -> int:
|
|
529
|
+
"""Count source files in a scope directory not covered by includes."""
|
|
530
|
+
scope_dir = os.path.join(repo_root, scope)
|
|
531
|
+
if not os.path.isdir(scope_dir):
|
|
532
|
+
return 0
|
|
533
|
+
|
|
534
|
+
source_exts = {".py", ".js", ".ts", ".go", ".rs", ".rb", ".java"}
|
|
535
|
+
count = 0
|
|
536
|
+
for dirpath, _dirs, filenames in os.walk(scope_dir):
|
|
537
|
+
# Skip hidden dirs and common non-source dirs
|
|
538
|
+
rel = os.path.relpath(dirpath, repo_root)
|
|
539
|
+
if any(part.startswith(".") or part in ("node_modules", "__pycache__", "venv")
|
|
540
|
+
for part in rel.split(os.sep)):
|
|
541
|
+
continue
|
|
542
|
+
for fn in filenames:
|
|
543
|
+
if os.path.splitext(fn)[1] in source_exts:
|
|
544
|
+
count += 1
|
|
545
|
+
|
|
546
|
+
# Subtract the files that ARE in includes (rough: count files under scope/)
|
|
547
|
+
# The includes typically have "scope/" which covers all, so uncovered = 0 in that case
|
|
548
|
+
# Only flag if the scope file exists but doesn't include the directory
|
|
549
|
+
scope_path = os.path.join(repo_root, scope, ".scope")
|
|
550
|
+
if os.path.exists(scope_path):
|
|
551
|
+
try:
|
|
552
|
+
from .parser import parse_scope_file
|
|
553
|
+
config = parse_scope_file(scope_path)
|
|
554
|
+
# If the scope includes the directory itself, all files are covered
|
|
555
|
+
if any(inc.rstrip("/") == scope or inc.startswith(scope + "/")
|
|
556
|
+
for inc in config.includes):
|
|
557
|
+
return 0
|
|
558
|
+
except Exception:
|
|
559
|
+
pass
|
|
560
|
+
|
|
561
|
+
return count
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
# ---------------------------------------------------------------------------
|
|
565
|
+
# Feature 5: Near-Miss Detection
|
|
566
|
+
# ---------------------------------------------------------------------------
|
|
567
|
+
|
|
568
|
+
# Patterns: (anti_pattern_keywords, safe_pattern_keywords)
|
|
569
|
+
_NEAR_MISS_PATTERNS = [
    # Soft delete pattern
    (
        [".delete()", "hard delete", "drop table"],
        [".deactivate()", "soft_delete", "is_active", "deleted_at"],
    ),
    # Direct DB access
    (
        ["raw sql", "execute(", "cursor.execute"],
        ["orm", "query.", "filter(", "objects."],
    ),
    # Force push
    (
        ["--force", "push -f", "reset --hard"],
        ["--force-with-lease", "revert"],
    ),
]

# A prohibition word followed by 10-50 characters describing what to avoid.
_PROHIBITION_RE = re.compile(
    r"\b(never|don't|do not|avoid)\b\s+(.{10,50})",
    re.IGNORECASE,
)


def _mask_keywords(text: str, keywords: List[str]) -> str:
    """Return ``text`` with every occurrence of each keyword blanked out."""
    for keyword in keywords:
        text = text.replace(keyword, " ")
    return text


def detect_near_misses(
    context: str,
    diff_text: str,
    scope: str,
) -> List[dict]:
    """Detect cases where scope context may have prevented a mistake.

    A near-miss is when:
    1. The context contains a warning about an anti-pattern
    2. The diff does NOT contain the anti-pattern
    3. The diff DOES contain the safe alternative

    A second pass treats any "never / don't / do not / avoid <phrase>" line
    in the context as a near-miss when the phrase is absent from the diff.

    False positives are acceptable by design.

    Args:
        context: Scope context text that was served.
        diff_text: Text of the diff to inspect.
        scope: Scope name, used in the reported impact message.

    Returns:
        At most three dicts with ``scope_context_used`` and
        ``potential_impact``, de-duplicated by ``scope_context_used``.
        Empty when either ``context`` or ``diff_text`` is empty.
    """
    if not context or not diff_text:
        return []

    near_misses = []
    context_lines = context.split("\n")
    context_lower = context.lower()
    diff_lower = diff_text.lower()

    # Check keyword patterns from context
    for anti_keywords, safe_keywords in _NEAR_MISS_PATTERNS:
        # Is the anti-pattern warned about in context?
        if not any(kw in context_lower for kw in anti_keywords):
            continue

        # Safe keywords are masked before looking for anti keywords: some
        # anti keywords are substrings of a safe one ("--force" inside
        # "--force-with-lease"), and the safe form must not count as the
        # anti-pattern.
        diff_without_safe = _mask_keywords(diff_lower, safe_keywords)
        anti_in_diff = any(kw in diff_without_safe for kw in anti_keywords)
        safe_in_diff = any(kw in diff_lower for kw in safe_keywords)
        if anti_in_diff or not safe_in_diff:
            continue

        # Find the specific context line that warned
        pattern_keywords = anti_keywords + safe_keywords
        warning_line = ""
        for line in context_lines:
            lowered = line.lower()
            if any(kw in lowered for kw in pattern_keywords):
                warning_line = line.strip()
                break

        near_misses.append({
            "scope_context_used": warning_line or "Context warning applied",
            "potential_impact": f"Avoided anti-pattern in {scope}/",
        })

    # Also check explicit "never"/"don't" patterns from context
    for line in context_lines:
        line_clean = line.strip()
        if not line_clean:
            continue

        match = _PROHIBITION_RE.search(line_clean)
        if not match:
            continue

        # Keep only the text up to the first full stop.
        anti_phrase = match.group(2).lower().split(".")[0].strip()
        # If the anti-phrase is NOT in the diff, that's a candidate
        if len(anti_phrase) > 5 and anti_phrase not in diff_lower:
            near_misses.append({
                "scope_context_used": line_clean,
                "potential_impact": f"Anti-pattern avoided: {anti_phrase}",
            })

    # Deduplicate by scope_context_used, keeping the first occurrence
    seen = set()
    unique = []
    for nm in near_misses:
        key = nm["scope_context_used"]
        if key not in seen:
            seen.add(key)
            unique.append(nm)

    return unique[:3]  # Cap at 3
|