dotscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. dotscope/.scope +63 -0
  2. dotscope/__init__.py +3 -0
  3. dotscope/absorber.py +390 -0
  4. dotscope/assertions.py +128 -0
  5. dotscope/ast_analyzer.py +2 -0
  6. dotscope/backtest.py +2 -0
  7. dotscope/bench.py +141 -0
  8. dotscope/budget.py +3 -0
  9. dotscope/cache.py +2 -0
  10. dotscope/check/__init__.py +1 -0
  11. dotscope/check/acknowledge.py +2 -0
  12. dotscope/check/checker.py +3 -0
  13. dotscope/check/checks/__init__.py +1 -0
  14. dotscope/check/checks/antipattern.py +2 -0
  15. dotscope/check/checks/boundary.py +2 -0
  16. dotscope/check/checks/contracts.py +3 -0
  17. dotscope/check/checks/direction.py +2 -0
  18. dotscope/check/checks/intent.py +2 -0
  19. dotscope/check/checks/stability.py +2 -0
  20. dotscope/check/constraints.py +2 -0
  21. dotscope/check/models.py +15 -0
  22. dotscope/cli.py +1447 -0
  23. dotscope/composer.py +147 -0
  24. dotscope/constants.py +45 -0
  25. dotscope/context.py +60 -0
  26. dotscope/counterfactual.py +180 -0
  27. dotscope/debug.py +220 -0
  28. dotscope/discovery.py +104 -0
  29. dotscope/formatter.py +157 -0
  30. dotscope/graph.py +3 -0
  31. dotscope/health.py +212 -0
  32. dotscope/help.py +204 -0
  33. dotscope/history.py +6 -0
  34. dotscope/hooks.py +2 -0
  35. dotscope/ingest.py +858 -0
  36. dotscope/intent.py +618 -0
  37. dotscope/lessons.py +223 -0
  38. dotscope/matcher.py +104 -0
  39. dotscope/mcp_server.py +1081 -0
  40. dotscope/models/.scope +45 -0
  41. dotscope/models/__init__.py +7 -0
  42. dotscope/models/core.py +288 -0
  43. dotscope/models/history.py +73 -0
  44. dotscope/models/intent.py +213 -0
  45. dotscope/models/passes.py +58 -0
  46. dotscope/models/state.py +250 -0
  47. dotscope/models.py +9 -0
  48. dotscope/near_miss.py +3 -0
  49. dotscope/onboarding.py +2 -0
  50. dotscope/parser.py +387 -0
  51. dotscope/passes/.scope +105 -0
  52. dotscope/passes/__init__.py +1 -0
  53. dotscope/passes/ast_analyzer.py +508 -0
  54. dotscope/passes/backtest.py +198 -0
  55. dotscope/passes/budget_allocator.py +164 -0
  56. dotscope/passes/convention_compliance.py +40 -0
  57. dotscope/passes/convention_discovery.py +247 -0
  58. dotscope/passes/convention_parser.py +223 -0
  59. dotscope/passes/graph_builder.py +299 -0
  60. dotscope/passes/history_miner.py +336 -0
  61. dotscope/passes/incremental.py +149 -0
  62. dotscope/passes/lang/__init__.py +38 -0
  63. dotscope/passes/lang/_base.py +20 -0
  64. dotscope/passes/lang/_treesitter.py +93 -0
  65. dotscope/passes/lang/go.py +333 -0
  66. dotscope/passes/lang/javascript.py +348 -0
  67. dotscope/passes/lazy.py +152 -0
  68. dotscope/passes/semantic_diff.py +160 -0
  69. dotscope/passes/sentinel/__init__.py +1 -0
  70. dotscope/passes/sentinel/acknowledge.py +222 -0
  71. dotscope/passes/sentinel/checker.py +383 -0
  72. dotscope/passes/sentinel/checks/__init__.py +1 -0
  73. dotscope/passes/sentinel/checks/antipattern.py +84 -0
  74. dotscope/passes/sentinel/checks/boundary.py +46 -0
  75. dotscope/passes/sentinel/checks/contracts.py +148 -0
  76. dotscope/passes/sentinel/checks/convention.py +54 -0
  77. dotscope/passes/sentinel/checks/direction.py +71 -0
  78. dotscope/passes/sentinel/checks/intent.py +207 -0
  79. dotscope/passes/sentinel/checks/stability.py +66 -0
  80. dotscope/passes/sentinel/checks/voice.py +108 -0
  81. dotscope/passes/sentinel/constraints.py +472 -0
  82. dotscope/passes/sentinel/line_filter.py +88 -0
  83. dotscope/passes/sentinel/models.py +15 -0
  84. dotscope/passes/virtual.py +239 -0
  85. dotscope/passes/voice.py +162 -0
  86. dotscope/passes/voice_defaults.py +28 -0
  87. dotscope/passes/voice_discovery.py +245 -0
  88. dotscope/paths.py +32 -0
  89. dotscope/progress.py +44 -0
  90. dotscope/regression.py +147 -0
  91. dotscope/resolver.py +203 -0
  92. dotscope/scanner.py +246 -0
  93. dotscope/sessions.py +2 -0
  94. dotscope/storage/.scope +64 -0
  95. dotscope/storage/__init__.py +1 -0
  96. dotscope/storage/cache.py +114 -0
  97. dotscope/storage/claude_hooks.py +119 -0
  98. dotscope/storage/git_hooks.py +277 -0
  99. dotscope/storage/incremental_state.py +61 -0
  100. dotscope/storage/mcp_config.py +98 -0
  101. dotscope/storage/near_miss.py +183 -0
  102. dotscope/storage/onboarding.py +150 -0
  103. dotscope/storage/session_manager.py +195 -0
  104. dotscope/storage/timing.py +84 -0
  105. dotscope/timing.py +2 -0
  106. dotscope/tokens.py +53 -0
  107. dotscope/utility.py +123 -0
  108. dotscope/virtual.py +3 -0
  109. dotscope/visibility.py +664 -0
  110. dotscope-0.1.0.dist-info/METADATA +50 -0
  111. dotscope-0.1.0.dist-info/RECORD +114 -0
  112. dotscope-0.1.0.dist-info/WHEEL +4 -0
  113. dotscope-0.1.0.dist-info/entry_points.txt +3 -0
  114. dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
dotscope/visibility.py ADDED
@@ -0,0 +1,664 @@
1
+ """DX Visibility: making dotscope's value legible at point of use.
2
+
3
+ Five features that surface existing data through two channels:
4
+ 1. Session Summary — aggregate stats for the MCP session
5
+ 2. Attribution Hints — top context fragments + provenance
6
+ 3. Post-Commit Delta — per-scope accuracy after observe
7
+ 4. Health Nudges — warnings when scope accuracy degrades
8
+ 5. Near-Miss Detection — disasters that didn't happen
9
+ """
10
+
11
+ import os
12
+ import re
13
+ import time
14
+ from datetime import datetime, timezone
15
+ from typing import Dict, List, Optional, Set
16
+
17
+ from .models.state import ObservationLog, SessionLog, SessionStats # noqa: F401
18
+
19
+
20
class SessionTracker:
    """Accumulate statistics across MCP tool calls within a single session.

    The tracker owns one ``SessionStats`` record, updates it on every
    resolve, and renders it either as a dict (``summary``) or as terminal
    text (``format_terminal``).
    """

    # Repo root used for the onboarding / counterfactual lookups. It stays
    # None until set_repo_root() is called, which disables those sections.
    _repo_root = None

    def __init__(self) -> None:
        self._stats = SessionStats()

    def record_resolve(self, scope_name: str, response: dict) -> None:
        """Fold one resolve_scope response into the session statistics.

        Reads the ``token_count``, ``_repo_tokens``, ``attribution_hints``,
        ``context``, ``health_warnings`` and ``constraints`` keys of
        *response*. Missing keys and explicit ``None`` values count as
        zero / empty.
        """
        stats = self._stats
        now = datetime.now(timezone.utc).isoformat()
        if not stats.started_at:
            stats.started_at = now
        stats.last_activity = now

        stats.scopes_resolved += 1
        stats.unique_scopes.add(scope_name)
        stats.tokens_served += response.get("token_count") or 0
        # tokens_available tracks the largest repo size seen so far.
        stats.tokens_available = max(
            stats.tokens_available,
            response.get("_repo_tokens") or 0,
        )

        stats.attribution_hints_served += len(
            response.get("attribution_hints") or []
        )

        if response.get("context"):
            stats.context_fields_used += 1

        stats.health_warnings_surfaced += len(
            response.get("health_warnings") or []
        )
        stats.constraints_served.extend(response.get("constraints") or [])

    def set_repo_root(self, root: str) -> None:
        """Set repo root for counterfactual computation."""
        self._repo_root = root

    def _reduction_pct(self) -> float:
        """Return the token reduction as a percentage, never below zero.

        ``tokens_served`` accumulates across resolves while
        ``tokens_available`` is a running maximum, so the raw ratio can go
        negative; both renderers share this clamp so they always agree.
        """
        s = self._stats
        if s.tokens_available <= 0:
            return 0.0
        return max((1 - s.tokens_served / s.tokens_available) * 100, 0.0)

    def summary(self) -> dict:
        """Return session summary as dict for MCP response.

        A ``counterfactuals`` list is included only when
        ``_compute_counterfactuals`` yields at least one entry.
        """
        s = self._stats
        result = {
            "scopes_resolved": s.scopes_resolved,
            "unique_scopes": len(s.unique_scopes),
            "tokens_served": s.tokens_served,
            "tokens_available": s.tokens_available,
            "reduction_pct": round(self._reduction_pct(), 1),
            "attribution_hints_served": s.attribution_hints_served,
            "health_warnings_surfaced": s.health_warnings_surfaced,
            "started_at": s.started_at,
            "last_activity": s.last_activity,
        }

        # Counterfactuals (gated by onboarding stage)
        cfs = self._compute_counterfactuals()
        if cfs:
            result["counterfactuals"] = [
                {
                    "type": cf.type,
                    "description": cf.description,
                    "source": cf.source,
                    "severity": cf.severity,
                }
                for cf in cfs
            ]

        return result

    def format_terminal(self) -> str:
        """Format summary for stderr output.

        Returns an empty string when nothing was resolved in this session.
        """
        s = self._stats
        if s.scopes_resolved == 0:
            return ""

        # Same clamped value as summary(), rounded to a whole percent.
        reduction_pct = round(self._reduction_pct())

        scope_word = "scope" if s.scopes_resolved == 1 else "scopes"
        lines = [
            "-- dotscope session " + "-" * 34,
            f" {s.scopes_resolved} {scope_word} resolved"
            f" . {s.tokens_served:,} tokens served"
            f" ({reduction_pct}% reduction)",
        ]

        # Counterfactuals (the magic section)
        cfs = self._compute_counterfactuals()
        if cfs:
            from .counterfactual import format_counterfactuals_terminal
            cf_text = format_counterfactuals_terminal(cfs)
            if cf_text:
                lines.append(cf_text)

        # Knowledge provided
        provided = []
        if s.attribution_hints_served:
            provided.append(f"{s.attribution_hints_served} attribution hints served")
        if s.health_warnings_surfaced:
            provided.append(f"{s.health_warnings_surfaced} health warnings surfaced")
        if s.constraints_served:
            provided.append(f"{len(s.constraints_served)} constraints applied")
        if provided:
            lines.append("")
            lines.append(" What dotscope provided:")
            for p in provided:
                lines.append(f" {p}")

        # Milestone message. Deliberately best-effort: a failure while
        # loading onboarding state must never break the session summary.
        try:
            root = self._repo_root
            if root:
                from .onboarding import load_onboarding, milestone_message, next_step
                state = load_onboarding(root)
                msg = milestone_message(state)
                if msg:
                    lines.append(f"\n {msg}")
                ns = next_step(state)
                if ns:
                    lines.append(f"\n {ns}")
        except Exception:
            pass

        lines.append("-" * 55)
        return "\n".join(lines)

    def _compute_counterfactuals(self) -> list:
        """Compute counterfactuals from session data. Best-effort.

        Returns an empty list when no repo root is set, when onboarding
        state says counterfactuals should not be shown, or when any step
        raises.
        """
        try:
            root = self._repo_root
            if not root:
                return []

            from .onboarding import load_onboarding, should_show_counterfactuals
            state = load_onboarding(root)
            if not should_show_counterfactuals(state):
                return []

            from .counterfactual import compute_counterfactuals
            from .near_miss import load_recent_near_misses
            import json

            # Gather near misses for every scope touched in this session.
            near_misses = []
            for scope in self._stats.unique_scopes:
                near_misses.extend(load_recent_near_misses(root, scope))

            invariants = {}
            inv_path = os.path.join(root, ".dotscope", "invariants.json")
            if os.path.exists(inv_path):
                with open(inv_path, "r", encoding="utf-8") as f:
                    invariants = json.load(f)

            intents = []
            try:
                from .intent import load_intents
                intents = load_intents(root)
            except Exception:
                # Intents are optional input; continue without them.
                pass

            # Modified files from recent observations
            modified = set()
            # No diff text is gathered here; an empty string is passed on.
            diff_text = ""
            try:
                from .sessions import SessionManager
                mgr = SessionManager(root)
                for obs in mgr.get_observations(limit=5):
                    modified.update(obs.actual_files_modified)
            except Exception:
                # Observations are optional input; continue without them.
                pass

            return compute_counterfactuals(
                constraints_served=self._stats.constraints_served,
                modified_files=modified,
                diff_text=diff_text,
                near_misses=near_misses,
                invariants=invariants,
                intents=intents,
            )
        except Exception:
            return []

    def reset(self) -> None:
        """Clear all session stats (the repo root is left untouched)."""
        self._stats = SessionStats()
213
+
214
+
215
+ # ---------------------------------------------------------------------------
216
+ # Feature 2: Attribution Hints
217
+ # ---------------------------------------------------------------------------
218
+
219
# Keywords that signal high-value context lines
_HINT_KEYWORDS = re.compile(
    r"\b(never|always|gotcha|fragile|important|careful|avoid|don't|do not|invariant|hack|warning)\b",
    re.IGNORECASE,
)


def _contract_field(contract, name: str) -> str:
    """Read one field of a contract given as a dict or as an object.

    Missing fields and ``None`` values both come back as ``""`` so callers
    can use string methods on the result unconditionally.
    """
    if isinstance(contract, dict):
        value = contract.get(name, "")
    else:
        value = getattr(contract, name, "")
    return "" if value is None else str(value)


def extract_attribution_hints(
    context: str,
    max_hints: int = 3,
    implicit_contracts: Optional[List] = None,
    graph_hubs: Optional[Dict] = None,
    scope_directory: str = "",
) -> List[Dict[str, str]]:
    """Extract highest-value context fragments with provenance.

    Sources (in priority order):
    1. Implicit contracts from cached history → source: git_history
    2. Warning-keyword lines from context → source inferred from section headers
    3. Graph hub info → source: graph

    Args:
        context: Scope context text; scanned line by line.
        max_hints: Maximum number of hints returned after sorting.
        implicit_contracts: Dicts or objects exposing ``description``,
            ``trigger_file`` and ``coupled_file``.
        graph_hubs: Mapping of path → dict with ``imported_by_count``.
        scope_directory: When set, contracts and hubs outside
            ``<scope_directory>/`` are ignored.

    Returns: [{"hint": "...", "source": "git_history|hand_authored|..."}]
    """
    hints: List[Dict[str, str]] = []
    seen: set = set()
    scope_prefix = scope_directory + "/"

    # 1. Implicit contracts (highest priority — things nobody documented)
    if implicit_contracts:
        for ic in implicit_contracts:
            desc = _contract_field(ic, "description")
            trigger = _contract_field(ic, "trigger_file")
            coupled = _contract_field(ic, "coupled_file")

            # Keep only contracts with at least one side inside the scope.
            if scope_directory and not (
                trigger.startswith(scope_prefix)
                or coupled.startswith(scope_prefix)
            ):
                continue

            if desc and desc not in seen and len(desc) > 15:
                hints.append({"hint": desc, "source": "git_history"})
                seen.add(desc)

    # 2. Warning-keyword lines from context
    if context:
        for line in context.split("\n"):
            line = line.strip()
            if not line or line.startswith("#") or line.startswith("dotscope-session"):
                continue

            # Drop a leading markdown bullet before measuring / deduplicating.
            clean = re.sub(r"^[-*]\s+", "", line)
            if len(clean) <= 15 or clean in seen:
                continue

            if _HINT_KEYWORDS.search(line):
                source = _infer_source(line, context)
                hints.append({"hint": clean, "source": source})
                seen.add(clean)
            elif "co-change" in line.lower():
                hints.append({"hint": clean, "source": "git_history"})
                seen.add(clean)

    # 3. Graph hubs (wide blast radius warnings)
    if graph_hubs and scope_directory:
        for path, hub_info in graph_hubs.items():
            if not path.startswith(scope_prefix):
                continue
            count = hub_info.get("imported_by_count", 0)
            if count >= 5:
                hint = f"{path} is imported by {count} files, changes here have wide blast radius"
                if hint not in seen:
                    hints.append({"hint": hint, "source": "graph"})
                    seen.add(hint)

    # Priority sort: git_history > signal_comment > hand_authored > docstring > graph
    # (list.sort is stable, so discovery order is kept within one source).
    _PRIORITY = {
        "git_history": 0, "implicit_contract": 0,
        "signal_comment": 1, "hand_authored": 2,
        "docstring": 3, "graph": 4,
    }
    hints.sort(key=lambda h: _PRIORITY.get(h["source"], 5))

    return hints[:max_hints]
302
+
303
+
304
+ def _infer_source(line: str, full_context: str) -> str:
305
+ """Infer provenance by walking backward to the nearest ## section header."""
306
+ lines = full_context.splitlines()
307
+ target_idx = None
308
+ for i, l in enumerate(lines):
309
+ if l.strip() == line.strip():
310
+ target_idx = i
311
+ break
312
+
313
+ if target_idx is None:
314
+ return _classify_line(line)
315
+
316
+ for i in range(target_idx, -1, -1):
317
+ header = lines[i].strip().lower()
318
+ if not header.startswith("##"):
319
+ continue
320
+ if "implicit contract" in header or "git history" in header:
321
+ return "git_history"
322
+ if "stability" in header:
323
+ return "git_history"
324
+ if "docstring" in header or "readme" in header:
325
+ return "docstring"
326
+ if "signal" in header or "comment" in header:
327
+ return "signal_comment"
328
+ # Any other ## header — treat content as hand_authored
329
+ return "hand_authored"
330
+
331
+ return _classify_line(line)
332
+
333
+
334
+ def _classify_line(line: str) -> str:
335
+ """Fallback: classify a line's provenance from its own content."""
336
+ lower = line.lower()
337
+ if "co-change" in lower or "from git history" in lower or "commits" in lower:
338
+ return "git_history"
339
+ if "implicit contract" in lower:
340
+ return "git_history"
341
+ if any(kw in lower for kw in ("invariant:", "hack:", "warning:", "note:")):
342
+ return "signal_comment"
343
+ return "hand_authored"
344
+
345
+
346
+ # ---------------------------------------------------------------------------
347
+ # Feature 3: Post-Commit Delta
348
+ # ---------------------------------------------------------------------------
349
+
350
+
351
def format_observation_delta(
    observation: "ObservationLog",
    scope_expr: str,
) -> str:
    """Render a post-commit observation as a short multi-line report.

    Only the leading scope name of *scope_expr* is shown: everything from
    the first ``+``, ``-`` or ``@`` onward is dropped. The report reads
    ``actual_files_modified``, ``touched_not_predicted`` and ``recall``
    from *observation*.
    """
    # Cut the expression down to its first scope name.
    scope_name = scope_expr
    for separator in ("+", "-", "@"):
        scope_name = scope_name.split(separator)[0]

    missed = observation.touched_not_predicted
    total = len(observation.actual_files_modified)
    hits = total - len(missed)
    recall = observation.recall
    degraded_tag = " <- degraded" if recall < 0.7 else ""

    report = [
        f"dotscope: observation recorded for {scope_name}/",
        f" {scope_name}/ predicted {hits}/{total}"
        f" files correctly ({recall:.0%}){degraded_tag}",
    ]

    if missed:
        # Name at most four missed files, then summarise the remainder.
        shown = [os.path.basename(path) for path in missed[:4]]
        overflow = len(missed) - 4
        if overflow > 0:
            shown.append(f"+{overflow} more")
        report.append(f" Missing: {', '.join(shown)}")
        if recall < 0.8:
            report.append(f" Run `dotscope health {scope_name}` to diagnose")

    report.append(" Utility scores updated")
    return "\n".join(report)
388
+
389
+
390
def build_accuracy(
    observations: List["ObservationLog"],
    scope: str,
) -> Optional[dict]:
    """Summarise every observation into a single accuracy record.

    Replaces the former separate scope_accuracy and recent_learning fields.
    *scope* is accepted but not used by the computation.

    Returns:
        ``None`` when *observations* is empty; otherwise a dict with
        ``observations``, ``avg_recall``, ``avg_precision`` and ``trend``,
        plus ``last_observation`` / ``lessons_applied`` when applicable.
    """
    if not observations:
        return None

    total = len(observations)
    recall_series = [obs.recall for obs in observations]
    precision_sum = sum(obs.precision for obs in observations)

    # Trend: the last five recalls against everything before them. With
    # five or fewer observations there is no "before", so it stays stable.
    newest = recall_series[-5:]
    earlier = recall_series[:-5]
    trend = "stable"
    if earlier and sum(newest) / len(newest) > sum(earlier) / len(earlier):
        trend = "improving"

    summary: dict = {
        "observations": total,
        "avg_recall": round(sum(recall_series) / total, 3),
        "avg_precision": round(precision_sum / total, 3),
        "trend": trend,
    }

    # Age of the last observation in whole hours, reported as at least 1h.
    last_seen = observations[-1].timestamp
    if last_seen > 0:
        age_hours = max(1, int((time.time() - last_seen) / 3600))
        summary["last_observation"] = f"{age_hours}h ago"

    # An observation counts as a lesson when some touched file was not predicted.
    lesson_count = len([obs for obs in observations if obs.touched_not_predicted])
    if lesson_count:
        summary["lessons_applied"] = lesson_count

    return summary
436
+
437
+
438
+ # ---------------------------------------------------------------------------
439
+ # Feature 4: Health Nudges
440
+ # ---------------------------------------------------------------------------
441
+
442
+
443
# Minimum drop in average recall (older half vs. recent half) that is reported.
ACCURACY_DROP_THRESHOLD = 0.15
# A .scope file older than this many days is a staleness candidate.
STALENESS_DAYS = 30
# The uncovered-files warning fires only above this count.
UNCOVERED_FILES_MIN = 3


def check_health_nudges(
    observations: List["ObservationLog"],
    scope: str,
    repo_root: str = "",
    threshold_drop: float = ACCURACY_DROP_THRESHOLD,
) -> List[dict]:
    """Collect health warnings for *scope* at resolve time.

    Up to three warnings are produced, in this order: ``accuracy_degraded``
    (needs at least three observations), then ``stale`` and
    ``uncovered_files`` (both need *repo_root*). Each warning is a dict
    with ``scope``, ``issue``, ``message`` and ``suggestion``.
    """
    nudges: List[dict] = []

    # 1. Accuracy degradation: mean recall of the older half vs. the recent half.
    if len(observations) >= 3:
        recall_series = [obs.recall for obs in observations]
        split_at = len(recall_series) // 2
        older_half = recall_series[:split_at]
        recent_half = recall_series[split_at:]
        older_avg = sum(older_half) / split_at
        recent_avg = sum(recent_half) / len(recent_half)
        if older_avg - recent_avg >= threshold_drop:
            nudges.append({
                "scope": scope,
                "issue": "accuracy_degraded",
                "message": (
                    f"{scope}/ accuracy has dropped"
                    f" from {older_avg:.0%} to {recent_avg:.0%}"
                ),
                "suggestion": f"dotscope health {scope}",
            })

    # The remaining checks inspect the working tree.
    if not repo_root:
        return nudges

    # 2. Staleness: an old .scope file plus commits made since its mtime.
    scope_file = os.path.join(repo_root, scope, ".scope")
    if os.path.exists(scope_file):
        modified_at = os.path.getmtime(scope_file)
        age_days = int((time.time() - modified_at) / 86400)
        if age_days > STALENESS_DAYS:
            commit_count = _count_commits_since(repo_root, scope, modified_at)
            if commit_count > 0:
                nudges.append({
                    "scope": scope,
                    "issue": "stale",
                    "message": (
                        f"{scope}/ hasn't been updated in {age_days} days"
                        f" . {commit_count} commits have touched this module"
                    ),
                    "suggestion": f"dotscope ingest {scope}/",
                })

    # 3. Uncovered files
    uncovered_count = _count_uncovered_files(repo_root, scope)
    if uncovered_count > UNCOVERED_FILES_MIN:
        nudges.append({
            "scope": scope,
            "issue": "uncovered_files",
            "message": (
                f"{scope}/ has {uncovered_count} files"
                f" not covered by scope includes"
            ),
            "suggestion": f"dotscope ingest {scope}/",
        })

    return nudges
509
+
510
+
511
+ def _count_commits_since(repo_root: str, scope: str, since_ts: float) -> int:
512
+ """Count commits touching files in this scope's directory since a timestamp."""
513
+ import subprocess
514
+ from datetime import datetime, timezone
515
+ since_dt = datetime.fromtimestamp(since_ts, tz=timezone.utc)
516
+ try:
517
+ result = subprocess.run(
518
+ ["git", "log", "--oneline",
519
+ f"--since={since_dt.isoformat()}",
520
+ "--", f"{scope}/"],
521
+ capture_output=True, text=True, cwd=repo_root, timeout=10,
522
+ )
523
+ return len(result.stdout.strip().splitlines()) if result.stdout.strip() else 0
524
+ except Exception:
525
+ return 0
526
+
527
+
528
+ def _count_uncovered_files(repo_root: str, scope: str) -> int:
529
+ """Count source files in a scope directory not covered by includes."""
530
+ scope_dir = os.path.join(repo_root, scope)
531
+ if not os.path.isdir(scope_dir):
532
+ return 0
533
+
534
+ source_exts = {".py", ".js", ".ts", ".go", ".rs", ".rb", ".java"}
535
+ count = 0
536
+ for dirpath, _dirs, filenames in os.walk(scope_dir):
537
+ # Skip hidden dirs and common non-source dirs
538
+ rel = os.path.relpath(dirpath, repo_root)
539
+ if any(part.startswith(".") or part in ("node_modules", "__pycache__", "venv")
540
+ for part in rel.split(os.sep)):
541
+ continue
542
+ for fn in filenames:
543
+ if os.path.splitext(fn)[1] in source_exts:
544
+ count += 1
545
+
546
+ # Subtract the files that ARE in includes (rough: count files under scope/)
547
+ # The includes typically have "scope/" which covers all, so uncovered = 0 in that case
548
+ # Only flag if the scope file exists but doesn't include the directory
549
+ scope_path = os.path.join(repo_root, scope, ".scope")
550
+ if os.path.exists(scope_path):
551
+ try:
552
+ from .parser import parse_scope_file
553
+ config = parse_scope_file(scope_path)
554
+ # If the scope includes the directory itself, all files are covered
555
+ if any(inc.rstrip("/") == scope or inc.startswith(scope + "/")
556
+ for inc in config.includes):
557
+ return 0
558
+ except Exception:
559
+ pass
560
+
561
+ return count
562
+
563
+
564
+ # ---------------------------------------------------------------------------
565
+ # Feature 5: Near-Miss Detection
566
+ # ---------------------------------------------------------------------------
567
+
568
# Patterns: (anti_pattern_keywords, safe_pattern_keywords)
_NEAR_MISS_PATTERNS = [
    # Soft delete pattern
    (
        [".delete()", "hard delete", "drop table"],
        [".deactivate()", "soft_delete", "is_active", "deleted_at"],
    ),
    # Direct DB access
    (
        ["raw sql", "execute(", "cursor.execute"],
        ["orm", "query.", "filter(", "objects."],
    ),
    # Force push
    (
        ["--force", "push -f", "reset --hard"],
        ["--force-with-lease", "revert"],
    ),
]


def detect_near_misses(
    context: str,
    diff_text: str,
    scope: str,
) -> List[dict]:
    """Detect cases where scope context may have prevented a mistake.

    A near-miss is when:
    1. The context contains a warning about an anti-pattern
    2. The diff does NOT contain the anti-pattern
    3. The diff DOES contain the safe alternative

    In addition, every context line of the form "never/don't/do not/avoid
    <phrase>" whose phrase is absent from the diff is reported.

    False positives are acceptable — they still build trust.

    Returns:
        At most three dicts with ``scope_context_used`` and
        ``potential_impact``, deduplicated by ``scope_context_used``.
        Empty when *context* or *diff_text* is empty.
    """
    if not context or not diff_text:
        return []

    near_misses = []
    context_lower = context.lower()
    diff_lower = diff_text.lower()
    context_lines = context.split("\n")

    # Check keyword patterns from context
    for anti_keywords, safe_keywords in _NEAR_MISS_PATTERNS:
        # Is the anti-pattern warned about in context?
        if not any(kw in context_lower for kw in anti_keywords):
            continue

        # A safe keyword may embed an anti keyword ("--force-with-lease"
        # contains "--force"). Blank those out before looking for the
        # anti-pattern, otherwise using the safe form counts as the mistake.
        diff_for_anti = diff_lower
        for safe in safe_keywords:
            if any(anti in safe for anti in anti_keywords):
                diff_for_anti = diff_for_anti.replace(safe, "\x00")

        # Anti-pattern absent from diff, safe pattern present?
        anti_in_diff = any(kw in diff_for_anti for kw in anti_keywords)
        safe_in_diff = any(kw in diff_lower for kw in safe_keywords)

        if not anti_in_diff and safe_in_diff:
            # Find the specific context line that warned
            warning_line = ""
            for line in context_lines:
                if any(kw in line.lower() for kw in anti_keywords + safe_keywords):
                    warning_line = line.strip()
                    break

            near_misses.append({
                "scope_context_used": warning_line or "Context warning applied",
                "potential_impact": f"Avoided anti-pattern in {scope}/",
            })

    # Also check explicit "never"/"don't" patterns from context
    for line in context_lines:
        line_clean = line.strip()
        if not line_clean:
            continue

        match = re.search(
            r"\b(never|don't|do not|avoid)\b\s+(.{10,50})",
            line_clean,
            re.IGNORECASE,
        )
        if not match:
            continue

        # Keep only the text up to the first period as the forbidden phrase.
        anti_phrase = match.group(2).lower().split(".")[0].strip()
        # If the anti-phrase is NOT in the diff, that's a candidate
        if anti_phrase not in diff_lower and len(anti_phrase) > 5:
            near_misses.append({
                "scope_context_used": line_clean,
                "potential_impact": f"Anti-pattern avoided: {anti_phrase}",
            })

    # Deduplicate by scope_context_used, keeping the first entry per line
    seen = set()
    unique = []
    for nm in near_misses:
        key = nm["scope_context_used"]
        if key not in seen:
            seen.add(key)
            unique.append(nm)

    return unique[:3]  # Cap at 3
664
+ return unique[:3] # Cap at 3