dotscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. dotscope/.scope +63 -0
  2. dotscope/__init__.py +3 -0
  3. dotscope/absorber.py +390 -0
  4. dotscope/assertions.py +128 -0
  5. dotscope/ast_analyzer.py +2 -0
  6. dotscope/backtest.py +2 -0
  7. dotscope/bench.py +141 -0
  8. dotscope/budget.py +3 -0
  9. dotscope/cache.py +2 -0
  10. dotscope/check/__init__.py +1 -0
  11. dotscope/check/acknowledge.py +2 -0
  12. dotscope/check/checker.py +3 -0
  13. dotscope/check/checks/__init__.py +1 -0
  14. dotscope/check/checks/antipattern.py +2 -0
  15. dotscope/check/checks/boundary.py +2 -0
  16. dotscope/check/checks/contracts.py +3 -0
  17. dotscope/check/checks/direction.py +2 -0
  18. dotscope/check/checks/intent.py +2 -0
  19. dotscope/check/checks/stability.py +2 -0
  20. dotscope/check/constraints.py +2 -0
  21. dotscope/check/models.py +15 -0
  22. dotscope/cli.py +1447 -0
  23. dotscope/composer.py +147 -0
  24. dotscope/constants.py +45 -0
  25. dotscope/context.py +60 -0
  26. dotscope/counterfactual.py +180 -0
  27. dotscope/debug.py +220 -0
  28. dotscope/discovery.py +104 -0
  29. dotscope/formatter.py +157 -0
  30. dotscope/graph.py +3 -0
  31. dotscope/health.py +212 -0
  32. dotscope/help.py +204 -0
  33. dotscope/history.py +6 -0
  34. dotscope/hooks.py +2 -0
  35. dotscope/ingest.py +858 -0
  36. dotscope/intent.py +618 -0
  37. dotscope/lessons.py +223 -0
  38. dotscope/matcher.py +104 -0
  39. dotscope/mcp_server.py +1081 -0
  40. dotscope/models/.scope +45 -0
  41. dotscope/models/__init__.py +7 -0
  42. dotscope/models/core.py +288 -0
  43. dotscope/models/history.py +73 -0
  44. dotscope/models/intent.py +213 -0
  45. dotscope/models/passes.py +58 -0
  46. dotscope/models/state.py +250 -0
  47. dotscope/models.py +9 -0
  48. dotscope/near_miss.py +3 -0
  49. dotscope/onboarding.py +2 -0
  50. dotscope/parser.py +387 -0
  51. dotscope/passes/.scope +105 -0
  52. dotscope/passes/__init__.py +1 -0
  53. dotscope/passes/ast_analyzer.py +508 -0
  54. dotscope/passes/backtest.py +198 -0
  55. dotscope/passes/budget_allocator.py +164 -0
  56. dotscope/passes/convention_compliance.py +40 -0
  57. dotscope/passes/convention_discovery.py +247 -0
  58. dotscope/passes/convention_parser.py +223 -0
  59. dotscope/passes/graph_builder.py +299 -0
  60. dotscope/passes/history_miner.py +336 -0
  61. dotscope/passes/incremental.py +149 -0
  62. dotscope/passes/lang/__init__.py +38 -0
  63. dotscope/passes/lang/_base.py +20 -0
  64. dotscope/passes/lang/_treesitter.py +93 -0
  65. dotscope/passes/lang/go.py +333 -0
  66. dotscope/passes/lang/javascript.py +348 -0
  67. dotscope/passes/lazy.py +152 -0
  68. dotscope/passes/semantic_diff.py +160 -0
  69. dotscope/passes/sentinel/__init__.py +1 -0
  70. dotscope/passes/sentinel/acknowledge.py +222 -0
  71. dotscope/passes/sentinel/checker.py +383 -0
  72. dotscope/passes/sentinel/checks/__init__.py +1 -0
  73. dotscope/passes/sentinel/checks/antipattern.py +84 -0
  74. dotscope/passes/sentinel/checks/boundary.py +46 -0
  75. dotscope/passes/sentinel/checks/contracts.py +148 -0
  76. dotscope/passes/sentinel/checks/convention.py +54 -0
  77. dotscope/passes/sentinel/checks/direction.py +71 -0
  78. dotscope/passes/sentinel/checks/intent.py +207 -0
  79. dotscope/passes/sentinel/checks/stability.py +66 -0
  80. dotscope/passes/sentinel/checks/voice.py +108 -0
  81. dotscope/passes/sentinel/constraints.py +472 -0
  82. dotscope/passes/sentinel/line_filter.py +88 -0
  83. dotscope/passes/sentinel/models.py +15 -0
  84. dotscope/passes/virtual.py +239 -0
  85. dotscope/passes/voice.py +162 -0
  86. dotscope/passes/voice_defaults.py +28 -0
  87. dotscope/passes/voice_discovery.py +245 -0
  88. dotscope/paths.py +32 -0
  89. dotscope/progress.py +44 -0
  90. dotscope/regression.py +147 -0
  91. dotscope/resolver.py +203 -0
  92. dotscope/scanner.py +246 -0
  93. dotscope/sessions.py +2 -0
  94. dotscope/storage/.scope +64 -0
  95. dotscope/storage/__init__.py +1 -0
  96. dotscope/storage/cache.py +114 -0
  97. dotscope/storage/claude_hooks.py +119 -0
  98. dotscope/storage/git_hooks.py +277 -0
  99. dotscope/storage/incremental_state.py +61 -0
  100. dotscope/storage/mcp_config.py +98 -0
  101. dotscope/storage/near_miss.py +183 -0
  102. dotscope/storage/onboarding.py +150 -0
  103. dotscope/storage/session_manager.py +195 -0
  104. dotscope/storage/timing.py +84 -0
  105. dotscope/timing.py +2 -0
  106. dotscope/tokens.py +53 -0
  107. dotscope/utility.py +123 -0
  108. dotscope/virtual.py +3 -0
  109. dotscope/visibility.py +664 -0
  110. dotscope-0.1.0.dist-info/METADATA +50 -0
  111. dotscope-0.1.0.dist-info/RECORD +114 -0
  112. dotscope-0.1.0.dist-info/WHEEL +4 -0
  113. dotscope-0.1.0.dist-info/entry_points.txt +3 -0
  114. dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,98 @@
1
+ """Auto-detect IDE and configure MCP server.
2
+
3
+ Finds Claude Desktop, Claude Code, and Cursor configs.
4
+ Writes the dotscope MCP entry if not already present.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+
12
+
13
def configure_mcp(repo_root: str) -> list:
    """Register the dotscope MCP server with each supported IDE config.

    Three targets are tried in order: the per-user Claude Desktop config,
    the project's ``.claude/settings.json`` and the project's
    ``.cursor/mcp.json``.

    Returns the display names of the IDEs whose config file was written by
    this call. Targets whose writer returned False are left out.
    """
    newly_configured = []

    # Claude Desktop: the path helper returns "" when nothing was found.
    desktop_config = _claude_desktop_config_path()
    if desktop_config and _add_mcp_entry(desktop_config, repo_root):
        newly_configured.append("Claude Desktop")

    # Claude Code (.claude/settings.json in project); no --root is passed.
    claude_code_config = os.path.join(repo_root, ".claude", "settings.json")
    if _add_mcp_entry(claude_code_config, None):
        newly_configured.append("Claude Code")

    # Cursor (.cursor/mcp.json in project) has its own writer.
    cursor_config = os.path.join(repo_root, ".cursor", "mcp.json")
    if _add_mcp_entry_cursor(cursor_config, repo_root):
        newly_configured.append("Cursor")

    return newly_configured
34
+
35
+
36
def _claude_desktop_config_path() -> str:
    """Return the Claude Desktop config file path, or "" if unavailable.

    The location depends on ``sys.platform``:

    * ``darwin``: ``~/Library/Application Support/Claude``
    * ``win32``: ``%APPDATA%/Claude``
    * anything else: ``~/.config/Claude``

    The path is only returned when that ``Claude`` directory already
    exists; the config file itself does not need to exist yet.
    """
    if sys.platform == "darwin":
        config_dir = Path.home() / "Library" / "Application Support" / "Claude"
    elif sys.platform == "win32":
        appdata = os.environ.get("APPDATA", "")
        if not appdata:
            # Path("") / "Claude" would be relative to the current working
            # directory, so a stray ./Claude folder would be mistaken for
            # the real config location.
            return ""
        config_dir = Path(appdata) / "Claude"
    else:
        config_dir = Path.home() / ".config" / "Claude"

    if not config_dir.is_dir():
        return ""
    return str(config_dir / "claude_desktop_config.json")
47
+
48
+
49
def _add_mcp_entry(config_path: str, repo_root: str) -> bool:
    """Add a ``dotscope`` server under ``mcpServers`` in a JSON config file.

    Args:
        config_path: JSON file to create or update.
        repo_root: When truthy, the entry gets ``--root <absolute path>``
            arguments; when falsy, only the command is written.

    Returns:
        True if the file was written. False if an entry already exists, if
        the existing file is not in the expected shape, or if reading or
        writing failed.
    """
    try:
        config = {}
        if os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as f:
                config = json.load(f)

        # Leave files with an unexpected shape alone rather than crash on
        # them or overwrite the user's data.
        if not isinstance(config, dict):
            return False
        servers = config.setdefault("mcpServers", {})
        if not isinstance(servers, dict):
            return False
        if "dotscope" in servers:
            return False  # Already configured

        entry = {"command": "dotscope-mcp"}
        if repo_root:
            entry["args"] = ["--root", os.path.abspath(repo_root)]
        servers["dotscope"] = entry

        # dirname is "" for a bare filename, and os.makedirs("") raises.
        parent = os.path.dirname(config_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)

        return True
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return False
74
+
75
+
76
def _add_mcp_entry_cursor(config_path: str, repo_root: str) -> bool:
    """Add a ``dotscope`` server to Cursor's ``mcp.json``.

    Cursor reads server definitions from the top-level ``mcpServers``
    object, so the entry is written there. A ``dotscope`` key sitting at
    the top level of the file is left untouched.

    Args:
        config_path: Path of the Cursor MCP config file.
        repo_root: Repository root, passed to the server as ``--root``.

    Returns:
        True if the file was written. False if an entry already exists, if
        the existing file is not in the expected shape, or if reading or
        writing failed.
    """
    try:
        config = {}
        if os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as f:
                config = json.load(f)

        # Leave files with an unexpected shape alone rather than crash on
        # them or overwrite the user's data.
        if not isinstance(config, dict):
            return False
        servers = config.setdefault("mcpServers", {})
        if not isinstance(servers, dict):
            return False
        if "dotscope" in servers:
            return False  # Already configured

        servers["dotscope"] = {
            "command": "dotscope-mcp",
            "args": ["--root", os.path.abspath(repo_root)],
        }

        # dirname is "" for a bare filename, and os.makedirs("") raises.
        parent = os.path.dirname(config_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)

        return True
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return False
@@ -0,0 +1,183 @@
1
+ """Near-miss detection: disasters that didn't happen.
2
+
3
+ Extracts warning pairs from scope context, compares against commit diffs,
4
+ and stores detected near-misses for the agent channel.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import re
10
+ import time
11
+ from pathlib import Path
12
+ from typing import Dict, List, Optional
13
+
14
+ from ..models.intent import NearMiss, WarningPair # noqa: F401
15
+
16
+
17
# Regex patterns for extracting warning pairs from context.
# Each pattern captures two groups: (1) the thing to avoid and (2) the
# replacement to use instead.
_PAIR_PATTERNS = [
    # "Never call .delete(), use .deactivate()"
    r"[Nn]ever\s+(?:call\s+)?(\S+).*?use\s+(\S+)",
    # "Don't use X, use Y instead"
    r"[Dd]on'?t\s+(?:use\s+)?(\S+).*?use\s+(\S+)",
    # "Avoid X — use Y"
    r"[Aa]void\s+(\S+).*?use\s+(\S+)",
    # "X is deprecated, use Y"
    r"(\S+)\s+is\s+deprecated.*?use\s+(\S+)",
]


def extract_warning_pairs(
    scope_name: str, context: str,
) -> List[WarningPair]:
    """Extract (anti_pattern, safe_pattern) pairs from scope context.

    Each non-empty line has leading "-" and space characters removed and is
    then tried against ``_PAIR_PATTERNS`` in order. The first pattern that
    yields two non-empty tokens produces one ``WarningPair`` for that line.

    Args:
        scope_name: Stored on every pair as ``scope``.
        context: Multi-line context text to scan.

    Returns:
        The pairs found, in line order; at most one per line.
    """
    trim_chars = ".,;:()"
    pairs = []
    for line in context.splitlines():
        stripped = line.strip().lstrip("- ")
        if not stripped:
            continue
        for pattern in _PAIR_PATTERNS:
            match = re.search(pattern, stripped)
            if not match:
                continue
            anti = match.group(1).strip(trim_chars)
            safe = match.group(2).strip(trim_chars)
            if not anti or not safe:
                # A token made only of punctuation strips to "". An empty
                # string is a substring of everything, so such a pair would
                # match any text; try the next pattern instead.
                continue
            pairs.append(WarningPair(
                anti_pattern=anti,
                safe_pattern=safe,
                context_line=stripped,
                scope=scope_name,
            ))
            break
    return pairs
50
+
51
+
52
def detect_near_misses(
    diff_text: str,
    scope_contexts: Dict[str, str],
) -> List[NearMiss]:
    """Report warnings whose safe pattern is in the diff and whose anti-pattern is not.

    Matching is a case-insensitive substring test against the whole diff
    text.

    Args:
        diff_text: The full commit diff.
        scope_contexts: {scope_name: context_str} for scopes resolved in session.

    Returns:
        At most five ``NearMiss`` records; empty when ``diff_text`` is empty.
    """
    if not diff_text:
        return []

    haystack = diff_text.lower()
    found = []

    for scope_name, context in scope_contexts.items():
        if not context:
            continue

        for pair in extract_warning_pairs(scope_name, context):
            avoided_present = pair.anti_pattern.lower() in haystack
            safe_present = pair.safe_pattern.lower() in haystack
            if avoided_present or not safe_present:
                continue

            event = (
                f"Agent used {pair.safe_pattern}"
                f" instead of {pair.anti_pattern}"
            )
            impact = (
                f"Using {pair.anti_pattern} instead of {pair.safe_pattern}"
                f" would have violated the constraint:"
                f" {pair.context_line}"
            )
            found.append(NearMiss(
                scope=scope_name,
                event=event,
                context_used=pair.context_line,
                potential_impact=impact,
            ))

    return found[:5]  # Cap
93
+
94
+
95
def store_near_misses(root: str, near_misses: List[NearMiss]) -> None:
    """Append near-misses to ``<root>/.dotscope/near_misses.jsonl``.

    One JSON object is written per near-miss, stamped with the current
    time. The file is then trimmed to its last 100 entries.
    """
    log_path = Path(root) / ".dotscope" / "near_misses.jsonl"
    log_path.parent.mkdir(parents=True, exist_ok=True)

    with open(log_path, "a", encoding="utf-8") as log:
        for miss in near_misses:
            record = {
                "scope": miss.scope,
                "event": miss.event,
                "context_used": miss.context_used,
                "potential_impact": miss.potential_impact,
                "timestamp": time.time(),
            }
            log.write(json.dumps(record) + "\n")

    # Cap at 100 entries
    _truncate_jsonl(log_path, max_entries=100)
112
+
113
+
114
def load_recent_near_misses(
    root: str, scope: str, max_age_hours: int = 48,
) -> List[dict]:
    """Load recent near-misses for one scope from .dotscope/near_misses.jsonl.

    Args:
        root: Repository root containing the ``.dotscope`` directory.
        scope: Only entries whose ``scope`` equals this value are returned.
        max_age_hours: Entries older than this many hours are dropped.

    Returns:
        Dicts with ``event``, ``context_used``, ``potential_impact`` and a
        ``detected`` label such as ``"3h ago"`` (never less than 1h), in
        file order. A missing or unreadable file gives an empty list.
    """
    path = Path(root) / ".dotscope" / "near_misses.jsonl"
    try:
        raw_lines = path.read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError):
        return []

    now = time.time()
    cutoff = now - (max_age_hours * 3600)
    results = []
    for line in raw_lines:
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
            if entry.get("scope") != scope:
                continue
            timestamp = entry.get("timestamp", 0)
            if timestamp < cutoff:
                continue
            hours_ago = max(1, int((now - timestamp) / 3600))
            results.append({
                "event": entry["event"],
                "context_used": entry["context_used"],
                "potential_impact": entry["potential_impact"],
                "detected": f"{hours_ago}h ago",
            })
        except (ValueError, KeyError, TypeError, AttributeError):
            # Skip only the malformed record so that one bad line does not
            # hide the valid entries that follow it.
            continue

    return results
144
+
145
+
146
def save_session_scopes(root: str, scopes: list) -> None:
    """Write resolved scopes to ``<root>/.dotscope/last_session.json``.

    The file holds the scopes plus an ``ended_at`` timestamp. Nothing is
    written, and no directory is created, when ``scopes`` is empty.
    """
    if not scopes:
        return

    session_file = Path(root) / ".dotscope" / "last_session.json"
    session_file.parent.mkdir(parents=True, exist_ok=True)

    payload = {
        "scopes": scopes,
        "ended_at": time.time(),
    }
    session_file.write_text(json.dumps(payload), encoding="utf-8")
156
+
157
+
158
def load_session_scopes(root: str) -> List[str]:
    """Load scopes from ``<root>/.dotscope/last_session.json``.

    Returns:
        The stored ``scopes`` list when the recorded ``ended_at`` is no
        more than 4 hours old. A missing, unreadable, malformed or stale
        file gives an empty list.
    """
    path = Path(root) / ".dotscope" / "last_session.json"
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []

    if not isinstance(data, dict):
        return []

    ended_at = data.get("ended_at", 0)
    scopes = data.get("scopes", [])
    # Reject values of the wrong type instead of raising on the
    # subtraction below or handing a non-list to the caller.
    if not isinstance(ended_at, (int, float)) or not isinstance(scopes, list):
        return []

    # Only use if session ended within 4 hours
    if time.time() - ended_at > 14400:
        return []
    return scopes
171
+
172
+
173
def _truncate_jsonl(path: Path, max_entries: int = 100) -> None:
    """Keep only the last ``max_entries`` lines of a JSONL file.

    The file is rewritten only when it holds more lines than allowed. With
    ``max_entries`` of zero or less the file is emptied. This is
    best-effort: read, write and decoding failures are ignored.
    """
    try:
        lines = path.read_text(encoding="utf-8").splitlines()
        if len(lines) <= max_entries:
            return
        # lines[-0:] is the whole list, so a limit of zero needs its own
        # branch to actually drop everything.
        kept = lines[-max_entries:] if max_entries > 0 else []
        path.write_text(
            "".join(f"{line}\n" for line in kept),
            encoding="utf-8",
        )
    except (OSError, UnicodeError):
        # Trimming is housekeeping only; a failure here must not break the
        # caller that just appended to the file.
        pass
@@ -0,0 +1,150 @@
1
+ """Stage-aware onboarding: guide the developer from skepticism to dependency.
2
+
3
+ Tracks milestones in .dotscope/onboarding.json. Used to:
4
+ 1. Tailor "next step" prompts (one at a time, never nag)
5
+ 2. Gate complexity (counterfactuals after 3+ observations, health after 7+ days)
6
+ 3. Celebrate milestones (first session, first observation, first counterfactual)
7
+ """
8
+
9
+ import json
10
+ import os
11
+ import time
12
+ from typing import Optional
13
+
14
+
15
def load_onboarding(repo_root: str) -> dict:
    """Load onboarding state, falling back to defaults where needed.

    The stored JSON object is layered over the default state, so keys that
    are absent from the file keep their default value. A missing,
    unreadable or malformed file, or one whose top level is not a JSON
    object, gives the plain default state.
    """
    state = _default_state()
    path = _onboarding_path(repo_root)
    if not os.path.exists(path):
        return state

    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return state

    # Callers use dict operations on the result, so a list or scalar at
    # the top level is treated like a corrupt file.
    if isinstance(loaded, dict):
        state.update(loaded)
    return state
25
+
26
+
27
def save_onboarding(repo_root: str, state: dict) -> None:
    """Write ``state`` as indented JSON to the onboarding file.

    The ``.dotscope`` directory under ``repo_root`` is created first if it
    does not exist.
    """
    os.makedirs(os.path.join(repo_root, ".dotscope"), exist_ok=True)
    target = _onboarding_path(repo_root)
    with open(target, "w", encoding="utf-8") as out:
        json.dump(state, out, indent=2)
34
+
35
+
36
def mark_milestone(repo_root: str, milestone: str) -> dict:
    """Stamp ``milestone`` with the current time if it is still unset.

    Only keys that exist in the loaded state with a value of None are
    written; anything else leaves the state and the file untouched.
    Returns the (possibly updated) state.
    """
    state = load_onboarding(repo_root)
    is_pending = milestone in state and state[milestone] is None
    if not is_pending:
        return state

    state[milestone] = _now()
    save_onboarding(repo_root, state)
    return state
43
+
44
+
45
def increment_counter(repo_root: str, counter: str) -> dict:
    """Add one to ``counter`` in the onboarding state and save it.

    A counter that is absent from the state starts from zero. Returns the
    updated state.
    """
    state = load_onboarding(repo_root)
    previous = state.get(counter, 0)
    state[counter] = previous + 1
    save_onboarding(repo_root, state)
    return state
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Gating rules: when to show what
55
+ # ---------------------------------------------------------------------------
56
+
57
def should_show_counterfactuals(state: dict) -> bool:
    """Return True once at least three observations have been recorded."""
    recorded = state.get("observations_recorded", 0)
    return recorded >= 3
60
+
61
+
62
def should_show_health_nudges(state: dict) -> bool:
    """Return True once the first ingest is at least seven days old.

    With no ``first_ingest`` recorded the answer is False. If the stored
    timestamp cannot be parsed, the nudges are shown.
    """
    first_ingest = state.get("first_ingest")
    if not first_ingest:
        return False

    try:
        age_seconds = time.time() - _parse_ts(first_ingest)
    except (ValueError, TypeError):
        return True  # If we can't parse, show them

    return age_seconds >= 7 * 86400  # 7 days
72
+
73
+
74
def next_step(state: dict) -> Optional[str]:
    """Return the prompt for the first unfinished onboarding step, or None.

    Steps are checked in a fixed order, and a step counts as unfinished
    while its state value is falsy or missing.
    """
    ordered_prompts = (
        ("first_backtest", "Next: `dotscope check --backtest`"),
        ("conventions_reviewed", "Next: `dotscope conventions`"),
        ("voice_reviewed", "Next: `dotscope voice`"),
        ("first_session", "Next: Add dotscope to your agent (docs/mcp-setup.md)"),
        ("hook_installed", "Next: `dotscope hook install`"),
    )
    for key, prompt in ordered_prompts:
        if not state.get(key):
            return prompt
    return None  # Onboarded. Stop prompting.
87
+
88
+
89
def milestone_message(state: dict) -> Optional[str]:
    """Return the message for the milestone the counters sit on, or None.

    The checks run in order: first session, first observation, fifth
    session. Missing counters are treated as zero.
    """
    session_count = state.get("sessions_completed", 0)
    observation_count = state.get("observations_recorded", 0)

    if session_count == 1:
        return "First session tracked."
    if observation_count == 1:
        return "Feedback loop active — scopes will improve with use."
    if session_count != 5:
        return None
    return f"5 sessions completed. {observation_count} observations recorded."
101
+
102
+
103
def version_control_tip(state: dict) -> Optional[str]:
    """Return the tip about committing .scope files, or None once shown.

    The tip is suppressed when ``vc_tip_shown`` is truthy in ``state``.
    """
    already_shown = state.get("vc_tip_shown")
    if not already_shown:
        return "Commit .scope files and intent.yaml. .dotscope/ is gitignored and rebuilds."
    return None
110
+
111
+
112
def mark_vc_tip_shown(repo_root: str) -> None:
    """Set ``vc_tip_shown`` to True in the onboarding state and save it."""
    onboarding = load_onboarding(repo_root)
    onboarding["vc_tip_shown"] = True
    save_onboarding(repo_root, onboarding)
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Internals
121
+ # ---------------------------------------------------------------------------
122
+
123
def _onboarding_path(repo_root: str) -> str:
    """Return the path of ``onboarding.json`` inside ``<repo_root>/.dotscope``."""
    dot_dir = os.path.join(repo_root, ".dotscope")
    return os.path.join(dot_dir, "onboarding.json")
125
+
126
+
127
def _default_state() -> dict:
    """Return a fresh onboarding state with nothing recorded.

    Milestone timestamps start as None, counters at zero and the
    version-control tip flag as False. Key order is fixed.
    """
    milestone_keys = (
        "first_ingest",
        "first_backtest",
        "first_session",
        "hook_installed",
        "first_observation",
        "first_check_hold",
        "conventions_reviewed",
        "voice_reviewed",
    )
    state = dict.fromkeys(milestone_keys)
    state["sessions_completed"] = 0
    state["observations_recorded"] = 0
    state["vc_tip_shown"] = False
    return state
141
+
142
+
143
def _now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
145
+
146
+
147
def _parse_ts(ts: str) -> float:
    """Parse an ISO timestamp into epoch seconds.

    Every "Z" in the input is replaced with "+00:00" before the string is
    handed to ``datetime.fromisoformat``.
    """
    from datetime import datetime

    normalized = ts.replace("Z", "+00:00")
    parsed = datetime.fromisoformat(normalized)
    return parsed.timestamp()
@@ -0,0 +1,195 @@
1
+ """Session tracking and observation: the feedback loop.
2
+
3
+ Sessions record what dotscope predicted an agent would need.
4
+ Observations record what actually happened (post-commit).
5
+ Together they close the loop between prediction and reality.
6
+
7
+ All data is append-only. Derived views (utility scores, lessons)
8
+ are computed from these logs and can be rebuilt.
9
+ """
10
+
11
+ import hashlib
12
+ import json
13
+ import os
14
+ import subprocess
15
+ import time
16
+ import uuid
17
+ from pathlib import Path
18
+ from typing import List, Optional
19
+
20
+ from ..models import ObservationLog, SessionLog
21
+
22
+
23
class SessionManager:
    """Manages the .dotscope/ state directory and the session lifecycle.

    Sessions are stored as ``.dotscope/sessions/<session_id>.json`` and
    observations as ``.dotscope/observations/<first 8 hash chars>.json``
    under the root passed to the constructor.
    """

    def __init__(self, root: str):
        self.root = Path(root)
        self.dot_dir = self.root / ".dotscope"
        self.sessions_dir = self.dot_dir / "sessions"
        self.obs_dir = self.dot_dir / "observations"

    def ensure_initialized(self):
        """Create .dotscope/ with schema version and .gitignore.

        Existing ``schema_version`` and ``.gitignore`` files are left as
        they are.
        """
        for d in [self.sessions_dir, self.obs_dir]:
            d.mkdir(parents=True, exist_ok=True)

        version_file = self.dot_dir / "schema_version"
        if not version_file.exists():
            version_file.write_text("1", encoding="utf-8")

        gitignore = self.dot_dir / ".gitignore"
        if not gitignore.exists():
            gitignore.write_text("*\n", encoding="utf-8")

    def create_session(
        self,
        scope_expr: str,
        task: Optional[str],
        files: List[str],
        context: str,
    ) -> str:
        """Record a scope resolution event (the prediction).

        Args:
            scope_expr: The scope expression that was resolved.
            task: Optional task description stored with the session.
            files: Files predicted for the session.
            context: Context text; only the first 16 hex characters of its
                SHA-256 digest are stored.

        Returns:
            The new 8-character session ID.
        """
        self.ensure_initialized()

        session_id = uuid.uuid4().hex[:8]
        session = SessionLog(
            session_id=session_id,
            timestamp=time.time(),
            scope_expr=scope_expr,
            task=task,
            predicted_files=files,
            context_hash=hashlib.sha256(context.encode()).hexdigest()[:16],
        )

        path = self.sessions_dir / f"{session_id}.json"
        path.write_text(json.dumps({
            "session_id": session.session_id,
            "timestamp": session.timestamp,
            "scope_expr": session.scope_expr,
            "task": session.task,
            "predicted_files": session.predicted_files,
            "context_hash": session.context_hash,
        }, indent=2), encoding="utf-8")

        return session_id

    def record_observation(self, commit_hash: str) -> Optional[ObservationLog]:
        """Match a commit to a session and log what actually happened.

        Recall is the share of modified files that were predicted, and
        precision is the share of predicted files that were modified; both
        are rounded to three decimals.

        Returns:
            The stored observation, or None when the commit lists no files
            or no recent session overlaps it enough.
        """
        self.ensure_initialized()

        modified_files = self._get_commit_files(commit_hash)
        if not modified_files:
            return None

        session = self._find_relevant_session(modified_files)
        if not session:
            return None

        predicted_set = set(session.predicted_files)
        actual_set = set(modified_files)

        intersection = predicted_set & actual_set
        predicted_not_touched = sorted(predicted_set - actual_set)
        touched_not_predicted = sorted(actual_set - predicted_set)

        recall = len(intersection) / len(actual_set) if actual_set else 1.0
        precision = len(intersection) / len(predicted_set) if predicted_set else 1.0

        obs = ObservationLog(
            commit_hash=commit_hash,
            session_id=session.session_id,
            actual_files_modified=modified_files,
            predicted_not_touched=predicted_not_touched,
            touched_not_predicted=touched_not_predicted,
            recall=round(recall, 3),
            precision=round(precision, 3),
            timestamp=time.time(),
        )

        path = self.obs_dir / f"{commit_hash[:8]}.json"
        path.write_text(json.dumps({
            "commit_hash": obs.commit_hash,
            "session_id": obs.session_id,
            "actual_files_modified": obs.actual_files_modified,
            "predicted_not_touched": obs.predicted_not_touched,
            "touched_not_predicted": obs.touched_not_predicted,
            "recall": obs.recall,
            "precision": obs.precision,
            "timestamp": obs.timestamp,
        }, indent=2), encoding="utf-8")

        return obs

    def get_sessions(self, limit: int = 50) -> List[SessionLog]:
        """Load up to ``limit`` sessions, newest file first."""
        return self._load_recent(self.sessions_dir, SessionLog, limit)

    def get_observations(self, limit: int = 50) -> List[ObservationLog]:
        """Load up to ``limit`` observations, newest file first."""
        return self._load_recent(self.obs_dir, ObservationLog, limit)

    def _get_commit_files(self, commit_hash: str) -> List[str]:
        """Return the file paths a commit touched, or [] if git fails."""
        try:
            result = subprocess.run(
                [
                    "git", "diff-tree", "--no-commit-id", "--name-only",
                    # Without --root the initial commit lists no files.
                    "--root", "-r", commit_hash,
                ],
                cwd=str(self.root),
                capture_output=True, text=True, timeout=10,
            )
        except (subprocess.TimeoutExpired, OSError):
            # OSError includes FileNotFoundError (git not installed) and
            # an unusable working directory.
            return []
        if result.returncode != 0:
            return []
        return [f.strip() for f in result.stdout.splitlines() if f.strip()]

    def _find_relevant_session(self, modified_files: List[str]) -> Optional[SessionLog]:
        """Match a commit to the best-fit session via Jaccard overlap.

        Only considers session files modified within the last 4 hours.
        Requires a minimum 10% Jaccard score to avoid spurious matches.
        """
        modified_set = set(modified_files)
        best_session = None
        best_score = 0.0
        cutoff = time.time() - (4 * 3600)

        for mtime, p in self._json_files_newest_first(self.sessions_dir):
            if mtime < cutoff:
                # Files are sorted newest first, so the rest are older.
                break
            try:
                data = json.loads(p.read_text(encoding="utf-8"))
                session = SessionLog(**data)
            except (OSError, ValueError, TypeError):
                continue

            predicted_set = set(session.predicted_files)
            intersection = modified_set & predicted_set
            union = modified_set | predicted_set
            jaccard = len(intersection) / len(union) if union else 0.0

            if jaccard > best_score:
                best_score = jaccard
                best_session = session

        if best_score < 0.1:
            return None
        return best_session

    def _load_recent(self, directory: Path, factory, limit: int) -> list:
        """Build up to ``limit`` records from a directory, newest file first.

        Each JSON file is passed to ``factory`` as keyword arguments.
        Unreadable or malformed files are skipped.
        """
        records = []
        for _, p in self._json_files_newest_first(directory):
            if len(records) >= limit:
                break
            try:
                data = json.loads(p.read_text(encoding="utf-8"))
                records.append(factory(**data))
            except (OSError, ValueError, TypeError):
                # ValueError covers JSONDecodeError and UnicodeDecodeError;
                # TypeError is raised for unexpected or missing fields.
                continue
        return records

    @staticmethod
    def _json_files_newest_first(directory: Path) -> list:
        """Return ``(mtime, path)`` for each ``*.json`` file, newest first.

        Files that vanish between listing and stat are skipped instead of
        aborting the scan.
        """
        stamped = []
        for p in directory.glob("*.json"):
            try:
                stamped.append((os.path.getmtime(p), p))
            except OSError:
                continue
        stamped.sort(key=lambda item: item[0], reverse=True)
        return stamped