autoctxd 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +62 -0
  2. package/CONTRIBUTING.md +80 -0
  3. package/LICENSE +21 -0
  4. package/README.md +301 -0
  5. package/SECURITY.md +81 -0
  6. package/package.json +55 -0
  7. package/scripts/install-hooks.ts +80 -0
  8. package/scripts/install.ps1 +71 -0
  9. package/scripts/install.sh +67 -0
  10. package/scripts/uninstall-hooks.ts +57 -0
  11. package/src/ai/active-guard.ts +96 -0
  12. package/src/ai/adaptive-ranker.ts +48 -0
  13. package/src/ai/classifier.ts +256 -0
  14. package/src/ai/compressor.ts +129 -0
  15. package/src/ai/decision-chains.ts +100 -0
  16. package/src/ai/decision-extractor.ts +148 -0
  17. package/src/ai/pattern-detector.ts +147 -0
  18. package/src/ai/proactive.ts +78 -0
  19. package/src/cli/doctor.ts +171 -0
  20. package/src/cli/embeddings.ts +209 -0
  21. package/src/cli/index.ts +574 -0
  22. package/src/cli/reclassify.ts +134 -0
  23. package/src/context/builder.ts +97 -0
  24. package/src/context/formatter.ts +109 -0
  25. package/src/context/ranker.ts +84 -0
  26. package/src/db/sqlite/decisions.ts +56 -0
  27. package/src/db/sqlite/feedback.ts +92 -0
  28. package/src/db/sqlite/observations.ts +58 -0
  29. package/src/db/sqlite/schema.ts +366 -0
  30. package/src/db/sqlite/sessions.ts +50 -0
  31. package/src/db/sqlite/summaries.ts +69 -0
  32. package/src/db/vector/client.ts +134 -0
  33. package/src/db/vector/embeddings.ts +119 -0
  34. package/src/db/vector/providers/factory.ts +99 -0
  35. package/src/db/vector/providers/minilm.ts +90 -0
  36. package/src/db/vector/providers/ollama.ts +92 -0
  37. package/src/db/vector/providers/tfidf.ts +98 -0
  38. package/src/db/vector/providers/types.ts +39 -0
  39. package/src/db/vector/search.ts +131 -0
  40. package/src/hooks/post-tool-use.ts +205 -0
  41. package/src/hooks/pre-tool-use.ts +305 -0
  42. package/src/hooks/stop.ts +334 -0
  43. package/src/mcp/server.ts +293 -0
  44. package/src/server/dashboard.html +268 -0
  45. package/src/server/dashboard.ts +170 -0
  46. package/src/util/debug.ts +56 -0
  47. package/src/util/ignore.ts +171 -0
  48. package/src/util/metrics.ts +236 -0
  49. package/src/util/path.ts +57 -0
  50. package/tsconfig.json +14 -0
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env bun
2
+ // PostToolUse hook: captures and classifies each tool use as an observation
3
+
4
+ import { getDb, closeDb } from "../db/sqlite/schema";
5
+ import { insertObservation } from "../db/sqlite/observations";
6
+ import { createSession } from "../db/sqlite/sessions";
7
+ import { classifyObservation, extractFilePaths } from "../ai/classifier";
8
+ import { recordExplorationCall, recordFirstEdit } from "../util/metrics";
9
+ import { normalizePaths } from "../util/path";
10
+ import { shouldIgnorePaths } from "../util/ignore";
11
+
12
/**
 * Shape of the JSON payload this hook reads from stdin.
 */
interface HookInput {
  /** Key under which the session, metrics and observation rows are recorded. */
  session_id: string;
  /** Declared for completeness; not read by this script. */
  transcript_path: string;
  /** Working directory; stored as the project path and used as the cwd for git lookups. */
  cwd: string;
  /** Must equal "PostToolUse" for the hook to do any work. */
  hook_event_name: string;
  /** Name of the tool that ran; decides whether the call is skipped, counted, or stored. */
  tool_name: string;
  /** Tool arguments; fields such as `file_path`, `command`, `pattern` are read when present. */
  tool_input: Record<string, any>;
  /** Optional tool result. */
  tool_response?: any;
  /** Declared for completeness; not read by this script. */
  tool_use_id: string;
}
22
+
23
/**
 * Entry point of the PostToolUse hook.
 *
 * Reads the hook payload from stdin, records the tool use, and always exits
 * with status 0 so a failure here never interrupts the calling tool.
 */
async function main() {
  let input: HookInput;
  try {
    const raw = await Bun.stdin.text();
    input = JSON.parse(raw);
  } catch {
    process.exit(0);
  }

  // `?.` and the typeof check cover payloads that are valid JSON but not the
  // expected object (e.g. `null`, or an object without `tool_name`); without
  // them the property access below would throw outside any try block.
  if (input?.hook_event_name !== "PostToolUse" || typeof input.tool_name !== "string") {
    process.exit(0);
  }

  // Tools that are pure noise — skip entirely
  const SKIP_TOOLS = new Set(["TodoWrite", "AskUserQuestion"]);
  if (SKIP_TOOLS.has(input.tool_name) || input.tool_name.startsWith("mcp__")) {
    process.exit(0);
  }

  try {
    await captureToolUse(input);
  } catch (e) {
    // Silently fail - don't interrupt Claude
    await appendHookError(e);
  } finally {
    // Reached on every path: captureToolUse returns instead of calling
    // process.exit, which would terminate before this block could run.
    closeDb();
  }

  process.exit(0);
}

/**
 * Ensures the session row exists, then either counts the call as exploration
 * or stores it as a classified observation.
 *
 * @param input Parsed hook payload whose event name and tool name were already checked.
 */
async function captureToolUse(input: HookInput): Promise<void> {
  // Exploration tools: track for metrics but don't save as observations
  const EXPLORATION_TOOLS = new Set(["Read", "Glob", "Grep"]);
  // Substantive edit tools — trigger "first edit" recording
  const EDIT_TOOLS = new Set(["Edit", "Write", "Bash"]);

  // The two git lookups are independent, so run them concurrently.
  const [gitRepo, gitBranch] = await Promise.all([
    getGitRepo(input.cwd),
    getGitBranch(input.cwd),
  ]);

  // Ensure session exists
  createSession({
    session_id: input.session_id,
    project_path: input.cwd,
    git_repo: gitRepo,
    git_branch: gitBranch,
  });

  if (EXPLORATION_TOOLS.has(input.tool_name)) {
    const exploredPath = input.tool_input.file_path
      || input.tool_input.pattern
      || input.tool_input.path
      || "";
    if (exploredPath) {
      recordExplorationCall(input.session_id, exploredPath);
    }
    return; // Don't save as observation
  }

  // Track first substantive action
  if (EDIT_TOOLS.has(input.tool_name)) {
    recordFirstEdit(input.session_id);
  }

  const summary = buildSummary(input);
  const filePaths = extractFilePathsFromInput(input);

  // Honor .autoctxd-ignore at the project root before persisting anything.
  if (shouldIgnorePaths(input.cwd, [input.cwd, ...filePaths])) {
    return;
  }

  const classification = classifyObservation(input.tool_name, summary, filePaths);

  insertObservation({
    session_id: input.session_id,
    type: classification.type,
    tool_name: input.tool_name,
    summary,
    file_paths: filePaths.join(","),
    importance_score: classification.importance,
  });
}

/**
 * Appends one line describing `e` to data/error.log.
 *
 * Appending keeps earlier entries (writing the file afresh would replace
 * them). Any failure while logging is swallowed so the hook still exits 0.
 */
async function appendHookError(e: unknown): Promise<void> {
  try {
    const { appendFile, mkdir } = await import("node:fs/promises");
    const dataDir = `${import.meta.dir}/../../data`;
    await mkdir(dataDir, { recursive: true });
    await appendFile(`${dataDir}/error.log`, `${new Date().toISOString()} PostToolUse error: ${e}\n`);
  } catch {
    // Logging is best-effort.
  }
}
102
+
103
/**
 * Renders a one-line, length-bounded description of a tool call.
 *
 * Each known tool gets its own format; any other tool falls back to its name
 * followed by the truncated JSON of its input.
 *
 * @param input Parsed hook payload.
 * @returns The summary text stored with the observation.
 */
function buildSummary(input: HookInput): string {
  const name = input.tool_name;
  const args = input.tool_input;

  if (name === "Edit") {
    const target = args.file_path || "file";
    const before = firstMeaningfulLine(args.old_string || "", 80);
    const after = firstMeaningfulLine(args.new_string || "", 80);
    return `Edited ${target}: ${before} → ${after}`;
  }

  if (name === "Write") {
    const target = args.file_path || "file";
    const body = args.content || "";
    // Preview: the first three non-blank lines, joined on one line.
    const nonBlank = body.split("\n").filter((l: string) => l.trim());
    const preview = nonBlank.slice(0, 3).join(" | ");
    return `Wrote ${target} (${body.length} chars): ${truncate(preview, 160)}`;
  }

  if (name === "Bash") {
    const command = args.command || "";
    const description = args.description || "";
    // A supplied description wins over anything derived from the command.
    const text = description ? description : extractBashIntent(command);
    return `Bash: ${truncate(text, 200)}`;
  }

  if (name === "Agent") {
    return `Spawned agent: ${args.description || ""} — ${truncate(args.prompt || "", 160)}`;
  }

  if (name === "WebFetch") {
    return `WebFetch ${args.url || ""}: ${truncate(args.prompt || "", 120)}`;
  }

  if (name === "WebSearch") {
    return `WebSearch: ${args.query || ""}`;
  }

  if (name === "NotebookEdit") {
    return `Notebook ${args.edit_mode || "edit"} ${args.notebook_path || ""}`;
  }

  return `${name}: ${truncate(JSON.stringify(args), 160)}`;
}
147
+
148
/**
 * Returns the first line of `text` that is not blank, trimmed and cut to
 * `maxLen`. When every line is blank the whole text is used instead.
 */
function firstMeaningfulLine(text: string, maxLen: number): string {
  for (const candidate of text.split("\n")) {
    const stripped = candidate.trim();
    if (stripped) {
      return truncate(stripped, maxLen);
    }
  }
  return truncate(text.trim(), maxLen);
}
152
+
153
/**
 * Gathers the paths a tool call refers to: its `file_path` argument, if any,
 * followed by whatever `extractFilePaths` finds in its `command` argument.
 *
 * @returns The gathered paths after `normalizePaths`.
 */
function extractFilePathsFromInput(input: HookInput): string[] {
  const args = input.tool_input;
  const collected: string[] = args.file_path ? [args.file_path] : [];

  if (args.command) {
    collected.push(...extractFilePaths(args.command));
  }

  return normalizePaths(collected);
}
168
+
169
/**
 * Runs `git remote get-url origin` in `cwd`.
 *
 * @returns The trimmed stdout, or `undefined` when it is empty or the
 *          process could not be spawned or read.
 */
async function getGitRepo(cwd: string): Promise<string | undefined> {
  try {
    const git = Bun.spawn(["git", "remote", "get-url", "origin"], { cwd, stdout: "pipe", stderr: "pipe" });
    const url = (await new Response(git.stdout).text()).trim();
    return url ? url : undefined;
  } catch {
    return undefined;
  }
}
178
+
179
/**
 * Runs `git branch --show-current` in `cwd`.
 *
 * @returns The trimmed stdout, or `undefined` when it is empty or the
 *          process could not be spawned or read.
 */
async function getGitBranch(cwd: string): Promise<string | undefined> {
  try {
    const git = Bun.spawn(["git", "branch", "--show-current"], { cwd, stdout: "pipe", stderr: "pipe" });
    const branch = (await new Response(git.stdout).text()).trim();
    return branch ? branch : undefined;
  } catch {
    return undefined;
  }
}
188
+
189
/**
 * Reduces a shell command line to the segment that best describes its intent.
 *
 * Only the text before the first `&&` and the first `;` is considered. A
 * recognised package-manager, git, or build command is returned in full.
 * A leading `cd ... &&` is skipped and the rest of the chain is examined
 * the same way. Anything else is capped at 120 characters.
 */
function extractBashIntent(cmd: string): string {
  const recognised = [
    /\b(npm|bun|pnpm|yarn)\s+(install|add|i|remove|run|test)\b/i,
    /\b(git)\s+(commit|push|pull|merge|rebase|checkout|branch)\b/i,
    /\b(pip|cargo|go)\s+(install|add|get|build|test)\b/i,
  ];

  let rest = cmd;
  for (;;) {
    const head = rest.split("&&")[0].split(";")[0].trim();

    if (recognised.some((re) => re.test(head))) {
      return head;
    }

    const startsWithCd = /\bcd\s+/.test(head) && rest.includes("&&");
    if (!startsWithCd) {
      if (head.length <= 120) {
        return head;
      }
      return head.slice(0, 117) + "...";
    }

    // Drop the `cd` segment and look at what follows it in the chain.
    rest = rest.split("&&").slice(1).join("&&");
  }
}
199
+
200
/**
 * Returns `s` unchanged when it has at most `max` characters; otherwise its
 * first `max - 3` characters followed by "...".
 */
function truncate(s: string, max: number): string {
  return s.length > max ? s.slice(0, max - 3) + "..." : s;
}
204
+
205
// Run the hook. A rejection that escapes main() must not surface as an
// unhandled rejection (non-zero exit), so it is turned into a clean exit.
main().catch(() => process.exit(0));
@@ -0,0 +1,305 @@
1
+ #!/usr/bin/env bun
2
+ // PreToolUse hook: session-start logic - injects semantic context on first tool use
3
+
4
+ import { getDb, closeDb } from "../db/sqlite/schema";
5
+ import { createSession, getRecentSessions } from "../db/sqlite/sessions";
6
+ import { getRecentSummaries } from "../db/sqlite/summaries";
7
+ import { getDecisionsByProject } from "../db/sqlite/decisions";
8
+ import { formatContextBlock } from "../context/formatter";
9
+ import { generateEmbedding } from "../db/vector/embeddings";
10
+ import { searchSimilar, closeVectorDb } from "../db/vector/client";
11
+ import { existsSync, readFileSync, mkdirSync, writeFileSync } from "fs";
12
+ import { join } from "path";
13
+ import { debug } from "../util/debug";
14
+ import { recordInjection } from "../util/metrics";
15
+ import { getUnfinishedItems } from "../ai/proactive";
16
+ import { normalizePath } from "../util/path";
17
+
18
/**
 * Shape of the JSON payload this hook reads from stdin.
 */
interface HookInput {
  /** Identifies the session; compared against the stored active session and used for the session row. */
  session_id: string;
  /** Declared for completeness; not read by this script. */
  transcript_path: string;
  /** Working directory; used as the project path for every lookup and as the cwd for git. */
  cwd: string;
  /** Must equal "PreToolUse" for the hook to do any work. */
  hook_event_name: string;
  /** Declared for completeness; not read by this script. */
  tool_name: string;
  /** Declared for completeness; not read by this script. */
  tool_input: Record<string, any>;
  /** Declared for completeness; not read by this script. */
  tool_use_id: string;
}
27
+
28
/** The `data` directory two levels above this module's own directory. */
const STATE_DIR = join(import.meta.dir, "..", "..", "data");

/** JSON file inside STATE_DIR naming the session that last triggered context injection. */
const ACTIVE_SESSION_FILE = join(STATE_DIR, "active-session.json");
30
+
31
/**
 * Entry point of the PreToolUse hook.
 *
 * On the first tool use of a session it prints a JSON object on stdout whose
 * `hookSpecificOutput.additionalContext` carries the formatted context block.
 * On every other call, or when there is nothing to inject, it prints nothing.
 * It always exits with status 0.
 */
async function main() {
  let input: HookInput;
  try {
    const raw = await Bun.stdin.text();
    input = JSON.parse(raw);
  } catch {
    process.exit(0);
  }

  // `?.` covers payloads that are valid JSON but not an object (e.g. `null`),
  // which would otherwise throw here, outside any try block.
  if (input?.hook_event_name !== "PreToolUse") {
    process.exit(0);
  }

  try {
    const contextBlock = await buildSessionContext(input);
    if (contextBlock !== undefined) {
      const output = {
        hookSpecificOutput: {
          hookEventName: "PreToolUse",
          additionalContext: contextBlock,
        },
      };
      console.log(JSON.stringify(output));
    }
  } catch (e) {
    await appendHookError(e);
  } finally {
    // Reached on every path: buildSessionContext returns instead of calling
    // process.exit, which would terminate before this block could run.
    closeDb();
    await closeVectorDb();
  }

  process.exit(0);
}

/**
 * Builds the context block for a session's first tool use.
 *
 * @param input Parsed hook payload whose event name was already checked.
 * @returns The formatted context, or `undefined` when this is not the
 *          session's first tool use or no summaries, decisions, or semantic
 *          matches exist for the project.
 */
async function buildSessionContext(input: HookInput): Promise<string | undefined> {
  // Check if this is the first tool use of the session
  if (!checkAndSetActiveSession(input.session_id)) {
    return undefined;
  }

  // The two git lookups are independent, so run them concurrently.
  const [gitBranch, gitRepo] = await Promise.all([
    getGitBranch(input.cwd),
    getGitRepo(input.cwd),
  ]);

  // Create session record
  createSession({
    session_id: input.session_id,
    project_path: input.cwd,
    git_repo: gitRepo,
    git_branch: gitBranch,
  });

  // Promise.all accepts plain values, so the lookups need no wrapping.
  const [recentSummaries, decisions, hotFiles, patterns, semanticResults] = await Promise.all([
    getRecentSummaries(input.cwd, 1, 3),
    getDecisionsByProject(input.cwd),
    getHotFiles(input.cwd),
    getPatterns(input.cwd),
    semanticSearch(input.cwd, gitBranch),
  ]);

  const recentSessions = getRecentSessions(input.cwd, 3);

  // Only inject if we have something useful
  if (recentSummaries.length === 0 && decisions.length === 0 && semanticResults.length === 0) {
    return undefined;
  }

  // Merge semantic results into summaries if they're from different sessions
  const allSummaries = [...recentSummaries];
  for (const sr of semanticResults) {
    if (!allSummaries.some(s => s.session_id === sr.session_id)) {
      allSummaries.push({
        session_id: sr.session_id,
        level: sr.level,
        text: sr.text,
        project_path: sr.project_path,
        created_at: sr.created_at,
      });
    }
  }

  const unfinished = getUnfinishedItems(input.cwd, 3);

  const contextBlock = formatContextBlock({
    projectPath: input.cwd,
    gitBranch,
    recentSessions,
    recentSummaries: allSummaries.slice(0, 5),
    decisions: decisions.slice(0, 5),
    hotFiles,
    patterns,
    unfinished,
  });

  // Collect all files mentioned in context for hit-rate tracking
  const injectedFiles: string[] = [...hotFiles];
  for (const dec of decisions.slice(0, 5)) {
    if (dec.files_affected) {
      for (const f of dec.files_affected.split(",")) {
        const trimmed = f.trim();
        if (trimmed) injectedFiles.push(trimmed);
      }
    }
  }

  recordInjection(input.session_id, contextBlock, input.cwd, injectedFiles);
  debug("pre-tool-use", "injected context", {
    session: input.session_id.slice(0, 8),
    project: input.cwd,
    chars: contextBlock.length,
    decisions: decisions.length,
    summaries: allSummaries.length,
    semantic: semanticResults.length,
    unfinished: unfinished.length,
  });

  return contextBlock;
}

/**
 * Appends one line describing `e` to data/error.log.
 *
 * Appending keeps earlier entries (writing the file afresh would replace
 * them). Any failure while logging is swallowed so the hook still exits 0.
 */
async function appendHookError(e: unknown): Promise<void> {
  try {
    const { appendFile, mkdir } = await import("node:fs/promises");
    const dataDir = `${import.meta.dir}/../../data`;
    await mkdir(dataDir, { recursive: true });
    await appendFile(`${dataDir}/error.log`, `${new Date().toISOString()} PreToolUse error: ${e}\n`);
  } catch {
    // Logging is best-effort.
  }
}
146
+
147
/**
 * Reports whether `sessionId` differs from the session recorded in
 * ACTIVE_SESSION_FILE and, if so, records it as the active one.
 *
 * @returns `false` when the file already names this session; `true` after
 *          writing a fresh record (also when the file is missing or unreadable).
 */
function checkAndSetActiveSession(sessionId: string): boolean {
  let alreadyActive = false;
  try {
    if (existsSync(ACTIVE_SESSION_FILE)) {
      const stored = JSON.parse(readFileSync(ACTIVE_SESSION_FILE, "utf8"));
      alreadyActive = stored.session_id === sessionId;
    }
  } catch {
    // Unreadable or malformed state counts as a first use.
  }

  if (alreadyActive) {
    return false;
  }

  const record = {
    session_id: sessionId,
    started_at: new Date().toISOString(),
  };
  mkdirSync(STATE_DIR, { recursive: true });
  writeFileSync(ACTIVE_SESSION_FILE, JSON.stringify(record));
  return true;
}
166
+
167
/**
 * Looks up stored entries similar to the project's current state.
 *
 * The query text comes from `buildSemanticQuery`. The search is not filtered
 * by project: all matches from `cwd` are listed first, followed by at most
 * two from other projects, and the combined list is capped at five.
 *
 * @returns Up to five matches, or an empty array if any step throws.
 */
async function semanticSearch(cwd: string, branch?: string): Promise<Array<{
  session_id: string;
  project_path: string;
  text: string;
  level: number;
  created_at: string;
}>> {
  try {
    const vector = await generateEmbedding(buildSemanticQuery(cwd, branch));
    const hits = await searchSimilar(Array.from(vector), 8);

    const fromThisProject = hits.filter(hit => hit.project_path === cwd);
    const fromElsewhere = hits.filter(hit => hit.project_path !== cwd).slice(0, 2);

    return fromThisProject.concat(fromElsewhere).slice(0, 5);
  } catch {
    return [];
  }
}
188
+
189
/**
 * Assembles the text that is embedded for the semantic search.
 *
 * The text always starts with the last two segments of `cwd` and, when
 * given, the branch. If the database queries succeed it also includes the
 * five most recent decision titles, the first 200 characters of the latest
 * level-1 summary, and up to eight base names of recently touched files.
 *
 * @returns The parts joined by spaces, capped at 1000 characters.
 */
function buildSemanticQuery(cwd: string, branch?: string): string {
  const db = getDb();

  const pathTail = cwd.split(/[/\\]/).filter(Boolean).slice(-2).join(" ");
  const pieces: string[] = [pathTail];
  if (branch) {
    pieces.push(`branch ${branch}`);
  }

  try {
    const decisionRows = db.prepare(
      "SELECT title FROM decisions WHERE project_path = ? ORDER BY created_at DESC LIMIT 5"
    ).all(cwd) as Array<{ title: string }>;
    pieces.push(...decisionRows.map(row => row.title));

    const latestSummary = db.prepare(
      "SELECT text FROM summaries WHERE project_path = ? AND level = 1 ORDER BY created_at DESC LIMIT 1"
    ).get(cwd) as { text: string } | undefined;
    if (latestSummary?.text) {
      pieces.push(latestSummary.text.slice(0, 200));
    }

    const observationRows = db.prepare(`
      SELECT file_paths FROM observations o
      JOIN sessions s ON o.session_id = s.session_id
      WHERE s.project_path = ? AND o.file_paths IS NOT NULL
      ORDER BY o.timestamp DESC LIMIT 40
    `).all(cwd) as Array<{ file_paths: string }>;

    // A Set keeps each base name once, in first-seen order.
    const names = new Set<string>();
    for (const { file_paths } of observationRows) {
      for (const entry of file_paths.split(",")) {
        const name = entry.trim().split(/[/\\]/).pop();
        if (name) names.add(name);
      }
    }
    pieces.push(Array.from(names).slice(0, 8).join(" "));
  } catch {
    // No usable history: the path and branch parts are all we have.
  }

  return pieces.filter(Boolean).join(" ").slice(0, 1000);
}
232
+
233
/**
 * Lists files that appear at least three times in the project's 100 most
 * recent observations with a non-empty path list, most frequent first.
 *
 * @returns Normalized paths, or an empty array if the query throws.
 */
function getHotFiles(projectPath: string): string[] {
  const db = getDb();
  try {
    const observationRows = db.prepare(`
      SELECT file_paths FROM observations o
      JOIN sessions s ON o.session_id = s.session_id
      WHERE s.project_path = ? AND o.file_paths IS NOT NULL AND o.file_paths != ''
      ORDER BY o.timestamp DESC
      LIMIT 100
    `).all(projectPath) as Array<{ file_paths: string }>;

    const tally = new Map<string, number>();
    for (const { file_paths } of observationRows) {
      for (const entry of file_paths.split(",")) {
        const key = normalizePath(entry);
        if (!key) continue;
        tally.set(key, (tally.get(key) || 0) + 1);
      }
    }

    const hot: Array<[string, number]> = [];
    for (const pair of tally.entries()) {
      if (pair[1] >= 3) hot.push(pair);
    }
    hot.sort((x, y) => y[1] - x[1]);
    return hot.map(pair => pair[0]);
  } catch {
    return [];
  }
}
262
+
263
/**
 * Fetches up to five patterns for a project, ordered by frequency and then
 * by last-seen time, both descending.
 *
 * The inner query selects the maximum frequency per pattern type, and the
 * join keeps only rows at that maximum, so lower-frequency duplicates of a
 * type are dropped.
 *
 * @returns Pattern rows, or an empty array if the query throws.
 */
function getPatterns(projectPath: string): Array<{ pattern_type: string; description: string }> {
  const db = getDb();
  try {
    const statement = db.prepare(`
      SELECT p.pattern_type, p.description
      FROM patterns p
      JOIN (
        SELECT pattern_type, MAX(frequency) AS max_freq
        FROM patterns WHERE project_path = ?
        GROUP BY pattern_type
      ) m ON m.pattern_type = p.pattern_type AND m.max_freq = p.frequency
      WHERE p.project_path = ?
      ORDER BY p.frequency DESC, p.last_seen DESC
      LIMIT 5
    `);
    // The project path is bound twice: once for the subquery, once for the outer filter.
    const found = statement.all(projectPath, projectPath) as Array<{ pattern_type: string; description: string }>;
    return found;
  } catch {
    return [];
  }
}
284
+
285
/**
 * Runs `git remote get-url origin` in `cwd`.
 *
 * @returns The trimmed stdout, or `undefined` when it is empty or the
 *          process could not be spawned or read.
 */
async function getGitRepo(cwd: string): Promise<string | undefined> {
  try {
    const git = Bun.spawn(["git", "remote", "get-url", "origin"], { cwd, stdout: "pipe", stderr: "pipe" });
    const url = (await new Response(git.stdout).text()).trim();
    return url ? url : undefined;
  } catch {
    return undefined;
  }
}
294
+
295
/**
 * Runs `git branch --show-current` in `cwd`.
 *
 * @returns The trimmed stdout, or `undefined` when it is empty or the
 *          process could not be spawned or read.
 */
async function getGitBranch(cwd: string): Promise<string | undefined> {
  try {
    const git = Bun.spawn(["git", "branch", "--show-current"], { cwd, stdout: "pipe", stderr: "pipe" });
    const branch = (await new Response(git.stdout).text()).trim();
    return branch ? branch : undefined;
  } catch {
    return undefined;
  }
}
304
+
305
// Run the hook. A rejection that escapes main() must not surface as an
// unhandled rejection (non-zero exit), so it is turned into a clean exit.
main().catch(() => process.exit(0));