@mnemonik/shared 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,11 @@
1
+ /**
2
+ * @mnemonik/shared - Shared constants and utilities
3
+ *
4
+ * This package provides a single source of truth for constants
5
+ * that need to be shared across Mnemonik packages.
6
+ */
7
+ export { MCP_INSTRUCTIONS, MCP_INSTRUCTIONS_RAW, getMcpInstructions } from './instructions.js';
8
+ export { USAGE_GUIDE } from './usageGuide.js';
9
+ export { CodeScanner } from './codeScanner.js';
10
+ export { FileSystemReader, getFileSystemReader, } from './FileSystemReader.js';
11
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAC/F,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAoC,MAAM,kBAAkB,CAAC;AACjF,OAAO,EACL,gBAAgB,EAChB,mBAAmB,GAIpB,MAAM,uBAAuB,CAAC"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * MCP Instructions - Persistent agent guidance
3
+ *
4
+ * This is the SINGLE SOURCE OF TRUTH for MCP instructions.
5
+ * Shared instruction content imported by the server.
6
+ *
7
+ * Version: 2.93
8
+ * Updated: 2026-02-24
9
+ *
10
+ * v2.93: Code mode permanent — all memory operations via memory_tools sandbox.
11
+ * memory_add, file_context etc. are now mnemonik.* methods, not standalone tools.
12
+ *
13
+ * v2.92: Zero-cooperation rewrite. Context auto-loads if session_bootstrap is skipped.
14
+ * Session summaries are auto-saved if agent doesn't call mnemonik.memory_add().
15
+ * Instructions drastically simplified — the server handles the workflow now.
16
+ *
17
+ * v2.80: Token-optimised rewrite (superseded by v2.92).
18
+ */
19
+ /**
20
+ * Get MCP instructions, respecting MNEMONIK_INSTRUCTIONS_ENABLED env var.
21
+ * Set MNEMONIK_INSTRUCTIONS_ENABLED=false to disable for testing.
22
+ */
23
+ export declare function getMcpInstructions(): string;
24
+ /**
25
+ * Raw instructions content (always returns the content, ignores env var).
26
+ * Use getMcpInstructions() for production code.
27
+ */
28
+ export declare const MCP_INSTRUCTIONS_RAW = "You have Mnemonik, a persistent memory system for this project.\n\nFirst call, every session: session_bootstrap. Read the mnemonik skill (from available_skills) for the full workflow.\nAfter bootstrap: execute _directive.message actions immediately (scanner daemon check is mandatory).\n\nProactively call memory_search before starting new work \u2014 avoids rediscovering known patterns and contradicting past decisions.\nProactively call file_context before editing any file \u2014 loads past bugs, decisions, and gotchas for that file.\nProactively call checkpoint after making changes or decisions worth keeping \u2014 your context is ephemeral and checkpoint is the only way decisions survive across sessions and context compaction. Do not wait for the user to say \"done\" or \"thanks\".\n\nWhen mnemonik.file_context({ filePaths, cwd }) returns linkedDocs with driftStatus 'stale', update docs then call mnemonik.docs({ action: 'resolve', docPath, cwd }).\n\nSkip: formatting-only, trivial one-line, mechanical refactors, git ops, tests.\nSave: architectural decisions, bug root causes, user preferences, discovered patterns, multi-file changes.";
29
+ /**
30
+ * Default export for convenience.
31
+ * Note: This respects the MNEMONIK_INSTRUCTIONS_ENABLED env var.
32
+ */
33
+ export declare const MCP_INSTRUCTIONS: string;
34
+ //# sourceMappingURL=instructions.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"instructions.d.ts","sourceRoot":"","sources":["../src/instructions.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAgBH;;;GAGG;AACH,wBAAgB,kBAAkB,IAAI,MAAM,CAK3C;AAED;;;GAGG;AACH,eAAO,MAAM,oBAAoB,qoCAAuB,CAAC;AAEzD;;;GAGG;AACH,eAAO,MAAM,gBAAgB,QAAuB,CAAC"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * MCP Instructions - Persistent agent guidance
3
+ *
4
+ * This is the SINGLE SOURCE OF TRUTH for MCP instructions.
5
+ * Shared instruction content imported by the server.
6
+ *
7
+ * Version: 2.93
8
+ * Updated: 2026-02-24
9
+ *
10
+ * v2.93: Code mode permanent — all memory operations via memory_tools sandbox.
11
+ * memory_add, file_context etc. are now mnemonik.* methods, not standalone tools.
12
+ *
13
+ * v2.92: Zero-cooperation rewrite. Context auto-loads if session_bootstrap is skipped.
14
+ * Session summaries are auto-saved if agent doesn't call mnemonik.memory_add().
15
+ * Instructions drastically simplified — the server handles the workflow now.
16
+ *
17
+ * v2.80: Token-optimised rewrite (superseded by v2.92).
18
+ */
19
+ const INSTRUCTIONS_CONTENT = `You have Mnemonik, a persistent memory system for this project.
20
+
21
+ First call, every session: session_bootstrap. Read the mnemonik skill (from available_skills) for the full workflow.
22
+ After bootstrap: execute _directive.message actions immediately (scanner daemon check is mandatory).
23
+
24
+ Proactively call memory_search before starting new work — avoids rediscovering known patterns and contradicting past decisions.
25
+ Proactively call file_context before editing any file — loads past bugs, decisions, and gotchas for that file.
26
+ Proactively call checkpoint after making changes or decisions worth keeping — your context is ephemeral and checkpoint is the only way decisions survive across sessions and context compaction. Do not wait for the user to say "done" or "thanks".
27
+
28
+ When mnemonik.file_context({ filePaths, cwd }) returns linkedDocs with driftStatus 'stale', update docs then call mnemonik.docs({ action: 'resolve', docPath, cwd }).
29
+
30
+ Skip: formatting-only, trivial one-line, mechanical refactors, git ops, tests.
31
+ Save: architectural decisions, bug root causes, user preferences, discovered patterns, multi-file changes.`;
32
/**
 * Resolve the instruction text handed to MCP clients.
 *
 * The text is suppressed (an empty string is returned) only when a `process`
 * global exists and the MNEMONIK_INSTRUCTIONS_ENABLED environment variable is
 * exactly the string "false"; any other value, or no value, yields the full
 * instruction content.
 *
 * @returns The instruction content, or '' when instructions are disabled.
 */
export function getMcpInstructions() {
    // `typeof process` keeps this safe in runtimes that have no `process` global.
    const instructionsDisabled = typeof process !== 'undefined' &&
        process.env?.MNEMONIK_INSTRUCTIONS_ENABLED === 'false';
    return instructionsDisabled ? '' : INSTRUCTIONS_CONTENT;
}
42
/**
 * The unconditional instruction text: the same content getMcpInstructions()
 * returns when enabled, exposed without consulting any environment variable.
 */
export const MCP_INSTRUCTIONS_RAW = INSTRUCTIONS_CONTENT;

/**
 * Convenience named export holding the result of getMcpInstructions().
 *
 * The call happens once, when this module is first evaluated, so the value
 * reflects MNEMONIK_INSTRUCTIONS_ENABLED as it was at import time; call
 * getMcpInstructions() directly to re-read the variable.
 */
export const MCP_INSTRUCTIONS = getMcpInstructions();
52
+ //# sourceMappingURL=instructions.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"instructions.js","sourceRoot":"","sources":["../src/instructions.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,MAAM,oBAAoB,GAAG;;;;;;;;;;;;2GAY8E,CAAC;AAE5G;;;GAGG;AACH,MAAM,UAAU,kBAAkB;IAChC,IAAI,OAAO,OAAO,KAAK,WAAW,IAAI,OAAO,CAAC,GAAG,EAAE,6BAA6B,KAAK,OAAO,EAAE,CAAC;QAC7F,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,OAAO,oBAAoB,CAAC;AAC9B,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG,oBAAoB,CAAC;AAEzD;;;GAGG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,kBAAkB,EAAE,CAAC"}
@@ -0,0 +1,4 @@
1
+ export declare const debug: (_msg: string, _ctx?: Record<string, unknown>) => void;
2
+ export declare const info: (msg: string, ctx?: Record<string, unknown>) => void;
3
+ export declare const warn: (msg: string, ctx?: Record<string, unknown>) => void;
4
+ //# sourceMappingURL=logger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,KAAK,GAAI,MAAM,MAAM,EAAE,OAAO,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAG,IAAU,CAAC;AAChF,eAAO,MAAM,IAAI,GAAI,KAAK,MAAM,EAAE,MAAM,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAG,IAEjE,CAAC;AACF,eAAO,MAAM,IAAI,GAAI,KAAK,MAAM,EAAE,MAAM,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAG,IAEjE,CAAC"}
package/dist/logger.js ADDED
@@ -0,0 +1,8 @@
1
/** Debug-level logger: intentionally a no-op, both arguments are ignored. */
export const debug = (_msg, _ctx) => {};

/**
 * Info-level logger: writes "[info] <msg>" to console.log, followed by the
 * JSON-serialised context, or an empty string when no context is given.
 */
export const info = (msg, ctx) => {
    const serialisedContext = ctx ? JSON.stringify(ctx) : '';
    console.log(`[info] ${msg}`, serialisedContext);
};

/**
 * Warn-level logger: writes "[warn] <msg>" to console.warn, followed by the
 * JSON-serialised context, or an empty string when no context is given.
 */
export const warn = (msg, ctx) => {
    const serialisedContext = ctx ? JSON.stringify(ctx) : '';
    console.warn(`[warn] ${msg}`, serialisedContext);
};
8
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,IAAY,EAAE,IAA8B,EAAQ,EAAE,GAAE,CAAC,CAAC;AAChF,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,GAAW,EAAE,GAA6B,EAAQ,EAAE;IACvE,OAAO,CAAC,GAAG,CAAC,UAAU,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAC/D,CAAC,CAAC;AACF,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,GAAW,EAAE,GAA6B,EAAQ,EAAE;IACvE,OAAO,CAAC,IAAI,CAAC,UAAU,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAChE,CAAC,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Mnemonik Usage Guide - Procedural workflow guidance for agents
3
+ *
4
+ * This is the SINGLE SOURCE OF TRUTH for the usage guide.
5
+ * Shared usage guide content imported by the server.
6
+ *
7
+ * Version: 2.80
8
+ * Updated: 2026-02-20 - Aligned with instructions/rules/skill v2.80
9
+ *
10
+ * This guide focuses on HOW to use Mnemonik effectively, not WHAT tools exist.
11
+ * Tool schemas already tell agents what's available - they need the workflow.
12
+ */
13
+ export declare const USAGE_GUIDE = "# Mnemonik Workflow Guide (v2.80)\n\n## Workflow\n\nsession_bootstrap \u2192 memory_search \u2192 file_context \u2192 [work] \u2192 memory_add \u2192 memory_state\n\n## Tool Selection by Stage\n\n### Session start\n- session_bootstrap: loads context, policies, pending tasks (call once, first thing)\n- memory_search: search by task domain; set workflowContext (feature_implementation, debugging, exploration, policy_review)\n- projects: resolve project IDs if context unclear\n- policy: review safety rules\n\n### Before editing files\n- file_context: fetch memories for the file \u2014 call for EVERY file you edit\n- memory_search: second search scoped to file/module if needed\n- docs(action: 'links'): check doc couplings for the file\n\n### During implementation\n- memory_get: retrieve specific memory by id\n- memory_update: refine memory created this session\n- memory_info: query history, provenance, confidence breakdown, links, graph\n- assist: get tool guidance if uncertain\n\n### After significant work\n- memory_add: save decisions, outcomes, patterns, bug root causes\n- memory_state: reinforce (memory helped), supersede (replace outdated), deprecate, penalize, dispute\n- tasks: mark tasks in progress or complete\n- docs(action: 'drift'): check for stale documentation after code changes\n- docs(action: 'resolve'): mark stale docs as fixed after updating them\n\n### Diagnostics\n- doctor: when tool calls fail or behavior is inconsistent\n- scanner: refresh embeddings, trigger scans, check drift\n\n## Skip conditions\n\nSkip memory tools for: formatting-only edits, trivial one-line changes, mechanical refactors, git operations, running tests.\n\n## Completion gate\n\nNever tell the user significant work is done without calling memory_add first in the same response. Changes made + responding next = completion. 
\"Progress updates\" count.\n\n## Memory search tips\n\n- Query should include task intent + key entities\n- Set workflowContext when you know the phase\n- Use currentFile to boost file-linked memories\n- Use filterOnly:true only for narrow filters (no embedding, requires >=1 filter)\n\n## Proactive heuristics\n\n- Long sessions: re-run memory_search after switching topics\n- Conflicting info: use memory_state to supersede/dispute\n- High-impact changes: save memory immediately after verification\n- When file_context returns linkedDocs with driftStatus 'stale': update docs then docs(action: 'resolve')\n\n## Anti-fade (every ~10 tool calls)\n\nCheck: (1) memory_search before work? (2) file_context before edit? (3) memory_add after completing? No session_bootstrap? Call it now.\n";
14
+ //# sourceMappingURL=usageGuide.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"usageGuide.d.ts","sourceRoot":"","sources":["../src/usageGuide.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,eAAO,MAAM,WAAW,wkFA6DvB,CAAC"}
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Mnemonik Usage Guide - Procedural workflow guidance for agents
3
+ *
4
+ * This is the SINGLE SOURCE OF TRUTH for the usage guide.
5
+ * Shared usage guide content imported by the server.
6
+ *
7
+ * Version: 2.80
8
+ * Updated: 2026-02-20 - Aligned with instructions/rules/skill v2.80
9
+ *
10
+ * This guide focuses on HOW to use Mnemonik effectively, not WHAT tools exist.
11
+ * Tool schemas already tell agents what's available - they need the workflow.
12
+ */
13
+ export const USAGE_GUIDE = `# Mnemonik Workflow Guide (v2.80)
14
+
15
+ ## Workflow
16
+
17
+ session_bootstrap → memory_search → file_context → [work] → memory_add → memory_state
18
+
19
+ ## Tool Selection by Stage
20
+
21
+ ### Session start
22
+ - session_bootstrap: loads context, policies, pending tasks (call once, first thing)
23
+ - memory_search: search by task domain; set workflowContext (feature_implementation, debugging, exploration, policy_review)
24
+ - projects: resolve project IDs if context unclear
25
+ - policy: review safety rules
26
+
27
+ ### Before editing files
28
+ - file_context: fetch memories for the file — call for EVERY file you edit
29
+ - memory_search: second search scoped to file/module if needed
30
+ - docs(action: 'links'): check doc couplings for the file
31
+
32
+ ### During implementation
33
+ - memory_get: retrieve specific memory by id
34
+ - memory_update: refine memory created this session
35
+ - memory_info: query history, provenance, confidence breakdown, links, graph
36
+ - assist: get tool guidance if uncertain
37
+
38
+ ### After significant work
39
+ - memory_add: save decisions, outcomes, patterns, bug root causes
40
+ - memory_state: reinforce (memory helped), supersede (replace outdated), deprecate, penalize, dispute
41
+ - tasks: mark tasks in progress or complete
42
+ - docs(action: 'drift'): check for stale documentation after code changes
43
+ - docs(action: 'resolve'): mark stale docs as fixed after updating them
44
+
45
+ ### Diagnostics
46
+ - doctor: when tool calls fail or behavior is inconsistent
47
+ - scanner: refresh embeddings, trigger scans, check drift
48
+
49
+ ## Skip conditions
50
+
51
+ Skip memory tools for: formatting-only edits, trivial one-line changes, mechanical refactors, git operations, running tests.
52
+
53
+ ## Completion gate
54
+
55
+ Never tell the user significant work is done without calling memory_add first in the same response. Changes made + responding next = completion. "Progress updates" count.
56
+
57
+ ## Memory search tips
58
+
59
+ - Query should include task intent + key entities
60
+ - Set workflowContext when you know the phase
61
+ - Use currentFile to boost file-linked memories
62
+ - Use filterOnly:true only for narrow filters (no embedding, requires >=1 filter)
63
+
64
+ ## Proactive heuristics
65
+
66
+ - Long sessions: re-run memory_search after switching topics
67
+ - Conflicting info: use memory_state to supersede/dispute
68
+ - High-impact changes: save memory immediately after verification
69
+ - When file_context returns linkedDocs with driftStatus 'stale': update docs then docs(action: 'resolve')
70
+
71
+ ## Anti-fade (every ~10 tool calls)
72
+
73
+ Check: (1) memory_search before work? (2) file_context before edit? (3) memory_add after completing? No session_bootstrap? Call it now.
74
+ `;
75
+ //# sourceMappingURL=usageGuide.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"usageGuide.js","sourceRoot":"","sources":["../src/usageGuide.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6D1B,CAAC"}
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@mnemonik/shared",
3
+ "version": "1.0.0",
4
+ "description": "Shared constants and utilities for Mnemonik packages",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "types": "./dist/index.d.ts"
12
+ }
13
+ },
14
+ "scripts": {
15
+ "build": "tsc",
16
+ "typecheck": "tsc --noEmit"
17
+ },
18
+ "keywords": [
19
+ "mnemonik",
20
+ "shared",
21
+ "constants"
22
+ ],
23
+ "author": "Anthony",
24
+ "license": "MIT",
25
+ "devDependencies": {
26
+ "typescript": "^5.3.3"
27
+ }
28
+ }
@@ -0,0 +1,299 @@
1
+ /**
2
+ * FileSystemReader — Encapsulates all filesystem operations for scanning.
3
+ *
4
+ * This module is the boundary between disk I/O and the rest of the system.
5
+ * In a cloud deployment, a lightweight client-side agent uses this module
6
+ * locally and pushes results to the server's indexing API; the server
7
+ * itself never touches the filesystem.
8
+ */
9
+
10
+ import { readdir, readFile, stat } from 'fs/promises';
11
+ import { join, relative, isAbsolute } from 'path';
12
+ import { createHash } from 'crypto';
13
+ import { debug as logDebug, warn as logWarn, info as logInfo } from './logger.js';
14
+ import { withTimeout } from './asyncUtils.js';
15
+ import { CodeScanner, CodeChunk } from './codeScanner.js';
16
+
17
/** Deadline, in milliseconds, applied to each individual stat/readFile/readdir call. */
const FILE_OP_TIMEOUT_MS = 5000;

/** Files above this size (10 MB) are not hashed. */
const MAX_FILE_SIZE = 10 * 1024 * 1024;

/** Directory names that traversal never descends into. */
const SKIP_DIRS = new Set([
  // JavaScript / build output
  'node_modules', '.git', 'dist', 'build', '.next', 'coverage',
  // Python environments and caches
  'venv', '.venv', 'env', '.env', '__pycache__', '.cache',
  // Other tool output
  'target', '.tox', '.mypy_cache', '.pytest_cache',
]);

/** Lower-case file extensions (without the dot) that count as scannable text. */
const TEXT_EXTENSIONS = new Set([
  // Programming languages
  'ts', 'tsx', 'js', 'jsx', 'py', 'java', 'cpp', 'c', 'h', 'hpp',
  'go', 'rs', 'php', 'rb', 'swift', 'kt', 'scala', 'sh', 'bash', 'sql',
  // Markup and stylesheets
  'html', 'css', 'scss', 'sass', 'less',
  // Data and configuration
  'json', 'yaml', 'yml', 'toml', 'xml',
  // Documentation and component formats
  'md', 'txt', 'vue', 'svelte', 'astro',
]);

/** File-name suffixes that are excluded even when the extension is scannable. */
const SKIP_PATTERNS = [
  '.min.js',
  '.min.css',
  '.bundle.js',
  '.legacy.js',
  '.map',
];
78
+
79
/** One scanned file together with its change-detection hash. */
export interface FileData {
  /**
   * Location of the file. scanDirectory copies the path carried by the
   * scanner's chunk; detectChangedFiles stores the path produced by directory
   * traversal (the project path joined with the entry names).
   */
  path: string;
  /**
   * Hex SHA-256 over content + size + mtime, or an empty string when the file
   * could not be hashed or exceeds the hashing size limit.
   */
  hash: string;
}

/** Outcome of a full directory scan. */
export interface ScanFilesResult {
  /** One entry per distinct file that produced at least one chunk. */
  files: Array<FileData>;
  /** Every code chunk returned by the scanner. */
  chunks: Array<CodeChunk>;
}

/** Outcome of an incremental scan against previously known hashes. */
export interface ChangedFilesResult {
  /** Files whose hash is unknown or differs from the known hash. */
  changed: Array<FileData>;
  /** Code chunks extracted from the changed files only. */
  chunks: Array<CodeChunk>;
  /** Number of files whose hash matched and were therefore not rescanned. */
  skipped: number;
}
96
+
97
/**
 * Filesystem boundary used by the scanner: lists scannable files, hashes them
 * for change detection, and pairs the results with code chunks extracted by
 * CodeScanner.
 *
 * Every disk operation runs under withTimeout(FILE_OP_TIMEOUT_MS). Failures are
 * logged and turned into neutral results ('' / null / skipped entry) rather
 * than thrown, so one unreadable file does not abort a scan.
 */
export class FileSystemReader {
  /** Files larger than this many bytes (100 KiB) are left out of directory listings. */
  private static readonly MAX_LISTED_FILE_SIZE = 100 * 1024;

  /**
   * Recursively list all scannable text files below a directory.
   *
   * @param dirPath Directory to start from.
   * @returns Paths built by joining entry names onto `dirPath`; they are
   *   absolute only when `dirPath` itself is absolute.
   */
  async readProjectFiles(dirPath: string): Promise<string[]> {
    const files: string[] = [];
    await this.traverseDirectory(dirPath, files);
    return files;
  }

  /**
   * Compute a hex SHA-256 over the file's UTF-8 content, size and mtime.
   *
   * @param filePath File to hash.
   * @returns The digest, or '' when the file is larger than MAX_FILE_SIZE or
   *   any step fails or times out (the failure is logged as a warning).
   */
  async calculateFileHash(filePath: string): Promise<string> {
    try {
      const stats = await withTimeout(
        stat(filePath),
        FILE_OP_TIMEOUT_MS,
        `stat timed out: ${filePath}`
      );

      if (stats.size > MAX_FILE_SIZE) {
        logDebug('Skipping file hash — exceeds size limit', {
          file: filePath,
          size: stats.size,
        });
        return '';
      }

      const content = await withTimeout(
        readFile(filePath, 'utf-8'),
        FILE_OP_TIMEOUT_MS,
        `readFile timed out: ${filePath}`
      );

      // Size and mtime are mixed in so a touched file hashes differently even
      // when its decoded content is unchanged.
      const hashInput = `${content}\n${stats.size}\n${stats.mtime.getTime()}`;
      return createHash('sha256').update(hashInput).digest('hex');
    } catch (error) {
      logWarn('Failed to calculate file hash', {
        file: filePath,
        error: FileSystemReader.describeError(error),
      });
      return '';
    }
  }

  /**
   * Scan a directory with CodeScanner and hash every distinct file that
   * produced at least one chunk.
   *
   * @param projectPath Project root handed to the scanner; also used to
   *   resolve relative chunk paths before hashing.
   * @returns The hashed files (in first-seen order) and all extracted chunks.
   */
  async scanDirectory(projectPath: string): Promise<ScanFilesResult> {
    const scanner = new CodeScanner();
    const chunks = await scanner.scanDirectory(projectPath);

    const files: FileData[] = [];
    const seen = new Set<string>();
    for (const chunk of chunks) {
      if (seen.has(chunk.filePath)) continue;
      seen.add(chunk.filePath);

      const absPath = isAbsolute(chunk.filePath)
        ? chunk.filePath
        : join(projectPath, chunk.filePath);
      const hash = await this.calculateFileHash(absPath);
      // The path is stored exactly as the scanner reported it.
      files.push({ path: chunk.filePath, hash });
    }

    return { files, chunks };
  }

  /**
   * Determine which files differ from a set of previously known hashes and
   * extract code chunks for just those files.
   *
   * A file counts as changed when no known hash exists for it or the hashes
   * differ. A file whose hash could not be computed ('' result) is therefore
   * always reported as changed.
   *
   * @param projectPath Project root to traverse.
   * @param knownHashes Previous hashes, keyed by either the traversal path or
   *   the path relative to `projectPath` (both are looked up).
   * @returns Changed files (with their traversal paths), their chunks, and the
   *   count of unchanged files.
   */
  async detectChangedFiles(
    projectPath: string,
    knownHashes: Map<string, string>
  ): Promise<ChangedFilesResult> {
    const allFiles = await this.readProjectFiles(projectPath);
    const changed: FileData[] = [];
    let skipped = 0;

    for (const absPath of allFiles) {
      try {
        const hash = await this.calculateFileHash(absPath);
        const relPath = relative(projectPath, absPath);
        const known = knownHashes.get(absPath) ?? knownHashes.get(relPath);
        if (!known || known !== hash) {
          // NOTE(review): this stores the traversal path, not the relative
          // one computed above — confirm which form consumers expect.
          changed.push({ path: absPath, hash });
        } else {
          skipped++;
        }
      } catch (error) {
        logWarn('Failed to process file for incremental scan', {
          file: absPath,
          error: FileSystemReader.describeError(error),
        });
      }
    }

    logInfo('Incremental scan analysis', {
      totalFiles: allFiles.length,
      filesToScan: changed.length,
      filesSkipped: skipped,
    });

    if (changed.length === 0) {
      return { changed: [], chunks: [], skipped };
    }

    const scanner = new CodeScanner();
    const chunks = await scanner.scanFiles(
      changed.map((f) => f.path),
      projectPath
    );

    return { changed, chunks, skipped };
  }

  /**
   * Read a file as UTF-8 with timeout protection.
   *
   * @returns The content, or null when the read fails or times out.
   */
  async readFileContent(filePath: string): Promise<string | null> {
    try {
      return await withTimeout(
        readFile(filePath, 'utf-8'),
        FILE_OP_TIMEOUT_MS,
        `readFile timed out: ${filePath}`
      );
    } catch {
      // Best-effort read: callers receive null instead of an exception.
      return null;
    }
  }

  /**
   * Stat a file with timeout protection.
   *
   * @returns The file's mtime and size, or null when stat fails or times out.
   */
  async statFile(filePath: string): Promise<{ mtime: Date; size: number } | null> {
    try {
      const s = await withTimeout(
        stat(filePath),
        FILE_OP_TIMEOUT_MS,
        `stat timed out: ${filePath}`
      );
      return { mtime: s.mtime, size: s.size };
    } catch {
      // Best-effort stat: callers receive null instead of an exception.
      return null;
    }
  }

  // ── private helpers ─────────────────────────────────────────────────

  /**
   * Depth-first walk that appends every scannable file to `files`.
   * Directories named in SKIP_DIRS are not entered; files must pass
   * isTextFile and be no larger than MAX_LISTED_FILE_SIZE.
   */
  private async traverseDirectory(dirPath: string, files: string[]): Promise<void> {
    try {
      const entries = await withTimeout(
        readdir(dirPath, { withFileTypes: true }),
        FILE_OP_TIMEOUT_MS,
        `readdir timed out: ${dirPath}`
      );

      for (const entry of entries) {
        const fullPath = join(dirPath, entry.name);

        if (entry.isDirectory()) {
          if (SKIP_DIRS.has(entry.name)) continue;
          await this.traverseDirectory(fullPath, files);
        } else if (entry.isFile() && this.isTextFile(entry.name)) {
          try {
            const fileStat = await withTimeout(
              stat(fullPath),
              FILE_OP_TIMEOUT_MS,
              `stat timed out: ${fullPath}`
            );
            if (fileStat.size <= FileSystemReader.MAX_LISTED_FILE_SIZE) {
              files.push(fullPath);
            }
          } catch {
            // can't stat → skip
          }
        }
      }
    } catch (error) {
      logWarn('Failed to read directory', {
        path: dirPath,
        error: FileSystemReader.describeError(error),
      });
    }
  }

  /**
   * Decide from the file name alone whether a file is scannable text.
   *
   * Matching is case-insensitive for both the skip suffixes and the
   * extension. A name without any dot has no extension and is rejected, so an
   * extension-less file that happens to be called "go" or "sh" is not treated
   * as source code.
   */
  private isTextFile(filename: string): boolean {
    const lowered = filename.toLowerCase();
    if (SKIP_PATTERNS.some((pattern) => lowered.endsWith(pattern))) return false;

    const dotIndex = lowered.lastIndexOf('.');
    if (dotIndex === -1) return false;
    return TEXT_EXTENSIONS.has(lowered.slice(dotIndex + 1));
  }

  /** Extract a loggable message from a caught value of unknown type. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
293
+
294
/** Module-wide reader, created on the first getFileSystemReader() call. */
let sharedReader: FileSystemReader | null = null;

/**
 * Return the shared FileSystemReader, constructing it on first use.
 * Every call returns the same instance.
 */
export function getFileSystemReader(): FileSystemReader {
  if (sharedReader === null) {
    sharedReader = new FileSystemReader();
  }
  return sharedReader;
}
@@ -0,0 +1,16 @@
1
/**
 * Race a promise against a deadline.
 *
 * @param promise Operation to wait for.
 * @param timeoutMs Deadline in milliseconds.
 * @param timeoutMessage Message for the timeout error; defaults to
 *   "Operation timed out after <timeoutMs>ms".
 * @returns A promise that settles like `promise` if it settles first, and
 *   otherwise rejects with an Error carrying the timeout message. The
 *   underlying operation is not cancelled; only the wait is abandoned.
 */
export async function withTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number,
  timeoutMessage?: string
): Promise<T> {
  let handle: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(timeoutMessage ?? `Operation timed out after ${timeoutMs}ms`));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so a settled race does not keep the process alive.
    clearTimeout(handle);
  }
}