@mnemonik/scanner 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/daemon.ts CHANGED
@@ -4,34 +4,187 @@ import { join } from 'path';
4
4
  import { CodeScanner, type CodeChunk } from '@mnemonik/shared';
5
5
  import { MnemonikClient, type ScanPushFile } from './client.js';
6
6
  import { FileWatcher } from './watcher.js';
7
+ import { ProjectDiscovery, type DiscoveredProject } from './discovery.js';
7
8
 
8
9
  export interface DaemonConfig {
9
- projectId: string;
10
- projectRoot: string;
11
10
  serverUrl: string;
12
11
  apiKey: string;
12
+ roots: string[];
13
+ refreshIntervalMs?: number;
14
+ maxConcurrentScans?: number;
15
+ }
16
+
17
/**
 * Bookkeeping the daemon keeps for each project it is currently watching.
 */
interface WatchedProject {
  /** Identifier the project is registered under in the daemon's project map. */
  projectId: string;
  /** Directory the project's file watcher was created for. */
  path: string;
  /** Optional display name; log messages fall back to the id when absent. */
  name?: string;
  /** Watcher created for `path`. */
  watcher: FileWatcher;
  /** Paths of files from failed push batches that are waiting to be retried. */
  pendingRetries: Set<string>;
  /** Handle of the running retry interval, or `null` when no retry loop is active. */
  retryTimer: ReturnType<typeof setInterval> | null;
}
14
25
 
15
26
  export class ScannerDaemon {
16
27
  private client: MnemonikClient;
17
28
  private scanner: CodeScanner;
18
- private watcher: FileWatcher | null = null;
19
- private pendingRetries: Set<string> = new Set();
20
- private retryTimer: ReturnType<typeof setInterval> | null = null;
29
+ private projects = new Map<string, WatchedProject>();
30
+ private refreshTimer: ReturnType<typeof setInterval> | null = null;
31
+ private heartbeatTimer: ReturnType<typeof setInterval> | null = null;
32
+ private discovery: ProjectDiscovery;
33
+ private refreshIntervalMs: number;
34
+ private maxConcurrentScans: number;
21
35
 
22
36
  constructor(private config: DaemonConfig) {
23
37
  this.client = new MnemonikClient(config.serverUrl, config.apiKey);
24
38
  this.scanner = new CodeScanner();
39
+ this.discovery = new ProjectDiscovery(config.roots);
40
+ this.refreshIntervalMs = config.refreshIntervalMs ?? 300_000; // 5 min
41
+ this.maxConcurrentScans = config.maxConcurrentScans ?? 5;
25
42
  }
26
43
 
27
44
  async start(): Promise<void> {
28
- console.log(`[scanner] Starting daemon for project: ${this.config.projectId}`);
45
+ console.log(`[scanner] Starting daemon`);
29
46
  console.log(`[scanner] Server: ${this.config.serverUrl}`);
30
- console.log(`[scanner] Root: ${this.config.projectRoot}`);
47
+ console.log(`[scanner] Roots: ${this.config.roots.join(', ')}`);
31
48
 
32
49
  await this.waitForServer();
33
- await this.initialScan();
34
- await this.startWatching();
50
+ await this.refreshProjects();
51
+
52
+ this.refreshTimer = setInterval(() => {
53
+ this.refreshProjects().catch((err) => {
54
+ console.warn('[scanner] Refresh failed:', (err as Error).message);
55
+ });
56
+ }, this.refreshIntervalMs);
57
+ this.refreshTimer.unref();
58
+
59
+ // Send heartbeat immediately, then every 60s so session_bootstrap
60
+ // can reliably detect daemon liveness without waiting for a file scan.
61
+ await this.sendHeartbeats();
62
+ this.heartbeatTimer = setInterval(() => {
63
+ this.sendHeartbeats().catch((err) => {
64
+ console.warn('[scanner] Heartbeat failed:', (err as Error).message);
65
+ });
66
+ }, 60_000);
67
+ this.heartbeatTimer.unref();
68
+
69
+ console.log('[scanner] Watching for changes.');
70
+ }
71
+
72
+ async stop(): Promise<void> {
73
+ if (this.refreshTimer) {
74
+ clearInterval(this.refreshTimer);
75
+ this.refreshTimer = null;
76
+ }
77
+ if (this.heartbeatTimer) {
78
+ clearInterval(this.heartbeatTimer);
79
+ this.heartbeatTimer = null;
80
+ }
81
+
82
+ for (const project of this.projects.values()) {
83
+ project.watcher.stop();
84
+ if (project.retryTimer) clearInterval(project.retryTimer);
85
+ }
86
+ this.projects.clear();
87
+ console.log('[scanner] Daemon stopped');
88
+ }
89
+
90
+ private async sendHeartbeats(): Promise<void> {
91
+ for (const { projectId } of this.projects.values()) {
92
+ await this.client.sendHeartbeat(projectId).catch((err) => {
93
+ console.warn(`[scanner] Heartbeat failed for ${projectId}:`, (err as Error).message);
94
+ });
95
+ }
96
+ }
97
+
98
+ getWatchedProjects(): Array<{ projectId: string; path: string; name?: string }> {
99
+ return Array.from(this.projects.values()).map((p) => ({
100
+ projectId: p.projectId,
101
+ path: p.path,
102
+ name: p.name,
103
+ }));
104
+ }
105
+
106
+ /**
107
+ * Discover projects from configured roots and reconcile with current watch list.
108
+ */
109
+ async refreshProjects(): Promise<void> {
110
+ const discovered = await this.discovery.discover();
111
+ const discoveredMap = new Map(discovered.map((d) => [d.projectId, d]));
112
+
113
+ // Remove projects no longer discovered
114
+ for (const [projectId, project] of this.projects) {
115
+ if (!discoveredMap.has(projectId)) {
116
+ console.log(`[scanner] Project removed: ${project.name ?? projectId} (${project.path})`);
117
+ project.watcher.stop();
118
+ if (project.retryTimer) clearInterval(project.retryTimer);
119
+ this.projects.delete(projectId);
120
+ }
121
+ }
122
+
123
+ // Add new projects or handle path changes
124
+ for (const discovered_project of discoveredMap.values()) {
125
+ const existing = this.projects.get(discovered_project.projectId);
126
+
127
+ if (!existing) {
128
+ // New project
129
+ await this.addProject(discovered_project);
130
+ } else if (existing.path !== discovered_project.path) {
131
+ // Path changed (folder renamed/moved)
132
+ console.log(
133
+ `[scanner] Project moved: ${existing.name ?? existing.projectId} ` +
134
+ `${existing.path} → ${discovered_project.path}`
135
+ );
136
+ existing.watcher.stop();
137
+ if (existing.retryTimer) clearInterval(existing.retryTimer);
138
+ this.projects.delete(discovered_project.projectId);
139
+ await this.addProject(discovered_project);
140
+ }
141
+ }
142
+
143
+ console.log(`[scanner] Watching ${this.projects.size} project(s)`);
144
+ }
145
+
146
+ private async addProject(discovered: DiscoveredProject): Promise<void> {
147
+ const label = discovered.projectName ?? discovered.projectId.slice(0, 8);
148
+ console.log(`[scanner] Adding project: ${label} (${discovered.path})`);
149
+
150
+ try {
151
+ await this.initialScan(discovered.projectId, discovered.path);
152
+ } catch (err) {
153
+ console.warn(`[scanner] Initial scan failed for ${label}:`, (err as Error).message);
154
+ }
155
+
156
+ const watcher = new FileWatcher(
157
+ discovered.path,
158
+ (changedFiles) => this.handleChanges(discovered.projectId, discovered.path, changedFiles),
159
+ 500,
160
+ (err) => {
161
+ console.warn(
162
+ `[scanner] Root watcher error for ${label}: ${err.message}. Removing project.`
163
+ );
164
+ const project = this.projects.get(discovered.projectId);
165
+ if (project) {
166
+ project.watcher.stop();
167
+ if (project.retryTimer) clearInterval(project.retryTimer);
168
+ this.projects.delete(discovered.projectId);
169
+ }
170
+ }
171
+ );
172
+
173
+ try {
174
+ await watcher.start();
175
+ } catch (err) {
176
+ console.warn(`[scanner] Failed to start watcher for ${label}:`, (err as Error).message);
177
+ return;
178
+ }
179
+
180
+ this.projects.set(discovered.projectId, {
181
+ projectId: discovered.projectId,
182
+ path: discovered.path,
183
+ name: discovered.projectName,
184
+ watcher,
185
+ pendingRetries: new Set(),
186
+ retryTimer: null,
187
+ });
35
188
  }
36
189
 
37
190
  private async waitForServer(): Promise<void> {
@@ -58,83 +211,48 @@ export class ScannerDaemon {
58
211
  throw new Error(`Server unreachable after ${maxRetries} attempts`);
59
212
  }
60
213
 
61
- getProjectRoot(): string {
62
- return this.config.projectRoot;
63
- }
64
-
65
- getProjectId(): string {
66
- return this.config.projectId;
67
- }
68
-
69
- async stop(): Promise<void> {
70
- this.watcher?.stop();
71
- if (this.retryTimer) clearInterval(this.retryTimer);
72
- console.log('[scanner] Daemon stopped');
73
- }
74
-
75
- private startRetryLoop(): void {
76
- if (this.retryTimer) return;
77
- console.log(`[scanner] ${this.pendingRetries.size} file(s) queued for retry`);
78
- this.retryTimer = setInterval(async () => {
79
- if (this.pendingRetries.size === 0) {
80
- if (this.retryTimer) clearInterval(this.retryTimer);
81
- this.retryTimer = null;
82
- return;
83
- }
84
- const files = [...this.pendingRetries];
85
- this.pendingRetries.clear();
86
- await this.handleChanges(files);
87
- }, 10_000);
88
- this.retryTimer.unref();
89
- }
90
-
91
- private async initialScan(): Promise<void> {
92
- console.log('[scanner] Starting initial scan...');
214
+ private async initialScan(projectId: string, projectRoot: string): Promise<void> {
93
215
  const startTime = Date.now();
94
216
 
95
- const chunks = await this.scanner.scanDirectory(this.config.projectRoot);
96
- console.log(`[scanner] Scanned ${chunks.length} chunks locally`);
217
+ const chunks = await this.scanner.scanDirectory(projectRoot);
218
+ const serverHashes = await this.client.getStatus(projectId);
97
219
 
98
- const serverHashes = await this.client.getStatus(this.config.projectId);
99
- console.log(`[scanner] Server knows ${serverHashes.size} files`);
100
-
101
- const files = await this.groupChunksByFile(chunks);
220
+ const files = await this.groupChunksByFile(chunks, projectRoot);
102
221
  const filesToPush = files.filter((f) => {
103
222
  const serverHash = serverHashes.get(f.path);
104
223
  return !serverHash || serverHash !== f.hash;
105
224
  });
106
225
 
107
- if (filesToPush.length === 0) {
108
- console.log('[scanner] All files up to date, nothing to push');
109
- } else {
110
- console.log(`[scanner] Pushing ${filesToPush.length} changed files...`);
111
- await this.client.pushFiles(this.config.projectId, filesToPush);
112
- console.log(`[scanner] Push complete`);
226
+ if (filesToPush.length > 0) {
227
+ console.log(
228
+ `[scanner] Pushing ${filesToPush.length} changed files for ${projectId.slice(0, 8)}...`
229
+ );
230
+ await this.client.pushFiles(projectId, filesToPush);
113
231
  }
114
232
 
115
233
  const duration = ((Date.now() - startTime) / 1000).toFixed(1);
116
- console.log(`[scanner] Initial scan complete in ${duration}s`);
117
- }
118
-
119
- private async startWatching(): Promise<void> {
120
- this.watcher = new FileWatcher(
121
- this.config.projectRoot,
122
- (changedFiles) => this.handleChanges(changedFiles),
123
- 500
234
+ console.log(
235
+ `[scanner] Scan complete for ${projectId.slice(0, 8)}: ` +
236
+ `${chunks.length} chunks, ${filesToPush.length} pushed (${duration}s)`
124
237
  );
125
- await this.watcher.start();
126
238
  }
127
239
 
128
- private async handleChanges(changedFiles: string[]): Promise<void> {
240
+ private async handleChanges(
241
+ projectId: string,
242
+ projectRoot: string,
243
+ changedFiles: string[]
244
+ ): Promise<void> {
245
+ const project = this.projects.get(projectId);
246
+ if (!project) return;
247
+
129
248
  try {
130
- const absPaths = changedFiles.map((rel) => join(this.config.projectRoot, rel));
131
- const chunks = await this.scanner.scanFiles(absPaths, this.config.projectRoot);
249
+ const absPaths = changedFiles.map((rel) => join(projectRoot, rel));
250
+ const chunks = await this.scanner.scanFiles(absPaths, projectRoot);
132
251
 
133
252
  if (chunks.length === 0) return;
134
253
 
135
- const files = await this.groupChunksByFile(chunks);
254
+ const files = await this.groupChunksByFile(chunks, projectRoot);
136
255
 
137
- // Push in batches, only retrying files from failed batches
138
256
  const batchSize = 25;
139
257
  const succeededPaths = new Set<string>();
140
258
  let hadFailure = false;
@@ -142,39 +260,55 @@ export class ScannerDaemon {
142
260
  for (let i = 0; i < files.length; i += batchSize) {
143
261
  const batch = files.slice(i, i + batchSize);
144
262
  try {
145
- await this.client.pushFiles(this.config.projectId, batch);
263
+ await this.client.pushFiles(projectId, batch);
146
264
  for (const f of batch) succeededPaths.add(f.path);
147
265
  } catch {
148
266
  hadFailure = true;
149
- for (const f of batch) this.pendingRetries.add(f.path);
267
+ for (const f of batch) project.pendingRetries.add(f.path);
150
268
  }
151
269
  }
152
270
 
153
271
  if (succeededPaths.size > 0) {
154
272
  console.log(
155
- `[scanner] Pushed ${succeededPaths.size} changed file(s): ${[...succeededPaths].join(', ')}`
273
+ `[scanner] [${projectId.slice(0, 8)}] Pushed ${succeededPaths.size} file(s): ${[...succeededPaths].join(', ')}`
156
274
  );
157
275
  }
158
276
  if (hadFailure) {
159
- console.warn(`[scanner] ${this.pendingRetries.size} file(s) failed, queued for retry`);
160
- this.startRetryLoop();
277
+ console.warn(
278
+ `[scanner] [${projectId.slice(0, 8)}] ${project.pendingRetries.size} file(s) failed, queued for retry`
279
+ );
280
+ this.startRetryLoop(project);
161
281
  }
162
282
  } catch (err) {
163
- console.error('[scanner] Error handling changes:', err);
283
+ console.error(`[scanner] [${projectId.slice(0, 8)}] Error handling changes:`, err);
164
284
  }
165
285
  }
166
286
 
167
- private async groupChunksByFile(chunks: CodeChunk[]): Promise<ScanPushFile[]> {
287
+ private startRetryLoop(project: WatchedProject): void {
288
+ if (project.retryTimer) return;
289
+ project.retryTimer = setInterval(async () => {
290
+ if (project.pendingRetries.size === 0) {
291
+ if (project.retryTimer) clearInterval(project.retryTimer);
292
+ project.retryTimer = null;
293
+ return;
294
+ }
295
+ const files = [...project.pendingRetries];
296
+ project.pendingRetries.clear();
297
+ await this.handleChanges(project.projectId, project.path, files);
298
+ }, 10_000);
299
+ project.retryTimer.unref();
300
+ }
301
+
302
+ private async groupChunksByFile(
303
+ chunks: CodeChunk[],
304
+ projectRoot: string
305
+ ): Promise<ScanPushFile[]> {
168
306
  const fileMap = new Map<string, ScanPushFile>();
169
307
 
170
308
  for (const chunk of chunks) {
171
309
  const key = chunk.filePath;
172
310
  if (!fileMap.has(key)) {
173
- fileMap.set(key, {
174
- path: key,
175
- hash: '',
176
- chunks: [],
177
- });
311
+ fileMap.set(key, { path: key, hash: '', chunks: [] });
178
312
  }
179
313
  const file = fileMap.get(key)!;
180
314
  file.chunks.push({
@@ -190,7 +324,7 @@ export class ScannerDaemon {
190
324
 
191
325
  for (const file of fileMap.values()) {
192
326
  try {
193
- const absPath = join(this.config.projectRoot, file.path);
327
+ const absPath = join(projectRoot, file.path);
194
328
  const raw = await readFile(absPath, 'utf-8');
195
329
  file.hash = createHash('sha256').update(raw).digest('hex');
196
330
  } catch (err) {
@@ -0,0 +1,124 @@
1
+ import { readFile, readdir, stat } from 'fs/promises';
2
+ import { join, resolve } from 'path';
3
+
4
/** Directory names that are never descended into during the discovery walk. */
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'build',
  '.next',
  '.nuxt',
  '.output',
  '__pycache__',
  '.venv',
  'venv',
  '.tox',
  'target',
  '.cache',
  'coverage',
  '.turbo',
  '.vercel',
  '.svelte-kit',
]);

/** A project located on disk through its `.mnemonik.json` file. */
export interface DiscoveredProject {
  /** Non-empty `projectId` string read from `.mnemonik.json`. */
  projectId: string;
  /** Directory that contains the `.mnemonik.json` file. */
  path: string;
  /** `projectName` from `.mnemonik.json`, when it is a string. */
  projectName?: string;
}

/** Thrown by the walk when a root exceeds its time budget. */
class DiscoveryTimeoutError extends Error {
  constructor() {
    super('Discovery walk timed out');
    this.name = 'DiscoveryTimeoutError';
  }
}

/**
 * Finds projects by walking a set of root directories and looking for
 * `.mnemonik.json` files.
 */
export class ProjectDiscovery {
  private maxDepth: number;
  private timeoutMs: number;

  /**
   * @param roots Directories to search; a leading `~` path segment is expanded
   *   to the home directory.
   * @param options `maxDepth` is the deepest directory level (root = 0) that is
   *   inspected, default 3. `timeoutMs` is the time budget per root, default 30s.
   */
  constructor(
    private roots: string[],
    options?: { maxDepth?: number; timeoutMs?: number }
  ) {
    this.maxDepth = options?.maxDepth ?? 3;
    this.timeoutMs = options?.timeoutMs ?? 30_000;
  }

  /**
   * Discover all projects with .mnemonik.json files under the configured roots.
   * Deduplicates by projectId (same project found at multiple paths = first wins).
   *
   * Never rejects because of a single root: a root that times out or fails is
   * reported with a warning, and whatever was found before that point is kept.
   *
   * @returns The discovered projects in the order they were first seen.
   */
  async discover(): Promise<DiscoveredProject[]> {
    const seen = new Map<string, DiscoveredProject>();

    for (const root of this.roots) {
      const absRoot = resolve(this.expandHome(root));
      try {
        await this.walkWithTimeout(absRoot, 0, seen);
      } catch (err) {
        if (err instanceof DiscoveryTimeoutError) {
          console.warn(`[scanner] Discovery timeout for root: ${absRoot} (>${this.timeoutMs}ms)`);
        } else {
          const message = err instanceof Error ? err.message : String(err);
          console.warn(`[scanner] Error scanning root ${absRoot}:`, message);
        }
      }
    }

    return Array.from(seen.values());
  }

  /**
   * Expands a leading `~` only when it is a whole path segment (`~`, `~/x`),
   * so `~user/x` is left alone. When no home directory is known the root is
   * returned unchanged instead of collapsing `~/x` into `/x`.
   */
  private expandHome(root: string): string {
    const home = process.env.HOME || process.env.USERPROFILE || '';
    if (home === '') return root;
    // Function replacer: `$` sequences inside the home path must stay literal.
    return root.replace(/^~(?=$|[\\/])/, () => home);
  }

  /** Walks one root with a fresh deadline of `timeoutMs` from now. */
  private async walkWithTimeout(
    dir: string,
    depth: number,
    seen: Map<string, DiscoveredProject>
  ): Promise<void> {
    const deadline = Date.now() + this.timeoutMs;
    await this.walk(dir, depth, seen, deadline);
  }

  /**
   * Inspects `dir` and, unless it is a project directory, its subdirectories.
   *
   * @throws DiscoveryTimeoutError once `deadline` has passed; the error
   *   propagates through every recursion level up to `discover()`.
   */
  private async walk(
    dir: string,
    depth: number,
    seen: Map<string, DiscoveredProject>,
    deadline: number
  ): Promise<void> {
    if (depth > this.maxDepth) return;
    if (Date.now() > deadline) throw new DiscoveryTimeoutError();

    // Check for .mnemonik.json in this directory
    const configPath = join(dir, '.mnemonik.json');
    try {
      const raw = await readFile(configPath, 'utf-8');
      const parsed = JSON.parse(raw) as Record<string, unknown>;
      if (typeof parsed.projectId === 'string' && parsed.projectId.length > 0) {
        if (!seen.has(parsed.projectId)) {
          seen.set(parsed.projectId, {
            projectId: parsed.projectId,
            path: dir,
            projectName: typeof parsed.projectName === 'string' ? parsed.projectName : undefined,
          });
        }
      }
      // Don't recurse into subdirectories of a project — the project owns this tree
      return;
    } catch (err) {
      // A missing or unreadable file just means "not a project, keep walking".
      // A file that exists but is not valid JSON is worth telling the user about.
      if (err instanceof SyntaxError) {
        console.warn(`[scanner] Ignoring malformed ${configPath}: ${err.message}`);
      }
    }

    // List subdirectories. Only the listing itself is guarded: wrapping the
    // recursion as well would swallow the timeout raised further down.
    const entries = await readdir(dir, { withFileTypes: true }).catch(() => null);
    if (entries === null) return; // permission denied or inaccessible directory

    for (const entry of entries) {
      if (!entry.isDirectory()) continue;
      if (SKIP_DIRS.has(entry.name)) continue;
      if (entry.name.startsWith('.') && entry.name !== '.mnemonik') continue;
      await this.walk(join(dir, entry.name), depth + 1, seen, deadline);
    }
  }
}