moflo 4.8.33 → 4.8.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,956 +1,956 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Generate structural code map for a monorepo or project.
4
- *
5
- * Produces five chunk types stored in the `code-map` namespace of .swarm/memory.db:
6
- * 1. project: — one per top-level project directory (bird's-eye overview)
7
- * 2. dir: — one per directory with 2+ exported types (drill-down detail)
8
- * 3. iface-map: — batched interface-to-implementation mappings
9
- * 4. type-index: — batched type-name-to-file-path lookups
10
- * 5. file: — ONE PER FILE with exported types (file-level granularity)
11
- *
12
- * The `file:` entries are the key improvement — they enable precise semantic search
13
- * for individual types, entities, and services instead of diluting results across
14
- * large batches.
15
- *
16
- * Design: regex-based extraction (no AST parser), incremental via SHA-256 hash,
17
- * stores in sql.js memory DB, triggers embedding generation in background.
18
- *
19
- * Usage:
20
- * node node_modules/moflo/bin/generate-code-map.mjs # Incremental
21
- * node node_modules/moflo/bin/generate-code-map.mjs --force # Full regenerate
22
- * node node_modules/moflo/bin/generate-code-map.mjs --verbose # Detailed logging
23
- * node node_modules/moflo/bin/generate-code-map.mjs --no-embeddings # Skip embedding generation
24
- * node node_modules/moflo/bin/generate-code-map.mjs --stats # Print stats and exit
25
- * npx flo-codemap # Via npx
26
- */
27
-
28
- import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from 'fs';
29
- import { resolve, dirname, relative, basename, extname } from 'path';
30
- import { fileURLToPath } from 'url';
31
- import { createHash } from 'crypto';
32
- import { execSync, spawn } from 'child_process';
33
- import { mofloResolveURL } from './lib/moflo-resolve.mjs';
34
- const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
35
-
36
-
37
- const __dirname = dirname(fileURLToPath(import.meta.url));
38
-
39
/**
 * Locate the project root by climbing from the current working directory
 * until a directory containing a package.json is found.
 *
 * The filesystem root itself is never probed; when no ancestor matches,
 * the current working directory is returned.
 *
 * @returns {string} Absolute path of the detected project root.
 */
function findProjectRoot() {
  const start = process.cwd();
  const fsRoot = resolve(start, '/');
  for (let current = start; current !== fsRoot; current = dirname(current)) {
    if (existsSync(resolve(current, 'package.json'))) {
      return current;
    }
  }
  return process.cwd();
}
49
-
50
- const projectRoot = findProjectRoot();
51
- const NAMESPACE = 'code-map';
52
- const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
53
- const HASH_CACHE_PATH = resolve(projectRoot, '.swarm/code-map-hash.txt');
54
-
55
- // Directories to exclude from indexing
56
- const EXCLUDE_DIRS = [
57
- 'node_modules', 'dist', 'build', '.next', 'coverage',
58
- '.claude', 'template', 'back-office-template',
59
- ];
60
-
61
- // Heuristic descriptions for well-known directory names
62
- const DIR_DESCRIPTIONS = {
63
- entities: 'MikroORM entity definitions',
64
- services: 'business logic services',
65
- routes: 'Fastify route handlers',
66
- middleware: 'request middleware (auth, validation, tenancy)',
67
- schemas: 'Zod validation schemas',
68
- types: 'TypeScript type definitions',
69
- utils: 'utility helpers',
70
- config: 'configuration',
71
- migrations: 'database migrations',
72
- scripts: 'CLI scripts',
73
- components: 'React components',
74
- pages: 'route page components',
75
- contexts: 'React context providers',
76
- hooks: 'React custom hooks',
77
- layout: 'app shell layout',
78
- themes: 'MUI theme configuration',
79
- api: 'API client layer',
80
- locales: 'i18n translation files',
81
- tests: 'test suites',
82
- e2e: 'end-to-end tests',
83
- providers: 'dependency injection providers',
84
- };
85
-
86
- // Batch sizes for chunking
87
- const IFACE_MAP_BATCH = 20;
88
- const TYPE_INDEX_BATCH = 30; // Reduced from 80 for better search relevance
89
-
90
- // Parse args
91
- const args = process.argv.slice(2);
92
- const force = args.includes('--force');
93
- const verbose = args.includes('--verbose') || args.includes('-v');
94
- const skipEmbeddings = args.includes('--no-embeddings');
95
- const statsOnly = args.includes('--stats');
96
-
97
/** Print a message tagged with the `[code-map]` prefix. */
function log(msg) {
  console.log(`[code-map] ${msg}`);
}

/** Print a tagged message only when the `verbose` flag is set. */
function debug(msg) {
  if (!verbose) return;
  console.log(`[code-map] ${msg}`);
}
99
-
100
- // ---------------------------------------------------------------------------
101
- // Database helpers
102
- // ---------------------------------------------------------------------------
103
-
104
/** Create the directory that holds the memory database if it does not exist yet. */
function ensureDbDir() {
  const parent = dirname(DB_PATH);
  if (existsSync(parent)) return;
  mkdirSync(parent, { recursive: true });
}
108
-
109
/**
 * Open the sql.js database stored at DB_PATH (or a fresh in-memory one when
 * the file does not exist) and make sure the `memory_entries` table and its
 * two indexes are present.
 *
 * @returns {Promise<object>} The sql.js Database instance.
 */
async function getDb() {
  ensureDbDir();
  const SQL = await initSqlJs();
  const db = existsSync(DB_PATH)
    ? new SQL.Database(readFileSync(DB_PATH))
    : new SQL.Database();

  const schemaStatements = [
    `
    CREATE TABLE IF NOT EXISTS memory_entries (
      id TEXT PRIMARY KEY,
      key TEXT NOT NULL,
      namespace TEXT DEFAULT 'default',
      content TEXT NOT NULL,
      type TEXT DEFAULT 'semantic',
      embedding TEXT,
      embedding_model TEXT DEFAULT 'local',
      embedding_dimensions INTEGER,
      tags TEXT,
      metadata TEXT,
      owner_id TEXT,
      created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
      updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
      expires_at INTEGER,
      last_accessed_at INTEGER,
      access_count INTEGER DEFAULT 0,
      status TEXT DEFAULT 'active',
      UNIQUE(namespace, key)
    )
  `,
    `CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`,
    `CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`,
  ];
  for (const statement of schemaStatements) {
    db.run(statement);
  }
  return db;
}
146
-
147
/** Serialize the sql.js database and write the bytes to DB_PATH. */
function saveDb(db) {
  const bytes = Buffer.from(db.export());
  writeFileSync(DB_PATH, bytes);
}
151
-
152
/**
 * Create a row identifier of the form `mem_<epoch-ms>_<9 base-36 chars>`.
 *
 * @returns {string} The generated identifier.
 */
function generateId() {
  // toString(36) can yield fewer than nine fractional digits (0.5 -> "0.i"),
  // so pad to keep the suffix a fixed width. slice() replaces the deprecated
  // substr().
  const suffix = Math.random().toString(36).slice(2, 11).padEnd(9, '0');
  return `mem_${Date.now()}_${suffix}`;
}
155
-
156
/**
 * Insert (or replace) one entry in the code-map namespace.
 *
 * @param {object} db - sql.js Database (only `run` is used).
 * @param {string} key - Entry key; unique together with the namespace.
 * @param {string} content - Text stored in the `content` column.
 * @param {object} [metadata] - Stored as JSON.
 * @param {string[]} [tags] - Stored as JSON.
 */
function storeEntry(db, key, content, metadata = {}, tags = []) {
  const timestamp = Date.now();
  const params = [
    generateId(),
    key,
    NAMESPACE,
    content,
    JSON.stringify(metadata),
    JSON.stringify(tags),
    timestamp,
    timestamp,
  ];
  const sql = `
    INSERT OR REPLACE INTO memory_entries
    (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
  `;
  db.run(sql, params);
}
165
-
166
/** Remove every entry stored under the code-map namespace. */
function deleteNamespace(db) {
  const sql = `DELETE FROM memory_entries WHERE namespace = ?`;
  db.run(sql, [NAMESPACE]);
}
169
-
170
/**
 * Count the entries stored under the code-map namespace.
 *
 * @param {object} db - sql.js Database.
 * @returns {number} Row count, or 0 when the query yields no row.
 */
function countNamespace(db) {
  const stmt = db.prepare(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = ?`);
  try {
    stmt.bind([NAMESPACE]);
    return stmt.step() ? stmt.getAsObject().cnt : 0;
  } finally {
    // Release the prepared statement even when bind/step throws.
    stmt.free();
  }
}
178
-
179
/**
 * Count code-map entries whose `embedding` column is NULL or empty.
 *
 * @param {object} db - sql.js Database.
 * @returns {number} Row count, or 0 when the query yields no row.
 */
function countMissingEmbeddings(db) {
  const stmt = db.prepare(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = ? AND (embedding IS NULL OR embedding = '')`);
  try {
    stmt.bind([NAMESPACE]);
    return stmt.step() ? stmt.getAsObject().cnt : 0;
  } finally {
    // Release the prepared statement even when bind/step throws.
    stmt.free();
  }
}
187
-
188
- // ---------------------------------------------------------------------------
189
- // Source file enumeration — git ls-files with filesystem fallback
190
- // ---------------------------------------------------------------------------
191
-
192
/**
 * Read the `code_map` block of moflo.yaml (directories, extensions, exclude).
 *
 * Uses a small line-based parser rather than a YAML library. The block ends
 * at the first non-blank, non-comment line that is not indented deeper than
 * the `code_map:` header, so later sections of the file can never leak into
 * the result. A list given in moflo.yaml, whether inline (`[a, b]`) or as
 * `- item` lines, replaces the built-in default for that key.
 *
 * @returns {{directories: string[], extensions: string[], exclude: string[]}}
 *   The defaults when the file is missing, unreadable, or has no code_map block.
 */
function readCodeMapConfig() {
  const defaults = {
    directories: ['src'],
    extensions: [
      '.ts', '.tsx', '.js', '.mjs', '.jsx', // JS/TS
      '.py', '.pyi', // Python
      '.go', // Go
      '.java', '.kt', '.kts', // JVM
      '.cs', // C#
      '.rs', // Rust
      '.rb', // Ruby
      '.swift', // Swift
      '.php', // PHP
      '.c', '.h', '.cpp', '.hpp', '.cc', // C/C++
    ],
    exclude: [...EXCLUDE_DIRS],
  };

  let content;
  try {
    const yamlPath = resolve(projectRoot, 'moflo.yaml');
    if (!existsSync(yamlPath)) return defaults;
    content = readFileSync(yamlPath, 'utf-8');
  } catch {
    // Best effort: an unreadable config behaves like a missing one.
    return defaults;
  }

  const lines = content.split(/\r?\n/);
  const headerIndex = lines.findIndex((l) => /^[ \t]*code_map:[ \t]*(?:#.*)?$/.test(l));
  if (headerIndex === -1) return defaults;
  const indentOf = (l) => l.match(/^[ \t]*/)[0].length;
  const headerIndent = indentOf(lines[headerIndex]);

  const listKeys = new Set(['directories', 'extensions', 'exclude']);
  const unquote = (s) => s.trim().replace(/^["']|["']$/g, '');
  const result = { ...defaults };
  const overridden = new Set();
  let currentKey = null;

  for (const line of lines.slice(headerIndex + 1)) {
    if (line.trim() === '' || /^[ \t]*#/.test(line)) continue;
    // A line at or left of the header's indent belongs to another section.
    if (indentOf(line) <= headerIndent) break;

    const keyMatch = line.match(/^[ \t]+(\w+):/);
    const itemMatch = line.match(/^[ \t]+- (.+)/);
    if (keyMatch) {
      currentKey = keyMatch[1];
      // Inline array: extensions: [".ts", ".tsx"]
      const inlineArray = line.match(/\[([^\]]+)\]/);
      if (inlineArray && listKeys.has(currentKey)) {
        result[currentKey] = inlineArray[1].split(',').map(unquote);
        overridden.add(currentKey);
      }
    } else if (itemMatch && listKeys.has(currentKey)) {
      // The first block-style item replaces the default list instead of
      // appending to it, matching the inline-array behaviour.
      if (!overridden.has(currentKey)) {
        result[currentKey] = [];
        overridden.add(currentKey);
      }
      result[currentKey].push(unquote(itemMatch[1]));
    }
  }
  return result;
}
238
-
239
/**
 * Recursively collect source files below `dir` (filesystem fallback).
 *
 * @param {string} dir - Directory relative to the project root.
 * @param {Set<string>} extensions - File extensions to keep (with leading dot).
 * @param {Set<string>} excludeSet - Entry names to skip at any depth.
 * @param {number} [maxDepth=8] - Deepest recursion level that is still read.
 * @param {number} [depth=0] - Current recursion level.
 * @returns {string[]} Forward-slash relative paths; unreadable directories yield [].
 */
function walkDir(dir, extensions, excludeSet, maxDepth = 8, depth = 0) {
  if (depth > maxDepth) return [];

  let listing;
  try {
    listing = readdirSync(resolve(projectRoot, dir), { withFileTypes: true });
  } catch {
    return [];
  }

  return listing
    .filter((item) => !excludeSet.has(item.name))
    .flatMap((item) => {
      // Forward slashes keep paths consistent across platforms.
      const childPath = dir ? `${dir}/${item.name}` : item.name;
      if (item.isDirectory()) {
        return walkDir(childPath, extensions, excludeSet, maxDepth, depth + 1);
      }
      const wanted = item.isFile() && extensions.has(extname(item.name));
      return wanted ? [childPath] : [];
    });
}
260
-
261
/**
 * Enumerate the source files to index.
 *
 * Tries `git ls-files` first (fast, respects .gitignore) and falls back to a
 * filesystem walk of the configured directories when git is unavailable or
 * returns nothing.
 *
 * In the git path, a file is skipped when ANY segment of its path is listed in
 * EXCLUDE_DIRS or in the configured `exclude`, which matches how the
 * filesystem fallback prunes entries at every depth.
 *
 * @returns {string[]} Paths relative to the project root.
 */
function getSourceFiles() {
  const config = readCodeMapConfig();
  const extSet = new Set(config.extensions);
  const excludeSet = new Set(config.exclude);
  const gitExcludes = new Set([...EXCLUDE_DIRS, ...config.exclude]);

  // Extensions come from moflo.yaml and are interpolated into a shell command,
  // so only plain ".ext" tokens are passed through.
  const shellSafeExts = config.extensions.filter((ext) => /^\.[\w.+-]+$/.test(ext));

  // With no usable pathspec `git ls-files --` would list every tracked file,
  // so the git path is skipped entirely in that case.
  if (shellSafeExts.length > 0) {
    const gitGlobs = shellSafeExts.map((ext) => `"*${ext}"`).join(' ');
    try {
      // -z: NUL-terminated output, which also stops git from quoting and
      // escaping paths that contain non-ASCII or special characters.
      const raw = execSync(
        `git ls-files -z -- ${gitGlobs}`,
        { cwd: projectRoot, encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 }
      );
      const files = raw
        .split('\0')
        .filter((f) => f.length > 0 && !f.split(/[\\/]/).some((segment) => gitExcludes.has(segment)));
      if (files.length > 0) return files;
    } catch {
      // git not available or not a git repo — fall through
    }
  }

  // Fallback: walk configured directories from moflo.yaml
  log('git ls-files returned no files — falling back to filesystem walk');
  const files = [];
  for (const dir of config.directories) {
    if (existsSync(resolve(projectRoot, dir))) {
      files.push(...walkDir(dir, extSet, excludeSet));
    }
  }
  return files;
}
301
-
302
/**
 * Hash the list of file paths (not their contents): SHA-256 over the sorted
 * paths joined by newlines. The input array is left untouched.
 *
 * @param {string[]} files
 * @returns {string} Hex digest.
 */
function computeFileListHash(files) {
  const ordered = Array.from(files);
  ordered.sort();
  const hasher = createHash('sha256');
  hasher.update(ordered.join('\n'));
  return hasher.digest('hex');
}
306
-
307
/**
 * Report whether the cached hash file matches `currentHash`.
 * Always false when `--force` was given or no cache file exists.
 *
 * @param {string} currentHash
 * @returns {boolean}
 */
function isUnchanged(currentHash) {
  const canCompare = !force && existsSync(HASH_CACHE_PATH);
  if (!canCompare) return false;
  return readFileSync(HASH_CACHE_PATH, 'utf-8').trim() === currentHash;
}
313
-
314
- // ---------------------------------------------------------------------------
315
- // Type extraction (regex-based, no AST) — multi-language
316
- // ---------------------------------------------------------------------------
317
-
318
- // Per-language extraction patterns: each entry is [regex, kindOverride?]
319
- // Group 1 = name, Group 2 = base/extends (optional), Group 3 = implements (optional)
320
- const LANG_PATTERNS = {
321
- // JS/TS — require `export` keyword
322
- ts: [
323
- [/^export\s+(?:default\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+([\w.]+))?(?:\s+implements\s+([\w,\s.]+))?/],
324
- [/^export\s+(?:default\s+)?interface\s+(\w+)(?:\s+extends\s+([\w,\s.]+))?/],
325
- [/^export\s+(?:default\s+)?type\s+(\w+)\s*[=<]/],
326
- [/^export\s+(?:const\s+)?enum\s+(\w+)/],
327
- [/^export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)/],
328
- [/^export\s+(?:default\s+)?const\s+(\w+)\s*[=:]/],
329
- // CommonJS / plain JS (no export keyword)
330
- [/^(?:module\.exports\s*=\s*)?class\s+(\w+)(?:\s+extends\s+([\w.]+))?/],
331
- [/^(?:async\s+)?function\s+(\w+)\s*\(/],
332
- [/^const\s+(\w+)\s*=\s*(?:async\s+)?\(?.*\)?\s*=>/],
333
- [/^(?:var|let|const)\s+(\w+)\s*=\s*require\s*\(/],
334
- ],
335
-
336
- // Python — class/def at module level
337
- py: [
338
- [/^class\s+(\w+)(?:\(([^)]+)\))?:/],
339
- [/^(?:async\s+)?def\s+(\w+)\s*\(/],
340
- [/^(\w+)\s*:\s*TypeAlias\s*=/, 'type'],
341
- [/^(\w+)\s*=\s*(?:TypeVar|NewType|NamedTuple|dataclass)\s*\(/, 'type'],
342
- ],
343
-
344
- // Go — top-level type/func/var declarations
345
- go: [
346
- [/^type\s+(\w+)\s+struct\b/, 'struct'],
347
- [/^type\s+(\w+)\s+interface\b/, 'interface'],
348
- [/^type\s+(\w+)\s+/, 'type'],
349
- [/^func\s+(\w+)\s*\(/],
350
- [/^func\s+\([^)]+\)\s+(\w+)\s*\(/, 'method'],
351
- [/^var\s+(\w+)\s+/, 'var'],
352
- [/^const\s+(\w+)\s+/, 'const'],
353
- ],
354
-
355
- // Java/Kotlin
356
- java: [
357
- [/^(?:public|protected|private|abstract|static|final|sealed|open|\s)*class\s+(\w+)(?:\s+extends\s+([\w.]+))?(?:\s+implements\s+([\w,\s.]+))?/],
358
- [/^(?:public|protected|private|abstract|static|sealed|\s)*interface\s+(\w+)(?:\s+extends\s+([\w,\s.]+))?/],
359
- [/^(?:public|protected|private|abstract|static|\s)*enum\s+(\w+)/],
360
- [/^(?:public|protected|private|abstract|static|\s)*@?interface\s+(\w+)/, 'annotation'],
361
- [/^(?:public|protected|private|abstract|static|final|synchronized|\s)*(?:[\w<>\[\],\s]+)\s+(\w+)\s*\(/, 'method'],
362
- [/^(?:data\s+)?class\s+(\w+)(?:\s*:\s*([\w.]+))?/, 'class'], // Kotlin
363
- [/^(?:fun|suspend\s+fun)\s+(\w+)\s*\(/], // Kotlin
364
- [/^object\s+(\w+)/, 'object'], // Kotlin
365
- ],
366
-
367
- // C#
368
- cs: [
369
- [/^(?:public|protected|private|internal|abstract|static|sealed|partial|\s)*class\s+(\w+)(?:\s*:\s*([\w.,\s<>]+))?/],
370
- [/^(?:public|protected|private|internal|abstract|static|\s)*interface\s+(\w+)(?:\s*:\s*([\w.,\s<>]+))?/],
371
- [/^(?:public|protected|private|internal|abstract|static|\s)*enum\s+(\w+)/],
372
- [/^(?:public|protected|private|internal|abstract|static|\s)*struct\s+(\w+)/],
373
- [/^(?:public|protected|private|internal|abstract|static|\s)*record\s+(\w+)/],
374
- [/^(?:public|protected|private|internal|abstract|static|\s)*delegate\s+\S+\s+(\w+)\s*\(/, 'delegate'],
375
- [/^namespace\s+([\w.]+)/, 'namespace'],
376
- ],
377
-
378
- // Rust
379
- rs: [
380
- [/^pub(?:\([\w]+\))?\s+struct\s+(\w+)/, 'struct'],
381
- [/^pub(?:\([\w]+\))?\s+enum\s+(\w+)/],
382
- [/^pub(?:\([\w]+\))?\s+trait\s+(\w+)(?:\s*:\s*([\w\s+]+))?/, 'trait'],
383
- [/^pub(?:\([\w]+\))?\s+(?:async\s+)?fn\s+(\w+)/],
384
- [/^pub(?:\([\w]+\))?\s+type\s+(\w+)\s*=/, 'type'],
385
- [/^pub(?:\([\w]+\))?\s+mod\s+(\w+)/, 'module'],
386
- [/^impl(?:<[^>]+>)?\s+(\w+)/, 'impl'],
387
- [/^struct\s+(\w+)/, 'struct'],
388
- [/^enum\s+(\w+)/],
389
- [/^trait\s+(\w+)/, 'trait'],
390
- [/^(?:async\s+)?fn\s+(\w+)/],
391
- ],
392
-
393
- // Ruby
394
- rb: [
395
- [/^class\s+(\w+)(?:\s*<\s*([\w:]+))?/],
396
- [/^module\s+(\w+)/, 'module'],
397
- [/^def\s+(self\.)?(\w+)/, 'method'],
398
- ],
399
-
400
- // Swift
401
- swift: [
402
- [/^(?:public|open|internal|fileprivate|private|\s)*(?:final\s+)?class\s+(\w+)(?:\s*:\s*([\w,\s]+))?/],
403
- [/^(?:public|open|internal|fileprivate|private|\s)*protocol\s+(\w+)(?:\s*:\s*([\w,\s]+))?/, 'protocol'],
404
- [/^(?:public|open|internal|fileprivate|private|\s)*struct\s+(\w+)/, 'struct'],
405
- [/^(?:public|open|internal|fileprivate|private|\s)*enum\s+(\w+)/],
406
- [/^(?:public|open|internal|fileprivate|private|\s)*func\s+(\w+)\s*\(/],
407
- [/^(?:public|open|internal|fileprivate|private|\s)*typealias\s+(\w+)/, 'type'],
408
- ],
409
-
410
- // PHP
411
- php: [
412
- [/^(?:abstract\s+|final\s+)?class\s+(\w+)(?:\s+extends\s+([\w\\]+))?(?:\s+implements\s+([\w\\,\s]+))?/],
413
- [/^interface\s+(\w+)(?:\s+extends\s+([\w\\,\s]+))?/],
414
- [/^trait\s+(\w+)/, 'trait'],
415
- [/^enum\s+(\w+)/],
416
- [/^(?:public|protected|private|static|\s)*function\s+(\w+)\s*\(/],
417
- ],
418
-
419
- // C/C++
420
- c: [
421
- [/^(?:typedef\s+)?struct\s+(\w+)/, 'struct'],
422
- [/^(?:typedef\s+)?enum\s+(\w+)/],
423
- [/^(?:typedef\s+)?union\s+(\w+)/, 'union'],
424
- [/^class\s+(\w+)(?:\s*:\s*(?:public|protected|private)\s+([\w:]+))?/],
425
- [/^namespace\s+(\w+)/, 'namespace'],
426
- [/^template\s*<[^>]*>\s*class\s+(\w+)/, 'template-class'],
427
- [/^(?:static\s+|inline\s+|extern\s+)*(?:[\w*&:<>,\s]+)\s+(\w+)\s*\(/, 'function'],
428
- ],
429
- };
430
-
431
- // Map file extensions to language keys
432
- const EXT_TO_LANG = {
433
- '.ts': 'ts', '.tsx': 'ts', '.js': 'ts', '.mjs': 'ts', '.jsx': 'ts', '.cjs': 'ts',
434
- '.py': 'py', '.pyi': 'py',
435
- '.go': 'go',
436
- '.java': 'java', '.kt': 'java', '.kts': 'java',
437
- '.cs': 'cs',
438
- '.rs': 'rs',
439
- '.rb': 'rb',
440
- '.swift': 'swift',
441
- '.php': 'php',
442
- '.c': 'c', '.h': 'c', '.cpp': 'c', '.hpp': 'c', '.cc': 'c',
443
- };
444
-
445
- const ENTITY_DECORATOR = /@Entity\s*\(/;
446
-
447
/**
 * Extract declarations from one source file using the per-language regex
 * table (no AST). Each line is trimmed before matching, and a name is
 * reported at most once per file.
 *
 * @param {string} filePath - Path relative to the project root.
 * @returns {Array<{name: string, kind: string, bases: (string|null),
 *   implements: (string|null), isEntity: boolean, file: string}>}
 *   Empty when the file is missing or unreadable.
 */
function extractTypes(filePath) {
  const fullPath = resolve(projectRoot, filePath);
  if (!existsSync(fullPath)) return [];

  let content;
  try {
    content = readFileSync(fullPath, 'utf-8');
  } catch {
    return [];
  }

  const ext = extname(filePath);
  const lang = EXT_TO_LANG[ext] || 'ts';
  const patterns = LANG_PATTERNS[lang] || LANG_PATTERNS.ts;

  const types = [];
  const seen = new Set();
  // Set by an @Entity( decorator line; applies to the next declaration found.
  let isEntityNext = false;

  for (const rawLine of content.split('\n')) {
    const line = rawLine.trim();

    if (ENTITY_DECORATOR.test(line)) {
      isEntityNext = true;
      continue;
    }

    for (const [pattern, kindOverride] of patterns) {
      const m = line.match(pattern);
      if (!m) continue;

      // The Ruby `def` pattern captures an optional `self.` receiver in
      // group 1 and the method name in group 2. Group 1 is therefore either
      // 'self.' or undefined (plain `def foo`); in both cases the name is
      // group 2 and there is no base type to report.
      const receiverInGroup1 = m[1] === undefined || m[1] === 'self.';
      const name = receiverInGroup1 ? m[2] : m[1];
      if (!name || seen.has(name)) continue;
      seen.add(name);

      const kind = kindOverride || detectKind(line, name, lang);
      const bases = receiverInGroup1 ? '' : (m[2] || '').trim();
      const implements_ = (m[3] || '').trim();
      types.push({
        name,
        kind,
        bases: bases || null,
        implements: implements_ || null,
        isEntity: isEntityNext,
        file: filePath,
      });
      isEntityNext = false;
      break;
    }

    // Any other non-empty line that is neither a decorator nor an export
    // cancels a pending @Entity marker.
    if (isEntityNext && !line.startsWith('@') && !line.startsWith('export') && line.length > 0) {
      isEntityNext = false;
    }
  }

  return types;
}
505
-
506
/**
 * Classify a matched declaration line by its declaration keyword.
 *
 * Only the text BEFORE the declared name is inspected first, so keywords that
 * merely appear in parameters or initialisers (`function f(type)`,
 * `const h = (module) => ...`) no longer decide the kind. If the name cannot
 * be located, or nothing precedes it, the whole line is scanned instead.
 *
 * @param {string} line - Trimmed source line.
 * @param {string} name - Declared identifier captured from the line.
 * @param {string} lang - Language key (currently unused).
 * @returns {string} One of class, interface, type, enum, struct, trait,
 *   protocol, module, function, const, or 'export' when nothing matches.
 */
function detectKind(line, name, lang) {
  // Checked in priority order; the first keyword present wins.
  const keywordTable = [
    [/\bclass\b/, 'class'],
    [/\binterface\b/, 'interface'],
    [/\btype\b/, 'type'],
    [/\benum\b/, 'enum'],
    [/\bstruct\b/, 'struct'],
    [/\btrait\b/, 'trait'],
    [/\bprotocol\b/, 'protocol'],
    [/\bmodule\b/, 'module'],
    [/\b(?:function|func|def|fn)\b/, 'function'],
    [/\bconst\b/, 'const'],
  ];
  const classify = (text) => {
    for (const [keyword, kind] of keywordTable) {
      if (keyword.test(text)) return kind;
    }
    return null;
  };

  let nameAt = -1;
  if (typeof name === 'string' && name.length > 0) {
    const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const found = new RegExp(`(?<![\\w$])${escaped}(?![\\w$])`).exec(line);
    if (found) nameAt = found.index;
  }

  if (nameAt > 0) {
    const headKind = classify(line.slice(0, nameAt));
    if (headKind === 'const') {
      // `const X = function ...` / `const X = class ...` keep the kind of
      // the expression they are bound to.
      const initializer = line
        .slice(nameAt + name.length)
        .match(/^\s*(?::[^=]*)?=\s*(?:async\s+)?(function|class)\b/);
      return initializer ? initializer[1] : 'const';
    }
    if (headKind) return headKind;
  }

  return classify(line) || 'export';
}
519
-
520
- // ---------------------------------------------------------------------------
521
- // Project structure analysis
522
- // ---------------------------------------------------------------------------
523
-
524
/**
 * Derive the project a file belongs to from its forward-slash relative path.
 *
 * For the known multi-project roots the project is `<root>/<sub-directory>`;
 * everywhere else it is the first path segment.
 *
 * @param {string} filePath
 * @returns {string}
 */
function getProjectName(filePath) {
  const nestedRoots = ['packages', 'back-office', 'customer-portal', 'admin-console', 'webhooks'];
  const parts = filePath.split('/');

  // parts[1] names a sub-project only when it is a directory, i.e. at least
  // one more segment (the file) follows it. A file sitting directly in the
  // root directory belongs to the root itself.
  if (nestedRoots.includes(parts[0]) && parts.length >= 3) {
    return `${parts[0]}/${parts[1]}`;
  }
  return parts[0];
}
537
-
538
/** Return the directory part of `filePath` with backslashes turned into forward slashes. */
function getDirectory(filePath) {
  const parent = dirname(filePath);
  return parent.split('\\').join('/');
}
541
-
542
/**
 * Look up the heuristic description for a directory by its last path segment.
 *
 * @param {string} dirName - Forward-slash directory path.
 * @returns {string|null} The description, or null when none is registered.
 */
function getDirDescription(dirName) {
  const last = dirName.split('/').pop();
  // Own-property check: a directory called `constructor` or `toString` must
  // not resolve to a member inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(DIR_DESCRIPTIONS, last)) return null;
  return DIR_DESCRIPTIONS[last] || null;
}
546
-
547
/**
 * Map a file's extension to a human-readable language label.
 *
 * @param {string} filePath
 * @returns {string} The label, or 'Unknown' for unrecognised extensions.
 */
function detectLanguage(filePath) {
  switch (extname(filePath)) {
    case '.tsx': return 'React/TypeScript';
    case '.jsx': return 'React/JavaScript';
    case '.ts': return 'TypeScript';
    case '.mjs': return 'ESM';
    case '.cjs': return 'CommonJS';
    case '.js': return 'JavaScript';
    case '.py':
    case '.pyi': return 'Python';
    case '.go': return 'Go';
    case '.java': return 'Java';
    case '.kt':
    case '.kts': return 'Kotlin';
    case '.cs': return 'C#';
    case '.rs': return 'Rust';
    case '.rb': return 'Ruby';
    case '.swift': return 'Swift';
    case '.php': return 'PHP';
    case '.c': return 'C';
    case '.h': return 'C/C++ Header';
    case '.cpp':
    case '.cc': return 'C++';
    case '.hpp': return 'C++ Header';
    default: return 'Unknown';
  }
}
564
-
565
- // ---------------------------------------------------------------------------
566
- // Chunk generators
567
- // ---------------------------------------------------------------------------
568
-
569
/**
 * Build one `project:` overview chunk per project: a header line with the
 * language and counts, then one line per directory listing up to eight of
 * its type names.
 *
 * @param {Object<string, string[]>} filesByProject - Project name to file paths.
 * @param {Object<string, object[]>} typesByProject - Project name to extracted types.
 * @returns {Array<{key: string, content: string, metadata: object, tags: string[]}>}
 */
function generateProjectOverviews(filesByProject, typesByProject) {
  const chunks = [];

  for (const [project, files] of Object.entries(filesByProject)) {
    // Own-property lookup so a project named like an Object.prototype member
    // (e.g. `constructor`) is not mistaken for a list of types.
    const types = Object.prototype.hasOwnProperty.call(typesByProject, project)
      ? typesByProject[project] || []
      : [];
    const lang = detectProjectLang(files);

    // A Map keeps arbitrary directory names safe; a plain object would
    // resolve `constructor` / `toString` to inherited members and crash on push.
    const namesByDir = new Map();
    for (const t of types) {
      const rel = relative(project, dirname(t.file)).replace(/\\/g, '/') || '(root)';
      if (!namesByDir.has(rel)) namesByDir.set(rel, []);
      namesByDir.get(rel).push(t.name);
    }

    let content = `# ${project} [${lang}, ${files.length} files, ${types.length} types]\n\n`;

    const sortedDirs = [...namesByDir.keys()].sort();
    for (const dir of sortedDirs) {
      const names = namesByDir.get(dir);
      const desc = getDirDescription(dir);
      const descStr = desc ? ` -- ${desc}` : '';
      const shown = names.slice(0, 8).join(', ');
      const overflow = names.length > 8 ? `, ... (+${names.length - 8} more)` : '';
      content += ` ${dir}${descStr}: ${shown}${overflow}\n`;
    }

    chunks.push({
      key: `project:${project}`,
      content: content.trim(),
      metadata: { kind: 'project-overview', project, language: lang, fileCount: files.length, typeCount: types.length },
      tags: ['project', project],
    });
  }

  return chunks;
}
605
-
606
/**
 * Name the dominant language of a file list. When the leader does not have
 * more than twice the runner-up's file count, both are returned as
 * `First/Second`.
 *
 * @param {string[]} files
 * @returns {string} 'Unknown' for an empty list.
 */
function detectProjectLang(files) {
  const tally = new Map();
  files.forEach((file) => {
    const label = detectLanguage(file);
    tally.set(label, (tally.get(label) || 0) + 1);
  });

  const ranked = [...tally.entries()].sort((x, y) => y[1] - x[1]);
  if (ranked.length === 0) return 'Unknown';

  const [leader, runnerUp] = ranked;
  const leaderDominates = runnerUp === undefined || leader[1] > runnerUp[1] * 2;
  return leaderDominates ? leader[0] : `${leader[0]}/${runnerUp[0]}`;
}
618
-
619
/**
 * Build one `dir:` chunk for every directory that holds at least two types,
 * listing the types alphabetically with their bases and source file name.
 *
 * @param {Object<string, object[]>} typesByDir - Directory path to extracted types.
 * @returns {Array<{key: string, content: string, metadata: object, tags: string[]}>}
 */
function generateDirectoryDetails(typesByDir) {
  return Object.entries(typesByDir)
    .filter(([, types]) => types.length >= 2)
    .map(([dir, types]) => {
      const desc = getDirDescription(dir);
      const heading = `# ${dir} (${types.length} types)\n`;
      const intro = desc ? `${heading}${desc}\n\n` : `${heading}\n`;

      const rows = [...types]
        .sort((a, b) => a.name.localeCompare(b.name))
        .map((t) => {
          const relations = [];
          if (t.bases) relations.push(`: ${t.bases}`);
          if (t.implements) relations.push(`: ${t.implements}`);
          const tail = relations.length ? ` ${relations.join(' ')}` : '';
          return ` ${t.name}${tail} (${basename(t.file)})\n`;
        });

      return {
        key: `dir:${dir}`,
        content: (intro + rows.join('')).trim(),
        metadata: { kind: 'directory-detail', directory: dir, typeCount: types.length },
        tags: ['directory', dir.split('/')[0]],
      };
    });
}
650
-
651
/**
 * Build batched `iface-map:` chunks that list, for every interface with at
 * least one implementation, the classes that extend or implement it.
 *
 * Both `implements` and `bases` may hold comma-separated lists (C# and Swift
 * put base class and interfaces into one `: A, B<T>` clause), so each is
 * split on commas and stripped of generic arguments before matching against
 * the known interface names.
 *
 * @param {Array<{name: string, kind: string, bases: (string|null),
 *   implements: (string|null), file: string}>} allTypes
 * @returns {Array<{key: string, content: string, metadata: object, tags: string[]}>}
 */
function generateInterfaceMaps(allTypes) {
  // "Base, IRepo<T>" -> ["Base", "IRepo"]
  const splitTypeList = (list) => {
    if (!list) return [];
    let flat = list;
    let previous;
    do {
      // Repeat so nested generics such as A<B<C>> are removed inside-out.
      previous = flat;
      flat = flat.replace(/<[^<>]*>/g, '');
    } while (flat !== previous);
    return flat.split(',').map((s) => s.trim()).filter(Boolean);
  };

  const interfaces = new Map();
  for (const t of allTypes) {
    if (t.kind === 'interface' && !interfaces.has(t.name)) {
      interfaces.set(t.name, { defined: t.file, implementations: [] });
    }
  }

  for (const t of allTypes) {
    if (t.kind !== 'class') continue;
    // A Set avoids listing a class twice when a name shows up in both clauses.
    const candidates = new Set([...splitTypeList(t.implements), ...splitTypeList(t.bases)]);
    for (const iface of candidates) {
      const entry = interfaces.get(iface);
      if (entry) {
        entry.implementations.push({
          name: t.name,
          project: getProjectName(t.file),
        });
      }
    }
  }

  const mapped = [...interfaces.entries()]
    .filter(([, v]) => v.implementations.length > 0)
    .sort(([a], [b]) => a.localeCompare(b));

  if (mapped.length === 0) return [];

  const chunks = [];
  const totalBatches = Math.ceil(mapped.length / IFACE_MAP_BATCH);

  for (let i = 0; i < mapped.length; i += IFACE_MAP_BATCH) {
    const batch = mapped.slice(i, i + IFACE_MAP_BATCH);
    const batchNum = Math.floor(i / IFACE_MAP_BATCH) + 1;

    let content = `# Interface-to-Implementation Map (${batchNum}/${totalBatches})\n\n`;
    for (const [name, info] of batch) {
      const implStr = info.implementations
        .map((impl) => `${impl.name} (${impl.project})`)
        .join(', ');
      content += ` ${name} -> ${implStr}\n`;
    }

    chunks.push({
      key: `iface-map:${batchNum}`,
      content: content.trim(),
      metadata: { kind: 'interface-map', batch: batchNum, totalBatches, count: batch.length },
      tags: ['interface-map'],
    });
  }

  return chunks;
}
707
-
708
/**
 * Build batched `type-index:` chunks mapping every type name to its file
 * path and language, sorted by name, TYPE_INDEX_BATCH entries per chunk.
 *
 * @param {Array<{name: string, file: string}>} allTypes
 * @returns {Array<{key: string, content: string, metadata: object, tags: string[]}>}
 */
function generateTypeIndex(allTypes) {
  const ordered = [...allTypes].sort((a, b) => a.name.localeCompare(b.name));
  const totalBatches = Math.ceil(ordered.length / TYPE_INDEX_BATCH);
  const chunks = [];

  for (let batchNum = 1; batchNum <= totalBatches; batchNum++) {
    const offset = (batchNum - 1) * TYPE_INDEX_BATCH;
    const batch = ordered.slice(offset, offset + TYPE_INDEX_BATCH);
    const rows = batch
      .map((t) => ` ${t.name} -> ${t.file} [${detectLanguage(t.file)}]\n`)
      .join('');
    const content = `# Type Index (batch ${batchNum}, ${batch.length} types)\n\n${rows}`;

    chunks.push({
      key: `type-index:${batchNum}`,
      content: content.trim(),
      metadata: { kind: 'type-index', batch: batchNum, totalBatches, count: batch.length },
      tags: ['type-index'],
    });
  }

  return chunks;
}
733
-
734
/**
 * Generate one `file:<path>` chunk per source file.
 *
 * Each chunk's content holds the file name and path, its project and
 * language, the directory description when one exists, and either the list
 * of detected types (kind, name, entity marker, base, implementations) or a
 * placeholder line when nothing was detected. Tags and metadata carry the
 * same facts in structured form so individual files can be found precisely.
 *
 * @param {Object<string, object[]>} typesByFile - File path to extracted types
 *   (an empty array for files without detected declarations).
 * @returns {Array<{key: string, content: string, metadata: object, tags: string[]}>}
 */
function generateFileEntries(typesByFile) {
  // Path segment -> tag, tested with both separator styles.
  const pathTags = [
    ['services', 'service'],
    ['routes', 'route'],
    ['middleware', 'middleware'],
    ['components', 'component'],
    ['hooks', 'hook'],
    ['api', 'api'],
    ['utils', 'util'],
  ];

  return Object.entries(typesByFile).map(([filePath, types]) => {
    const project = getProjectName(filePath);
    const dir = getDirectory(filePath);
    const dirDesc = getDirDescription(dir);
    const lang = detectLanguage(filePath);
    const fileName = basename(filePath);

    // Header lines of the searchable content string.
    const sections = [
      `# ${fileName} (${filePath})\n`,
      `Project: ${project} | Language: ${lang}\n`,
    ];
    if (dirDesc) sections.push(`Directory: ${dirDesc}\n`);

    if (types.length > 0) {
      sections.push('\nExported types:\n');
      for (const t of types) {
        const entityMark = t.isEntity ? ' [MikroORM entity]' : '';
        const baseMark = t.bases ? ` extends ${t.bases}` : '';
        const implMark = t.implements ? ` implements ${t.implements}` : '';
        sections.push(` ${t.kind} ${t.name}${entityMark}${baseMark}${implMark}\n`);
      }
    } else {
      // Keep files without detected exports discoverable via search.
      sections.push('\nSource file (no detected exports)\n');
    }

    const hasEntities = types.some((t) => t.isEntity);
    const tags = ['file', project];
    if (hasEntities) tags.push('entity');
    if (types.some((t) => t.kind === 'interface')) tags.push('interface');
    for (const [segment, tag] of pathTags) {
      if (filePath.includes(`/${segment}/`) || filePath.includes(`\\${segment}\\`)) {
        tags.push(tag);
      }
    }

    return {
      key: `file:${filePath}`,
      content: sections.join('').trim(),
      metadata: {
        kind: 'file-detail',
        filePath,
        project,
        directory: dir,
        language: lang,
        typeCount: types.length,
        hasEntities,
        typeNames: types.map((t) => t.name),
      },
      tags,
    };
  });
}
808
-
809
- // ---------------------------------------------------------------------------
810
- // Main
811
- // ---------------------------------------------------------------------------
812
-
813
/**
 * Entry point: enumerate source files, extract declarations, build all
 * chunk types, persist them in the `code-map` namespace and (optionally)
 * generate embeddings.
 *
 * Short-circuits:
 *  - no source files found          -> nothing to do
 *  - `--stats`                      -> print counts and exit
 *  - file list hash unchanged       -> skip regeneration (only back-fill
 *                                      missing embeddings if needed)
 *
 * @returns {Promise<void>}
 */
async function main() {
  const startTime = Date.now();

  log(`Project root: ${projectRoot}`);

  // 1. Get source files
  log('Enumerating source files via git ls-files...');
  const files = getSourceFiles();
  log(`Found ${files.length} source files`);

  if (files.length === 0) {
    log('No source files found — nothing to index');
    return;
  }

  // 2. Check hash for incremental skip
  const currentHash = computeFileListHash(files);

  if (statsOnly) {
    const db = await getDb();
    let count;
    try {
      count = countNamespace(db);
    } finally {
      // Always release the in-memory database, even if the query throws.
      db.close();
    }
    log(`Stats: ${files.length} source files, ${count} chunks in code-map namespace`);
    log(`File list hash: ${currentHash.slice(0, 12)}...`);
    return;
  }

  if (isUnchanged(currentHash)) {
    const db = await getDb();
    let count;
    let missing;
    try {
      count = countNamespace(db);
      missing = countMissingEmbeddings(db);
    } finally {
      db.close();
    }
    if (count > 0) {
      if (missing > 0 && !skipEmbeddings) {
        log(`File list unchanged but ${missing}/${count} entries missing embeddings — generating...`);
        await runEmbeddings();
      } else {
        log(`Skipping — file list unchanged (${count} chunks in DB, hash ${currentHash.slice(0, 12)}...)`);
      }
      return;
    }
    log('File list unchanged but no chunks in DB — forcing regeneration');
  }

  // 3. Extract types from all files
  log('Extracting type declarations...');
  const allTypes = [];
  const filesByProject = {};
  const typesByProject = {};
  const typesByDir = {};
  const typesByFile = {};

  for (const file of files) {
    const project = getProjectName(file);
    if (!filesByProject[project]) filesByProject[project] = [];
    filesByProject[project].push(file);

    const types = extractTypes(file);

    // Track ALL files for file-level entries (not just those with types)
    // This ensures plain JS projects without explicit exports still get indexed
    typesByFile[file] = types;

    for (const t of types) {
      allTypes.push(t);

      if (!typesByProject[project]) typesByProject[project] = [];
      typesByProject[project].push(t);

      const dir = getDirectory(t.file);
      if (!typesByDir[dir]) typesByDir[dir] = [];
      typesByDir[dir].push(t);
    }
  }

  // typesByFile holds every file, so count only those that actually yielded
  // declarations; otherwise this figure would just repeat files.length.
  const filesWithTypes = Object.values(typesByFile).filter(list => list.length > 0).length;

  log(`Extracted ${allTypes.length} type declarations from ${Object.keys(filesByProject).length} projects`);
  log(`Files with exported types: ${filesWithTypes} (of ${files.length} indexed files)`);

  // 4. Generate all chunk types
  log('Generating chunks...');
  const projectChunks = generateProjectOverviews(filesByProject, typesByProject);
  const dirChunks = generateDirectoryDetails(typesByDir);
  const ifaceChunks = generateInterfaceMaps(allTypes);
  const typeIdxChunks = generateTypeIndex(allTypes);
  const fileChunks = generateFileEntries(typesByFile);

  const allChunks = [...projectChunks, ...dirChunks, ...ifaceChunks, ...typeIdxChunks, ...fileChunks];

  log(`Generated ${allChunks.length} chunks:`);
  log(` Project overviews: ${projectChunks.length}`);
  log(` Directory details: ${dirChunks.length}`);
  log(` Interface maps: ${ifaceChunks.length}`);
  log(` Type index: ${typeIdxChunks.length}`);
  log(` File entries: ${fileChunks.length} (NEW — file-level granularity)`);

  // 5. Write to database (the namespace is rebuilt from scratch each run)
  log('Writing to memory database...');
  const db = await getDb();
  try {
    deleteNamespace(db);

    for (const chunk of allChunks) {
      storeEntry(db, chunk.key, chunk.content, chunk.metadata, chunk.tags);
    }

    saveDb(db);
  } finally {
    db.close();
  }

  // 6. Save hash for incremental caching (only reached after a successful save)
  writeFileSync(HASH_CACHE_PATH, currentHash, 'utf-8');

  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
  log(`Done in ${elapsed}s — ${allChunks.length} chunks written to code-map namespace`);

  // 7. Generate embeddings inline (not detached — ensures Xenova runs reliably)
  if (!skipEmbeddings) {
    await runEmbeddings();
  }
}
931
-
932
/**
 * Run the embedding builder for the code-map namespace as a blocking child
 * process. Looks for `build-embeddings.mjs` next to this script first, then
 * under the project's `.claude/scripts` directory; does nothing when neither
 * exists. A failing child process is reported as a warning, never thrown.
 *
 * @returns {Promise<void>}
 */
async function runEmbeddings() {
  const scriptDir = dirname(fileURLToPath(import.meta.url));
  const embedScript = [
    resolve(scriptDir, 'build-embeddings.mjs'),
    resolve(projectRoot, '.claude/scripts/build-embeddings.mjs'),
  ].find((candidate) => existsSync(candidate));

  if (!embedScript) {
    return;
  }

  log('Generating embeddings for code-map...');

  const command = `node "${embedScript}" --namespace code-map`;
  const options = {
    cwd: projectRoot,
    stdio: 'inherit',
    timeout: 120000,
    windowsHide: true,
  };

  try {
    execSync(command, options);
  } catch (err) {
    // Only the first line of the message is shown to keep the log compact.
    log(`Warning: embedding generation failed: ${err.message?.split('\n')[0]}`);
  }
}
952
-
953
// Kick off the run; any unhandled failure is printed and turns into exit code 1.
main().catch((fatal) => {
  console.error('[code-map] Fatal error:', fatal);
  process.exit(1);
});
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Generate structural code map for a monorepo or project.
4
+ *
5
+ * Produces five chunk types stored in the `code-map` namespace of .swarm/memory.db:
6
+ * 1. project: — one per top-level project directory (bird's-eye overview)
7
+ * 2. dir: — one per directory with 2+ exported types (drill-down detail)
8
+ * 3. iface-map: — batched interface-to-implementation mappings
9
+ * 4. type-index: — batched type-name-to-file-path lookups
10
+ * 5. file: — ONE PER FILE with exported types (file-level granularity)
11
+ *
12
+ * The `file:` entries are the key improvement — they enable precise semantic search
13
+ * for individual types, entities, and services instead of diluting results across
14
+ * large batches.
15
+ *
16
+ * Design: regex-based extraction (no AST parser), incremental via SHA-256 hash,
17
+ * stores in sql.js memory DB, then runs embedding generation inline (synchronously).
18
+ *
19
+ * Usage:
20
+ * node node_modules/moflo/bin/generate-code-map.mjs # Incremental
21
+ * node node_modules/moflo/bin/generate-code-map.mjs --force # Full regenerate
22
+ * node node_modules/moflo/bin/generate-code-map.mjs --verbose # Detailed logging
23
+ * node node_modules/moflo/bin/generate-code-map.mjs --no-embeddings # Skip embedding generation
24
+ * node node_modules/moflo/bin/generate-code-map.mjs --stats # Print stats and exit
25
+ * npx flo-codemap # Via npx
26
+ */
27
+
28
+ import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from 'fs';
29
+ import { resolve, dirname, relative, basename, extname } from 'path';
30
+ import { fileURLToPath } from 'url';
31
+ import { createHash } from 'crypto';
32
+ import { execSync, spawn } from 'child_process';
33
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
34
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
35
+
36
+
37
+ const __dirname = dirname(fileURLToPath(import.meta.url));
38
+
39
/**
 * Locate the project root by walking upward from the current working
 * directory until a directory containing `package.json` is found.
 *
 * The filesystem root itself is never probed; when no ancestor below it has
 * a `package.json`, the current working directory is returned instead.
 *
 * @returns {string} Path of the detected project root.
 */
function findProjectRoot() {
  const fsRoot = resolve(process.cwd(), '/');
  for (let current = process.cwd(); current !== fsRoot; current = dirname(current)) {
    if (existsSync(resolve(current, 'package.json'))) {
      return current;
    }
  }
  return process.cwd();
}
49
+
50
// Resolved once at start-up; the database and hash-cache paths derive from it.
const projectRoot = findProjectRoot();
const NAMESPACE = 'code-map';
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
const HASH_CACHE_PATH = resolve(projectRoot, '.swarm/code-map-hash.txt');

// Directory names that are never indexed.
const EXCLUDE_DIRS = [
  'node_modules',
  'dist',
  'build',
  '.next',
  'coverage',
  '.claude',
  'template',
  'back-office-template',
];

// Human-readable descriptions keyed by the last segment of a directory path.
const DIR_DESCRIPTIONS = {
  entities: 'MikroORM entity definitions',
  services: 'business logic services',
  routes: 'Fastify route handlers',
  middleware: 'request middleware (auth, validation, tenancy)',
  schemas: 'Zod validation schemas',
  types: 'TypeScript type definitions',
  utils: 'utility helpers',
  config: 'configuration',
  migrations: 'database migrations',
  scripts: 'CLI scripts',
  components: 'React components',
  pages: 'route page components',
  contexts: 'React context providers',
  hooks: 'React custom hooks',
  layout: 'app shell layout',
  themes: 'MUI theme configuration',
  api: 'API client layer',
  locales: 'i18n translation files',
  tests: 'test suites',
  e2e: 'end-to-end tests',
  providers: 'dependency injection providers',
};

// How many entries go into one batched chunk.
const IFACE_MAP_BATCH = 20;
const TYPE_INDEX_BATCH = 30; // Reduced from 80 for better search relevance

// Command-line switches (everything after `node <script>`).
const args = process.argv.slice(2);
const hasFlag = (...names) => names.some((flag) => args.includes(flag));
const force = hasFlag('--force');
const verbose = hasFlag('--verbose', '-v');
const skipEmbeddings = hasFlag('--no-embeddings');
const statsOnly = hasFlag('--stats');
96
+
97
/** Print a message to stdout with the `[code-map]` prefix. */
function log(msg) {
  console.log(`[code-map] ${msg}`);
}

/** Same as `log`, but silent unless the verbose flag is set. */
function debug(msg) {
  if (!verbose) return;
  console.log(`[code-map] ${msg}`);
}
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Database helpers
102
+ // ---------------------------------------------------------------------------
103
+
104
/** Create the directory that holds the database file if it is missing. */
function ensureDbDir() {
  const parent = dirname(DB_PATH);
  if (existsSync(parent)) {
    return;
  }
  mkdirSync(parent, { recursive: true });
}
108
+
109
/**
 * Open the sql.js database: load the existing file at DB_PATH when present,
 * otherwise start with an empty in-memory database. The `memory_entries`
 * table and its indexes are created if they do not exist yet.
 *
 * @returns {Promise<object>} An open sql.js Database; the caller must close it.
 */
async function getDb() {
  ensureDbDir();
  const SQL = await initSqlJs();
  const db = existsSync(DB_PATH)
    ? new SQL.Database(readFileSync(DB_PATH))
    : new SQL.Database();

  const schemaStatements = [
    `
 CREATE TABLE IF NOT EXISTS memory_entries (
 id TEXT PRIMARY KEY,
 key TEXT NOT NULL,
 namespace TEXT DEFAULT 'default',
 content TEXT NOT NULL,
 type TEXT DEFAULT 'semantic',
 embedding TEXT,
 embedding_model TEXT DEFAULT 'local',
 embedding_dimensions INTEGER,
 tags TEXT,
 metadata TEXT,
 owner_id TEXT,
 created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
 updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now') * 1000),
 expires_at INTEGER,
 last_accessed_at INTEGER,
 access_count INTEGER DEFAULT 0,
 status TEXT DEFAULT 'active',
 UNIQUE(namespace, key)
 )
 `,
    `CREATE INDEX IF NOT EXISTS idx_memory_key_ns ON memory_entries(key, namespace)`,
    `CREATE INDEX IF NOT EXISTS idx_memory_namespace ON memory_entries(namespace)`,
  ];
  for (const statement of schemaStatements) {
    db.run(statement);
  }
  return db;
}
146
+
147
/**
 * Serialize the in-memory database and overwrite the file at DB_PATH.
 *
 * @param {object} db - Open sql.js Database.
 */
function saveDb(db) {
  writeFileSync(DB_PATH, Buffer.from(db.export()));
}
151
+
152
/**
 * Build a row id of the form `mem_<epoch-ms>_<random base-36 suffix>`.
 *
 * The suffix is at most nine characters taken from `Math.random()`; it is a
 * collision-avoidance aid only and is not cryptographically secure.
 *
 * @returns {string} Newly generated id.
 */
function generateId() {
  // slice(2, 11) drops the leading "0." and keeps up to nine characters —
  // the same result as the deprecated substr(2, 9).
  const suffix = Math.random().toString(36).slice(2, 11);
  return `mem_${Date.now()}_${suffix}`;
}
155
+
156
/**
 * Insert (or replace) one entry in the code-map namespace.
 *
 * @param {object} db - Open sql.js Database.
 * @param {string} key - Entry key within the namespace.
 * @param {string} content - Text content of the chunk.
 * @param {object} [metadata={}] - Stored as a JSON string.
 * @param {string[]} [tags=[]] - Stored as a JSON string.
 */
function storeEntry(db, key, content, metadata = {}, tags = []) {
  const timestamp = Date.now();
  const params = [
    generateId(),
    key,
    NAMESPACE,
    content,
    JSON.stringify(metadata),
    JSON.stringify(tags),
    timestamp, // created_at
    timestamp, // updated_at
  ];
  db.run(`
 INSERT OR REPLACE INTO memory_entries
 (id, key, namespace, content, metadata, tags, created_at, updated_at, status)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
 `, params);
}
165
+
166
/**
 * Remove every row that belongs to the code-map namespace.
 *
 * @param {object} db - Open sql.js Database.
 */
function deleteNamespace(db) {
  const sql = `DELETE FROM memory_entries WHERE namespace = ?`;
  db.run(sql, [NAMESPACE]);
}
169
+
170
/**
 * Count the rows stored in the code-map namespace.
 *
 * @param {object} db - Open sql.js Database.
 * @returns {number} Row count (0 when the query yields no row).
 */
function countNamespace(db) {
  const stmt = db.prepare(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = ?`);
  try {
    stmt.bind([NAMESPACE]);
    return stmt.step() ? stmt.getAsObject().cnt : 0;
  } finally {
    // Release the prepared statement even when bind/step throws.
    stmt.free();
  }
}
178
+
179
/**
 * Count code-map rows whose `embedding` column is NULL or an empty string.
 *
 * @param {object} db - Open sql.js Database.
 * @returns {number} Number of entries still lacking an embedding.
 */
function countMissingEmbeddings(db) {
  const stmt = db.prepare(`SELECT COUNT(*) as cnt FROM memory_entries WHERE namespace = ? AND (embedding IS NULL OR embedding = '')`);
  try {
    stmt.bind([NAMESPACE]);
    return stmt.step() ? stmt.getAsObject().cnt : 0;
  } finally {
    // Release the prepared statement even when bind/step throws.
    stmt.free();
  }
}
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // Source file enumeration — git ls-files with filesystem fallback
190
+ // ---------------------------------------------------------------------------
191
+
192
/**
 * Read the `code_map` block of `moflo.yaml` (directories, extensions, exclude).
 *
 * Parsing is a deliberately small line-based subset of YAML: `key:` lines,
 * inline arrays (`key: ["a", "b"]`) and block lists (`- item`). A list given
 * in the file REPLACES the built-in default for that key, regardless of
 * whether it is written inline or as a block list. Any read or parse problem
 * falls back to the defaults (best-effort by design).
 *
 * @returns {{directories: string[], extensions: string[], exclude: string[]}}
 *   Effective configuration.
 */
function readCodeMapConfig() {
  const defaults = {
    directories: ['src'],
    extensions: [
      '.ts', '.tsx', '.js', '.mjs', '.jsx', // JS/TS
      '.py', '.pyi', // Python
      '.go', // Go
      '.java', '.kt', '.kts', // JVM
      '.cs', // C#
      '.rs', // Rust
      '.rb', // Ruby
      '.swift', // Swift
      '.php', // PHP
      '.c', '.h', '.cpp', '.hpp', '.cc', // C/C++
    ],
    exclude: [...EXCLUDE_DIRS],
  };
  const unquote = (text) => text.trim().replace(/^["']|["']$/g, '');
  try {
    const yamlPath = resolve(projectRoot, 'moflo.yaml');
    if (!existsSync(yamlPath)) return defaults;
    const content = readFileSync(yamlPath, 'utf-8');
    // Simple YAML parsing for code_map block
    const block = content.match(/code_map:\s*\n((?:\s+\w+:.*\n?|\s+- .*\n?)+)/);
    if (!block) return defaults;
    const lines = block[1].split('\n');
    let currentKey = null;
    const result = { ...defaults };
    // Keys whose value has already been taken from the file. The first list
    // item of a key starts a fresh array so defaults are replaced rather than
    // extended (previously block lists were appended to the defaults, e.g.
    // directories became ['src', 'src', 'packages']).
    const overridden = new Set();
    for (const line of lines) {
      const keyMatch = line.match(/^\s+(\w+):/);
      const itemMatch = line.match(/^\s+- (.+)/);
      if (keyMatch) {
        currentKey = keyMatch[1];
        // Inline array: extensions: [".ts", ".tsx"]
        const inlineArray = line.match(/\[([^\]]+)\]/);
        if (inlineArray && (currentKey === 'extensions' || currentKey === 'exclude' || currentKey === 'directories')) {
          result[currentKey] = inlineArray[1].split(',').map(unquote);
          overridden.add(currentKey);
        }
      } else if (itemMatch && currentKey) {
        if (!overridden.has(currentKey) || !Array.isArray(result[currentKey])) {
          result[currentKey] = [];
          overridden.add(currentKey);
        }
        result[currentKey].push(unquote(itemMatch[1]));
      }
    }
    return result;
  } catch { return defaults; }
}
238
+
239
/**
 * Recursively collect source files below `dir` (filesystem fallback).
 *
 * @param {string} dir - Directory relative to the project root ('' for the root).
 * @param {Set<string>} extensions - File extensions to keep (with leading dot).
 * @param {Set<string>} excludeSet - Entry names to skip at any depth.
 * @param {number} [maxDepth=8] - Deepest recursion level that is still read.
 * @param {number} [depth=0] - Current recursion level.
 * @returns {string[]} Project-relative paths using forward slashes.
 */
function walkDir(dir, extensions, excludeSet, maxDepth = 8, depth = 0) {
  if (depth > maxDepth) return [];

  let entries;
  try {
    entries = readdirSync(resolve(projectRoot, dir), { withFileTypes: true });
  } catch {
    // Unreadable directory: contribute nothing.
    return [];
  }

  const found = [];
  for (const entry of entries) {
    const { name } = entry;
    if (excludeSet.has(name)) continue;

    // Use forward slashes for consistent cross-platform paths
    const childPath = dir ? `${dir}/${name}` : name;

    if (entry.isDirectory()) {
      for (const nested of walkDir(childPath, extensions, excludeSet, maxDepth, depth + 1)) {
        found.push(nested);
      }
    } else if (entry.isFile() && extensions.has(extname(name))) {
      found.push(childPath);
    }
  }
  return found;
}
260
+
261
/**
 * Enumerate the source files to index.
 *
 * Primary path: `git ls-files` restricted to the configured extensions
 * (fast, respects .gitignore), minus files whose top-level directory is in
 * the configured `exclude` list. Fallback (git missing, not a repository, or
 * no matching files): walk the configured directories on disk.
 *
 * @returns {string[]} Project-relative file paths.
 */
function getSourceFiles() {
  const config = readCodeMapConfig();
  const extSet = new Set(config.extensions);
  const excludeSet = new Set(config.exclude);

  // Honour the configured exclude list here too (it defaults to
  // EXCLUDE_DIRS); previously only the filesystem fallback respected it.
  const isExcluded = (file) => config.exclude.some(
    (ex) => file.startsWith(`${ex}/`) || file.startsWith(`${ex}\\`),
  );

  // Build git glob patterns from configured extensions.
  // NOTE: extensions come from the project's own moflo.yaml and are
  // interpolated into a shell command line as-is.
  const gitGlobs = config.extensions.map(ext => `"*${ext}"`).join(' ');

  // Try git ls-files first (fast, respects .gitignore)
  try {
    const raw = execSync(
      `git ls-files -- ${gitGlobs}`,
      { cwd: projectRoot, encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 }
    ).trim();

    if (raw) {
      // Tolerate CRLF line endings in the command output.
      const files = raw.split(/\r?\n/).filter(f => !isExcluded(f));
      if (files.length > 0) return files;
    }
  } catch {
    // git not available or not a git repo — fall through
  }

  // Fallback: walk configured directories from moflo.yaml
  log('git ls-files returned no files — falling back to filesystem walk');
  const files = [];

  for (const dir of config.directories) {
    if (existsSync(resolve(projectRoot, dir))) {
      files.push(...walkDir(dir, extSet, excludeSet));
    }
  }

  return files;
}
301
+
302
/**
 * Hash the list of file paths (not their contents) with SHA-256.
 * The list is sorted on a copy first, so input order does not matter and the
 * caller's array is left untouched.
 *
 * @param {string[]} files - File paths.
 * @returns {string} Hex digest.
 */
function computeFileListHash(files) {
  const canonical = Array.from(files).sort().join('\n');
  const hasher = createHash('sha256');
  hasher.update(canonical);
  return hasher.digest('hex');
}
306
+
307
/**
 * Tell whether the cached file-list hash equals `currentHash`.
 * Always false when `--force` was given or no cache file exists.
 *
 * @param {string} currentHash - Hash of the current file list.
 * @returns {boolean}
 */
function isUnchanged(currentHash) {
  if (force || !existsSync(HASH_CACHE_PATH)) {
    return false;
  }
  return readFileSync(HASH_CACHE_PATH, 'utf-8').trim() === currentHash;
}
313
+
314
+ // ---------------------------------------------------------------------------
315
+ // Type extraction (regex-based, no AST) — multi-language
316
+ // ---------------------------------------------------------------------------
317
+
318
// Per-language extraction patterns: each entry is [regex, kindOverride?]
// Group 1 = name, Group 2 = base/extends (optional), Group 3 = implements (optional)
// Patterns are tried in order against a trimmed line; the first match wins.
const LANG_PATTERNS = {
  // JS/TS — require `export` keyword
  ts: [
    [/^export\s+(?:default\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+([\w.]+))?(?:\s+implements\s+([\w,\s.]+))?/],
    [/^export\s+(?:default\s+)?interface\s+(\w+)(?:\s+extends\s+([\w,\s.]+))?/],
    [/^export\s+(?:default\s+)?type\s+(\w+)\s*[=<]/],
    [/^export\s+(?:const\s+)?enum\s+(\w+)/],
    [/^export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)/],
    [/^export\s+(?:default\s+)?const\s+(\w+)\s*[=:]/],
    // CommonJS / plain JS (no export keyword)
    [/^(?:module\.exports\s*=\s*)?class\s+(\w+)(?:\s+extends\s+([\w.]+))?/],
    [/^(?:async\s+)?function\s+(\w+)\s*\(/],
    [/^const\s+(\w+)\s*=\s*(?:async\s+)?\(?.*\)?\s*=>/],
    [/^(?:var|let|const)\s+(\w+)\s*=\s*require\s*\(/],
  ],

  // Python — class/def at module level
  py: [
    [/^class\s+(\w+)(?:\(([^)]+)\))?:/],
    [/^(?:async\s+)?def\s+(\w+)\s*\(/],
    [/^(\w+)\s*:\s*TypeAlias\s*=/, 'type'],
    [/^(\w+)\s*=\s*(?:TypeVar|NewType|NamedTuple|dataclass)\s*\(/, 'type'],
  ],

  // Go — top-level type/func/var declarations
  go: [
    [/^type\s+(\w+)\s+struct\b/, 'struct'],
    [/^type\s+(\w+)\s+interface\b/, 'interface'],
    [/^type\s+(\w+)\s+/, 'type'],
    [/^func\s+(\w+)\s*\(/],
    [/^func\s+\([^)]+\)\s+(\w+)\s*\(/, 'method'],
    [/^var\s+(\w+)\s+/, 'var'],
    [/^const\s+(\w+)\s+/, 'const'],
  ],

  // Java/Kotlin
  java: [
    [/^(?:public|protected|private|abstract|static|final|sealed|open|\s)*class\s+(\w+)(?:\s+extends\s+([\w.]+))?(?:\s+implements\s+([\w,\s.]+))?/],
    [/^(?:public|protected|private|abstract|static|sealed|\s)*interface\s+(\w+)(?:\s+extends\s+([\w,\s.]+))?/],
    [/^(?:public|protected|private|abstract|static|\s)*enum\s+(\w+)/],
    [/^(?:public|protected|private|abstract|static|\s)*@?interface\s+(\w+)/, 'annotation'],
    [/^(?:public|protected|private|abstract|static|final|synchronized|\s)*(?:[\w<>\[\],\s]+)\s+(\w+)\s*\(/, 'method'],
    [/^(?:data\s+)?class\s+(\w+)(?:\s*:\s*([\w.]+))?/, 'class'], // Kotlin
    [/^(?:fun|suspend\s+fun)\s+(\w+)\s*\(/], // Kotlin
    [/^object\s+(\w+)/, 'object'], // Kotlin
  ],

  // C#
  cs: [
    [/^(?:public|protected|private|internal|abstract|static|sealed|partial|\s)*class\s+(\w+)(?:\s*:\s*([\w.,\s<>]+))?/],
    [/^(?:public|protected|private|internal|abstract|static|\s)*interface\s+(\w+)(?:\s*:\s*([\w.,\s<>]+))?/],
    [/^(?:public|protected|private|internal|abstract|static|\s)*enum\s+(\w+)/],
    [/^(?:public|protected|private|internal|abstract|static|\s)*struct\s+(\w+)/],
    [/^(?:public|protected|private|internal|abstract|static|\s)*record\s+(\w+)/],
    [/^(?:public|protected|private|internal|abstract|static|\s)*delegate\s+\S+\s+(\w+)\s*\(/, 'delegate'],
    [/^namespace\s+([\w.]+)/, 'namespace'],
  ],

  // Rust
  rs: [
    [/^pub(?:\([\w]+\))?\s+struct\s+(\w+)/, 'struct'],
    [/^pub(?:\([\w]+\))?\s+enum\s+(\w+)/],
    [/^pub(?:\([\w]+\))?\s+trait\s+(\w+)(?:\s*:\s*([\w\s+]+))?/, 'trait'],
    [/^pub(?:\([\w]+\))?\s+(?:async\s+)?fn\s+(\w+)/],
    [/^pub(?:\([\w]+\))?\s+type\s+(\w+)\s*=/, 'type'],
    [/^pub(?:\([\w]+\))?\s+mod\s+(\w+)/, 'module'],
    [/^impl(?:<[^>]+>)?\s+(\w+)/, 'impl'],
    [/^struct\s+(\w+)/, 'struct'],
    [/^enum\s+(\w+)/],
    [/^trait\s+(\w+)/, 'trait'],
    [/^(?:async\s+)?fn\s+(\w+)/],
  ],

  // Ruby
  rb: [
    [/^class\s+(\w+)(?:\s*<\s*([\w:]+))?/],
    [/^module\s+(\w+)/, 'module'],
    // `self.` is matched but NOT captured, so group 1 is always the method
    // name. (Capturing it left group 1 undefined for a plain `def foo`,
    // which made such methods disappear from the index.)
    [/^def\s+(?:self\.)?(\w+)/, 'method'],
  ],

  // Swift
  swift: [
    [/^(?:public|open|internal|fileprivate|private|\s)*(?:final\s+)?class\s+(\w+)(?:\s*:\s*([\w,\s]+))?/],
    [/^(?:public|open|internal|fileprivate|private|\s)*protocol\s+(\w+)(?:\s*:\s*([\w,\s]+))?/, 'protocol'],
    [/^(?:public|open|internal|fileprivate|private|\s)*struct\s+(\w+)/, 'struct'],
    [/^(?:public|open|internal|fileprivate|private|\s)*enum\s+(\w+)/],
    [/^(?:public|open|internal|fileprivate|private|\s)*func\s+(\w+)\s*\(/],
    [/^(?:public|open|internal|fileprivate|private|\s)*typealias\s+(\w+)/, 'type'],
  ],

  // PHP
  php: [
    [/^(?:abstract\s+|final\s+)?class\s+(\w+)(?:\s+extends\s+([\w\\]+))?(?:\s+implements\s+([\w\\,\s]+))?/],
    [/^interface\s+(\w+)(?:\s+extends\s+([\w\\,\s]+))?/],
    [/^trait\s+(\w+)/, 'trait'],
    [/^enum\s+(\w+)/],
    [/^(?:public|protected|private|static|\s)*function\s+(\w+)\s*\(/],
  ],

  // C/C++
  c: [
    [/^(?:typedef\s+)?struct\s+(\w+)/, 'struct'],
    [/^(?:typedef\s+)?enum\s+(\w+)/],
    [/^(?:typedef\s+)?union\s+(\w+)/, 'union'],
    [/^class\s+(\w+)(?:\s*:\s*(?:public|protected|private)\s+([\w:]+))?/],
    [/^namespace\s+(\w+)/, 'namespace'],
    [/^template\s*<[^>]*>\s*class\s+(\w+)/, 'template-class'],
    [/^(?:static\s+|inline\s+|extern\s+)*(?:[\w*&:<>,\s]+)\s+(\w+)\s*\(/, 'function'],
  ],
};

// Map file extensions to language keys
const EXT_TO_LANG = {
  '.ts': 'ts', '.tsx': 'ts', '.js': 'ts', '.mjs': 'ts', '.jsx': 'ts', '.cjs': 'ts',
  '.py': 'py', '.pyi': 'py',
  '.go': 'go',
  '.java': 'java', '.kt': 'java', '.kts': 'java',
  '.cs': 'cs',
  '.rs': 'rs',
  '.rb': 'rb',
  '.swift': 'swift',
  '.php': 'php',
  '.c': 'c', '.h': 'c', '.cpp': 'c', '.hpp': 'c', '.cc': 'c',
};

// Marks the next matched declaration as a MikroORM entity.
const ENTITY_DECORATOR = /@Entity\s*\(/;
446
+
447
/**
 * Extract declarations from one source file using the per-language regex
 * table (no AST). Every line is trimmed before matching, the first matching
 * pattern wins, and each name is reported at most once per file.
 *
 * @param {string} filePath - Path relative to the project root.
 * @returns {Array<{name: string, kind: string, bases: (string|null),
 *   implements: (string|null), isEntity: boolean, file: string}>}
 *   Declarations in file order; empty when the file is missing or unreadable.
 */
function extractTypes(filePath) {
  const fullPath = resolve(projectRoot, filePath);
  if (!existsSync(fullPath)) return [];

  let content;
  try {
    content = readFileSync(fullPath, 'utf-8');
  } catch {
    return [];
  }

  const ext = extname(filePath);
  // Unknown extensions are treated as JS/TS.
  const lang = EXT_TO_LANG[ext] || 'ts';
  const patterns = LANG_PATTERNS[lang] || LANG_PATTERNS.ts;

  const lines = content.split('\n');
  const types = [];
  const seen = new Set();
  // Set when an @Entity( decorator was seen and not yet attached to a declaration.
  let isEntityNext = false;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();

    if (ENTITY_DECORATOR.test(line)) {
      isEntityNext = true;
      continue;
    }

    for (const [pattern, kindOverride] of patterns) {
      const m = line.match(pattern);
      if (m) {
        // A pattern may capture an optional prefix (Ruby's `self.`) as
        // group 1. In that case group 1 is either 'self.' or undefined and
        // the real name sits in group 2 — which then must not also be
        // reported as a base class.
        const nameInSecondGroup = m[1] === undefined || m[1] === 'self.';
        const name = nameInSecondGroup ? m[2] : m[1];
        if (!name || seen.has(name)) continue;
        seen.add(name);
        const kind = kindOverride || detectKind(line, name, lang);
        const bases = nameInSecondGroup ? '' : (m[2] || '').trim();
        const implements_ = (m[3] || '').trim();
        types.push({
          name,
          kind,
          bases: bases || null,
          implements: implements_ || null,
          isEntity: isEntityNext,
          file: filePath,
        });
        isEntityNext = false;
        break;
      }
    }

    // A non-empty line that is neither another decorator nor an export
    // cancels a pending @Entity marker.
    if (isEntityNext && !line.startsWith('@') && !line.startsWith('export') && line.length > 0) {
      isEntityNext = false;
    }
  }

  return types;
}
505
+
506
/**
 * Classify a matched declaration line.
 *
 * The keyword that directly precedes `name` decides (e.g. `function getType`
 * is a function even though the line also contains the word `type`). A
 * `const` bound to a function or class expression is reported as that
 * function/class. Only when no keyword directly introduces the name does the
 * whole line get scanned for keywords in a fixed priority order.
 *
 * @param {string} line - Trimmed source line.
 * @param {string} name - Declared identifier found on that line.
 * @param {string} lang - Language key (currently unused).
 * @returns {string} One of 'class', 'interface', 'type', 'enum', 'struct',
 *   'trait', 'protocol', 'module', 'function', 'const' or 'export'.
 */
function detectKind(line, name, lang) {
  const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const declaring = new RegExp(
    `\\b(class|interface|type|enum|struct|trait|protocol|module|function|func|fun|def|fn|const)\\s+${escapedName}\\b`,
  ).exec(line);

  if (declaring) {
    const keyword = declaring[1];
    if (keyword === 'const') {
      if (/=\s*(?:async\s+)?function\b/.test(line)) return 'function';
      if (/=\s*class\b/.test(line)) return 'class';
      return 'const';
    }
    if (keyword === 'function' || keyword === 'func' || keyword === 'fun' || keyword === 'def' || keyword === 'fn') {
      return 'function';
    }
    return keyword;
  }

  // Fallback: keyword anywhere on the line, in priority order.
  if (/\bclass\b/.test(line)) return 'class';
  if (/\binterface\b/.test(line)) return 'interface';
  if (/\btype\b/.test(line)) return 'type';
  if (/\benum\b/.test(line)) return 'enum';
  if (/\bstruct\b/.test(line)) return 'struct';
  if (/\btrait\b/.test(line)) return 'trait';
  if (/\bprotocol\b/.test(line)) return 'protocol';
  if (/\bmodule\b/.test(line)) return 'module';
  if (/\bfunction\b/.test(line) || /\bfunc\b/.test(line) || /\bdef\b/.test(line) || /\bfn\b/.test(line)) return 'function';
  if (/\bconst\b/.test(line)) return 'const';
  return 'export';
}
519
+
520
+ // ---------------------------------------------------------------------------
521
+ // Project structure analysis
522
+ // ---------------------------------------------------------------------------
523
+
524
/**
 * Derive the project name from a forward-slash relative path.
 *
 * For a handful of container directories the project is the first two path
 * segments (`packages/<name>`, `back-office/<name>`, ...); for everything
 * else it is the first segment alone.
 *
 * @param {string} filePath - Path relative to the project root.
 * @returns {string} Project identifier.
 */
function getProjectName(filePath) {
  const twoLevelRoots = ['packages', 'back-office', 'customer-portal', 'admin-console', 'webhooks'];
  const [top, second] = filePath.split('/');
  const hasSecondSegment = filePath.split('/').length >= 2;

  if (hasSecondSegment && twoLevelRoots.includes(top)) {
    return `${top}/${second}`;
  }
  return top;
}
537
+
538
/**
 * Return the directory part of a path with backslashes turned into forward
 * slashes.
 *
 * @param {string} filePath - File path.
 * @returns {string} Containing directory ('.' when the path has none).
 */
function getDirectory(filePath) {
  const parent = dirname(filePath);
  return parent.split('\\').join('/');
}
541
+
542
/**
 * Look up the heuristic description for a directory, keyed by the last
 * segment of its forward-slash path.
 *
 * Only own entries of the description table count, so a directory that
 * happens to be called `constructor` or `toString` does not pick up an
 * inherited object member.
 *
 * @param {string} dirName - Directory path using forward slashes.
 * @returns {(string|null)} Description, or null when none is known.
 */
function getDirDescription(dirName) {
  const last = dirName.split('/').pop();
  if (!Object.prototype.hasOwnProperty.call(DIR_DESCRIPTIONS, last)) return null;
  return DIR_DESCRIPTIONS[last] || null;
}
546
+
547
/**
 * Map a file's extension to a display name for its language.
 *
 * @param {string} filePath - File path or name.
 * @returns {string} Language label, or 'Unknown' for unrecognised extensions.
 */
function detectLanguage(filePath) {
  switch (extname(filePath)) {
    case '.tsx': return 'React/TypeScript';
    case '.jsx': return 'React/JavaScript';
    case '.ts': return 'TypeScript';
    case '.mjs': return 'ESM';
    case '.cjs': return 'CommonJS';
    case '.js': return 'JavaScript';
    case '.py':
    case '.pyi': return 'Python';
    case '.go': return 'Go';
    case '.java': return 'Java';
    case '.kt':
    case '.kts': return 'Kotlin';
    case '.cs': return 'C#';
    case '.rs': return 'Rust';
    case '.rb': return 'Ruby';
    case '.swift': return 'Swift';
    case '.php': return 'PHP';
    case '.c': return 'C';
    case '.h': return 'C/C++ Header';
    case '.cpp':
    case '.cc': return 'C++';
    case '.hpp': return 'C++ Header';
    default: return 'Unknown';
  }
}
564
+
565
+ // ---------------------------------------------------------------------------
566
+ // Chunk generators
567
+ // ---------------------------------------------------------------------------
568
+
569
/**
 * Build one `project:<name>` overview chunk per project.
 *
 * Each chunk has a header with the project's language, file count and type
 * count, followed by one line per directory (sorted) listing up to eight
 * type names and a "+N more" marker for the rest.
 *
 * @param {Object<string, string[]>} filesByProject - Files grouped by project.
 * @param {Object<string, object[]>} typesByProject - Extracted types grouped by project.
 * @returns {object[]} Chunks with `key`, `content`, `metadata` and `tags`.
 */
function generateProjectOverviews(filesByProject, typesByProject) {
  return Object.entries(filesByProject).map(([project, files]) => {
    const types = typesByProject[project] || [];
    const lang = detectProjectLang(files);

    // Group type names by their directory relative to the project.
    const namesByDir = {};
    for (const t of types) {
      const rel = relative(project, dirname(t.file)).replace(/\\/g, '/') || '(root)';
      (namesByDir[rel] || (namesByDir[rel] = [])).push(t.name);
    }

    const header = `# ${project} [${lang}, ${files.length} files, ${types.length} types]\n\n`;
    const body = Object.keys(namesByDir).sort().map((dir) => {
      const names = namesByDir[dir];
      const desc = getDirDescription(dir);
      const descStr = desc ? ` -- ${desc}` : '';
      const shown = names.slice(0, 8).join(', ');
      const overflow = names.length > 8 ? `, ... (+${names.length - 8} more)` : '';
      return ` ${dir}${descStr}: ${shown}${overflow}\n`;
    }).join('');

    return {
      key: `project:${project}`,
      content: (header + body).trim(),
      metadata: {
        kind: 'project-overview',
        project,
        language: lang,
        fileCount: files.length,
        typeCount: types.length,
      },
      tags: ['project', project],
    };
  });
}
605
+
606
/**
 * Summarise the language of a project from its files.
 *
 * Returns the most frequent language when it is the only one or has more
 * than twice as many files as the runner-up; otherwise the top two joined by
 * a slash.
 *
 * @param {string[]} files - File paths of one project.
 * @returns {string} Language label ('Unknown' for an empty list).
 */
function detectProjectLang(files) {
  const tally = new Map();
  for (const file of files) {
    const language = detectLanguage(file);
    tally.set(language, (tally.get(language) || 0) + 1);
  }

  const ranked = [...tally.entries()].sort((left, right) => right[1] - left[1]);
  if (ranked.length === 0) return 'Unknown';

  const [first, second] = ranked;
  const dominant = ranked.length === 1 || first[1] > second[1] * 2;
  return dominant ? first[0] : `${first[0]}/${second[0]}`;
}
618
+
619
/**
 * Build one `dir:<path>` chunk for every directory holding at least two
 * extracted types. Types are listed alphabetically with their base /
 * implemented types and the file they come from.
 *
 * @param {Object<string, object[]>} typesByDir - Extracted types grouped by directory.
 * @returns {object[]} Chunks with `key`, `content`, `metadata` and `tags`.
 */
function generateDirectoryDetails(typesByDir) {
  const describeType = (t) => {
    const relations = [t.bases, t.implements]
      .filter(Boolean)
      .map((value) => `: ${value}`);
    const suffixStr = relations.length ? ` ${relations.join(' ')}` : '';
    return ` ${t.name}${suffixStr} (${basename(t.file)})\n`;
  };

  return Object.entries(typesByDir)
    .filter(([, types]) => types.length >= 2)
    .map(([dir, types]) => {
      const desc = getDirDescription(dir);
      const heading = `# ${dir} (${types.length} types)\n` + (desc ? `${desc}\n` : '') + '\n';
      const listing = [...types]
        .sort((a, b) => a.name.localeCompare(b.name))
        .map(describeType)
        .join('');

      return {
        key: `dir:${dir}`,
        content: (heading + listing).trim(),
        metadata: { kind: 'directory-detail', directory: dir, typeCount: types.length },
        tags: ['directory', dir.split('/')[0]],
      };
    });
}
650
+
651
/**
 * Build batched `iface-map:<n>` chunks that list, for every interface with at
 * least one implementing class, the classes (and their projects) that name
 * it in their `implements` or base list.
 *
 * Both lists are split on commas: several languages (C#, Swift, Python) put
 * interfaces into the base list, e.g. `class Foo : Base, IFoo`, and those
 * would otherwise never be matched.
 *
 * @param {object[]} allTypes - Every extracted type.
 * @returns {object[]} Chunks with `key`, `content`, `metadata` and `tags`;
 *   empty when no interface has a known implementation.
 */
function generateInterfaceMaps(allTypes) {
  const interfaces = new Map();

  // The first definition of an interface name wins.
  for (const t of allTypes) {
    if (t.kind === 'interface') {
      if (!interfaces.has(t.name)) {
        interfaces.set(t.name, { defined: t.file, implementations: [] });
      }
    }
  }

  const splitNames = (list) => (list ? list.split(',').map(s => s.trim()).filter(Boolean) : []);

  for (const t of allTypes) {
    if (t.kind !== 'class') continue;
    const impls = splitNames(t.implements);
    const bases = splitNames(t.bases);
    for (const iface of [...impls, ...bases]) {
      if (interfaces.has(iface)) {
        interfaces.get(iface).implementations.push({
          name: t.name,
          project: getProjectName(t.file),
        });
      }
    }
  }

  const mapped = [...interfaces.entries()]
    .filter(([, v]) => v.implementations.length > 0)
    .sort(([a], [b]) => a.localeCompare(b));

  if (mapped.length === 0) return [];

  const chunks = [];
  const totalBatches = Math.ceil(mapped.length / IFACE_MAP_BATCH);

  for (let i = 0; i < mapped.length; i += IFACE_MAP_BATCH) {
    const batch = mapped.slice(i, i + IFACE_MAP_BATCH);
    const batchNum = Math.floor(i / IFACE_MAP_BATCH) + 1;

    let content = `# Interface-to-Implementation Map (${batchNum}/${totalBatches})\n\n`;
    for (const [name, info] of batch) {
      const implStr = info.implementations
        .map(impl => `${impl.name} (${impl.project})`)
        .join(', ');
      content += ` ${name} -> ${implStr}\n`;
    }

    chunks.push({
      key: `iface-map:${batchNum}`,
      content: content.trim(),
      metadata: { kind: 'interface-map', batch: batchNum, totalBatches, count: batch.length },
      tags: ['interface-map'],
    });
  }

  return chunks;
}
707
+
708
/**
 * Build batched `type-index:<n>` chunks mapping every type name to the file
 * that declares it, sorted alphabetically by name.
 *
 * @param {object[]} allTypes - Every extracted type.
 * @returns {object[]} Chunks with `key`, `content`, `metadata` and `tags`.
 */
function generateTypeIndex(allTypes) {
  const ordered = [...allTypes].sort((a, b) => a.name.localeCompare(b.name));
  const totalBatches = Math.ceil(ordered.length / TYPE_INDEX_BATCH);

  return Array.from({ length: totalBatches }, (_, index) => {
    const batchNum = index + 1;
    const offset = index * TYPE_INDEX_BATCH;
    const batch = ordered.slice(offset, offset + TYPE_INDEX_BATCH);

    const heading = `# Type Index (batch ${batchNum}, ${batch.length} types)\n\n`;
    const rows = batch
      .map((t) => ` ${t.name} -> ${t.file} [${detectLanguage(t.file)}]\n`)
      .join('');

    return {
      key: `type-index:${batchNum}`,
      content: (heading + rows).trim(),
      metadata: { kind: 'type-index', batch: batchNum, totalBatches, count: batch.length },
      tags: ['type-index'],
    };
  });
}
733
+
734
/**
 * Generate one file-level entry per source file passed in.
 *
 * Each file gets its own entry keyed as `file:<path>`, containing:
 *   - The file name and path
 *   - The project, language and (when one is available) directory description
 *   - All exported type names with their kind, base class, and implementations,
 *     or a placeholder line when no exports were detected for the file
 *   - Whether a type is a MikroORM entity
 *
 * This enables precise semantic search: a query for a single type name can match
 * the specific file entry rather than being diluted across a large batch of types.
 *
 * Tags: always `file` and the project name, plus `entity` / `interface` when a
 * matching type is present, plus one tag per well-known directory name found
 * anywhere in the file's directory path (including the first segment). Tags are
 * de-duplicated, e.g. when the project itself is called `api`.
 *
 * @param {Object<string, Array<object>>} typesByFile - Map of file path to the
 *   type records extracted from that file (an empty array is allowed).
 * @returns {Array<{key: string, content: string, metadata: object, tags: string[]}>}
 */
function generateFileEntries(typesByFile) {
  // Directory name -> tag, in the order the tags should appear.
  const directoryTags = [
    ['services', 'service'],
    ['routes', 'route'],
    ['middleware', 'middleware'],
    ['components', 'component'],
    ['hooks', 'hook'],
    ['api', 'api'],
    ['utils', 'util'],
  ];
  const chunks = [];

  for (const [filePath, types] of Object.entries(typesByFile)) {
    const project = getProjectName(filePath);
    const dir = getDirectory(filePath);
    const dirDesc = getDirDescription(dir);
    const lang = detectLanguage(filePath);
    const fileName = basename(filePath);
    const hasEntities = types.some(t => t.isEntity);

    // Build a rich, searchable content string
    let content = `# ${fileName} (${filePath})\n`;
    content += `Project: ${project} | Language: ${lang}\n`;
    if (dirDesc) content += `Directory: ${dirDesc}\n`;

    if (types.length > 0) {
      content += '\nExported types:\n';
      for (const t of types) {
        let line = ` ${t.kind} ${t.name}`;
        if (t.isEntity) line += ' [MikroORM entity]';
        if (t.bases) line += ` extends ${t.bases}`;
        if (t.implements) line += ` implements ${t.implements}`;
        content += line + '\n';
      }
    } else {
      // For files without detected exports, include a summary line
      // so the file is still discoverable via semantic search
      content += '\nSource file (no detected exports)\n';
    }

    // Build tags for filtering. A Set keeps insertion order and drops duplicates.
    const tags = new Set(['file', project]);
    if (hasEntities) tags.add('entity');
    if (types.some(t => t.kind === 'interface')) tags.add('interface');

    // Compare whole directory segments (either separator) so a tagged directory
    // is recognised even when it is the first segment of the path.
    const dirSegments = new Set(filePath.split(/[\\/]/).slice(0, -1));
    for (const [dirName, tag] of directoryTags) {
      if (dirSegments.has(dirName)) tags.add(tag);
    }

    chunks.push({
      key: `file:${filePath}`,
      content: content.trim(),
      metadata: {
        kind: 'file-detail',
        filePath,
        project,
        directory: dir,
        language: lang,
        typeCount: types.length,
        hasEntities,
        typeNames: types.map(t => t.name),
      },
      tags: [...tags],
    });
  }

  return chunks;
}
808
+
809
+ // ---------------------------------------------------------------------------
810
+ // Main
811
+ // ---------------------------------------------------------------------------
812
+
813
/**
 * Run one code-map generation pass.
 *
 * Steps:
 *   1. Enumerate source files; stop when there are none.
 *   2. Hash the file list. In stats mode, print counts and stop. When the hash
 *      is unchanged and chunks already exist, stop after (optionally) filling
 *      in missing embeddings.
 *   3. Extract type declarations from every file and group them by project,
 *      directory and file.
 *   4. Generate project, directory, interface-map, type-index and file chunks.
 *   5. Replace the code-map namespace in the memory database with the new chunks.
 *   6. Persist the file-list hash for the next incremental run.
 *   7. Generate embeddings unless they were disabled.
 *
 * Database handles are closed in `finally` blocks so a failing query or write
 * does not leave the handle open.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const startTime = Date.now();

  log(`Project root: ${projectRoot}`);

  // 1. Get source files
  log('Enumerating source files via git ls-files...');
  const files = getSourceFiles();
  log(`Found ${files.length} source files`);

  if (files.length === 0) {
    log('No source files found — nothing to index');
    return;
  }

  // 2. Check hash for incremental skip
  const currentHash = computeFileListHash(files);

  if (statsOnly) {
    const db = await getDb();
    let count;
    try {
      count = countNamespace(db);
    } finally {
      db.close();
    }
    log(`Stats: ${files.length} source files, ${count} chunks in code-map namespace`);
    log(`File list hash: ${currentHash.slice(0, 12)}...`);
    return;
  }

  if (isUnchanged(currentHash)) {
    const db = await getDb();
    let count;
    let missing;
    try {
      count = countNamespace(db);
      missing = countMissingEmbeddings(db);
    } finally {
      db.close();
    }
    if (count > 0) {
      if (missing > 0 && !skipEmbeddings) {
        log(`File list unchanged but ${missing}/${count} entries missing embeddings — generating...`);
        await runEmbeddings();
      } else {
        log(`Skipping — file list unchanged (${count} chunks in DB, hash ${currentHash.slice(0, 12)}...)`);
      }
      return;
    }
    // Hash matches but the namespace is empty: fall through and rebuild.
    log('File list unchanged but no chunks in DB — forcing regeneration');
  }

  // 3. Extract types from all files
  log('Extracting type declarations...');
  const allTypes = [];
  const filesByProject = {};
  const typesByProject = {};
  const typesByDir = {};
  const typesByFile = {};

  for (const file of files) {
    const project = getProjectName(file);
    if (!filesByProject[project]) filesByProject[project] = [];
    filesByProject[project].push(file);

    const types = extractTypes(file);

    // Track ALL files for file-level entries (not just those with types)
    // This ensures plain JS projects without explicit exports still get indexed
    typesByFile[file] = types;

    for (const t of types) {
      allTypes.push(t);

      if (!typesByProject[project]) typesByProject[project] = [];
      typesByProject[project].push(t);

      const dir = getDirectory(t.file);
      if (!typesByDir[dir]) typesByDir[dir] = [];
      typesByDir[dir].push(t);
    }
  }

  // typesByFile holds every file, so count only those that actually yielded types.
  const filesWithTypes = Object.values(typesByFile).filter(types => types.length > 0).length;

  log(`Extracted ${allTypes.length} type declarations from ${Object.keys(filesByProject).length} projects`);
  log(`Files with exported types: ${filesWithTypes}`);

  // 4. Generate all chunk types
  log('Generating chunks...');
  const projectChunks = generateProjectOverviews(filesByProject, typesByProject);
  const dirChunks = generateDirectoryDetails(typesByDir);
  const ifaceChunks = generateInterfaceMaps(allTypes);
  const typeIdxChunks = generateTypeIndex(allTypes);
  const fileChunks = generateFileEntries(typesByFile);

  const allChunks = [...projectChunks, ...dirChunks, ...ifaceChunks, ...typeIdxChunks, ...fileChunks];

  log(`Generated ${allChunks.length} chunks:`);
  log(` Project overviews: ${projectChunks.length}`);
  log(` Directory details: ${dirChunks.length}`);
  log(` Interface maps: ${ifaceChunks.length}`);
  log(` Type index: ${typeIdxChunks.length}`);
  log(` File entries: ${fileChunks.length} (NEW — file-level granularity)`);

  // 5. Write to database
  log('Writing to memory database...');
  const db = await getDb();
  try {
    deleteNamespace(db);

    for (const chunk of allChunks) {
      storeEntry(db, chunk.key, chunk.content, chunk.metadata, chunk.tags);
    }

    saveDb(db);
  } finally {
    db.close();
  }

  // 6. Save hash for incremental caching
  writeFileSync(HASH_CACHE_PATH, currentHash, 'utf-8');

  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
  log(`Done in ${elapsed}s — ${allChunks.length} chunks written to code-map namespace`);

  // 7. Generate embeddings inline (not detached — ensures Xenova runs reliably)
  if (!skipEmbeddings) {
    await runEmbeddings();
  }
}
931
+
932
/**
 * Generate embeddings for the `code-map` namespace by running the
 * `build-embeddings.mjs` script as a blocking child process.
 *
 * The script is looked up next to this file first, then under
 * `.claude/scripts/` in the project root. When neither exists a warning is
 * logged and the function returns without doing anything. A failing or
 * timed-out child process (120 s limit) is reported as a warning and does not
 * throw.
 *
 * @returns {Promise<void>}
 */
async function runEmbeddings() {
  const embedCandidates = [
    resolve(dirname(fileURLToPath(import.meta.url)), 'build-embeddings.mjs'),
    resolve(projectRoot, '.claude/scripts/build-embeddings.mjs'),
  ];
  const embedScript = embedCandidates.find(p => existsSync(p));
  if (!embedScript) {
    // Say so explicitly; otherwise entries stay without embeddings with no hint why.
    log('Warning: build-embeddings.mjs not found — skipping embedding generation');
    return;
  }

  log('Generating embeddings for code-map...');
  try {
    execSync(`node "${embedScript}" --namespace code-map`, {
      cwd: projectRoot,
      stdio: 'inherit',
      timeout: 120000,
      windowsHide: true,
    });
  } catch (err) {
    // Best effort: only the first line of the error message is logged.
    log(`Warning: embedding generation failed: ${err.message?.split('\n')[0]}`);
  }
}
952
+
953
// Start the run. Any rejection from main() is printed with the `[code-map]`
// prefix and the process exits with status 1.
main().catch(function reportFatalError(failure) {
  console.error('[code-map] Fatal error:', failure);
  process.exit(1);
});