neuronlayer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. package/CONTRIBUTING.md +127 -0
  2. package/LICENSE +21 -0
  3. package/README.md +305 -0
  4. package/dist/index.js +38016 -0
  5. package/esbuild.config.js +26 -0
  6. package/package.json +63 -0
  7. package/src/cli/commands.ts +382 -0
  8. package/src/core/adr-exporter.ts +253 -0
  9. package/src/core/architecture/architecture-enforcement.ts +228 -0
  10. package/src/core/architecture/duplicate-detector.ts +288 -0
  11. package/src/core/architecture/index.ts +6 -0
  12. package/src/core/architecture/pattern-learner.ts +306 -0
  13. package/src/core/architecture/pattern-library.ts +403 -0
  14. package/src/core/architecture/pattern-validator.ts +324 -0
  15. package/src/core/change-intelligence/bug-correlator.ts +444 -0
  16. package/src/core/change-intelligence/change-intelligence.ts +221 -0
  17. package/src/core/change-intelligence/change-tracker.ts +334 -0
  18. package/src/core/change-intelligence/fix-suggester.ts +340 -0
  19. package/src/core/change-intelligence/index.ts +5 -0
  20. package/src/core/code-verifier.ts +843 -0
  21. package/src/core/confidence/confidence-scorer.ts +251 -0
  22. package/src/core/confidence/conflict-checker.ts +289 -0
  23. package/src/core/confidence/index.ts +5 -0
  24. package/src/core/confidence/source-tracker.ts +263 -0
  25. package/src/core/confidence/warning-detector.ts +241 -0
  26. package/src/core/context-rot/compaction.ts +284 -0
  27. package/src/core/context-rot/context-health.ts +243 -0
  28. package/src/core/context-rot/context-rot-prevention.ts +213 -0
  29. package/src/core/context-rot/critical-context.ts +221 -0
  30. package/src/core/context-rot/drift-detector.ts +255 -0
  31. package/src/core/context-rot/index.ts +7 -0
  32. package/src/core/context.ts +263 -0
  33. package/src/core/decision-extractor.ts +339 -0
  34. package/src/core/decisions.ts +69 -0
  35. package/src/core/deja-vu.ts +421 -0
  36. package/src/core/engine.ts +1455 -0
  37. package/src/core/feature-context.ts +726 -0
  38. package/src/core/ghost-mode.ts +412 -0
  39. package/src/core/learning.ts +485 -0
  40. package/src/core/living-docs/activity-tracker.ts +296 -0
  41. package/src/core/living-docs/architecture-generator.ts +428 -0
  42. package/src/core/living-docs/changelog-generator.ts +348 -0
  43. package/src/core/living-docs/component-generator.ts +230 -0
  44. package/src/core/living-docs/doc-engine.ts +110 -0
  45. package/src/core/living-docs/doc-validator.ts +282 -0
  46. package/src/core/living-docs/index.ts +8 -0
  47. package/src/core/project-manager.ts +297 -0
  48. package/src/core/summarizer.ts +267 -0
  49. package/src/core/test-awareness/change-validator.ts +499 -0
  50. package/src/core/test-awareness/index.ts +5 -0
  51. package/src/index.ts +49 -0
  52. package/src/indexing/ast.ts +563 -0
  53. package/src/indexing/embeddings.ts +85 -0
  54. package/src/indexing/indexer.ts +245 -0
  55. package/src/indexing/watcher.ts +78 -0
  56. package/src/server/gateways/aggregator.ts +374 -0
  57. package/src/server/gateways/index.ts +473 -0
  58. package/src/server/gateways/memory-ghost.ts +343 -0
  59. package/src/server/gateways/memory-query.ts +452 -0
  60. package/src/server/gateways/memory-record.ts +346 -0
  61. package/src/server/gateways/memory-review.ts +410 -0
  62. package/src/server/gateways/memory-status.ts +517 -0
  63. package/src/server/gateways/memory-verify.ts +392 -0
  64. package/src/server/gateways/router.ts +434 -0
  65. package/src/server/gateways/types.ts +610 -0
  66. package/src/server/mcp.ts +154 -0
  67. package/src/server/resources.ts +85 -0
  68. package/src/server/tools.ts +2261 -0
  69. package/src/storage/database.ts +262 -0
  70. package/src/storage/tier1.ts +135 -0
  71. package/src/storage/tier2.ts +764 -0
  72. package/src/storage/tier3.ts +123 -0
  73. package/src/types/documentation.ts +619 -0
  74. package/src/types/index.ts +222 -0
  75. package/src/utils/config.ts +193 -0
  76. package/src/utils/files.ts +117 -0
  77. package/src/utils/time.ts +37 -0
  78. package/src/utils/tokens.ts +52 -0
@@ -0,0 +1,563 @@
1
+ import Parser from 'web-tree-sitter';
2
+ import { readFileSync, existsSync, mkdirSync, writeFileSync } from 'fs';
3
+ import { join, dirname } from 'path';
4
+ import { fileURLToPath } from 'url';
5
+ import type { CodeSymbol, Import, Export, SymbolKind } from '../types/index.js';
6
+
7
/**
 * Per-language parsing configuration.
 *
 * - `wasmFile`: file name of the tree-sitter grammar for the language.
 * - `extensions`: lower-case file extensions (including the leading dot)
 *   that map to the language.
 * - `queries`: tree-sitter query sources, keyed by the kind of construct
 *   they capture. Every query is optional.
 */
interface LanguageConfig {
  wasmFile: string;
  extensions: string[];
  queries: {
    functions?: string;
    classes?: string;
    interfaces?: string;
    types?: string;
    imports?: string;
    exports?: string;
  };
}

/** Language configurations for parsing, keyed by language name. */
const LANGUAGE_CONFIGS: Record<string, LanguageConfig> = {
  typescript: {
    wasmFile: 'tree-sitter-typescript.wasm',
    // `.mts` / `.cts` are the TypeScript counterparts of `.mjs` / `.cjs`,
    // which the JavaScript entry below already lists.
    extensions: ['.ts', '.tsx', '.mts', '.cts'],
    queries: {
      functions: `
        (function_declaration name: (identifier) @name) @func
        (arrow_function) @func
        (method_definition name: (property_identifier) @name) @func
      `,
      classes: `
        (class_declaration name: (type_identifier) @name) @class
      `,
      interfaces: `
        (interface_declaration name: (type_identifier) @name) @interface
      `,
      types: `
        (type_alias_declaration name: (type_identifier) @name) @type
      `,
      imports: `
        (import_statement) @import
      `,
      exports: `
        (export_statement) @export
      `
    }
  },
  javascript: {
    wasmFile: 'tree-sitter-javascript.wasm',
    extensions: ['.js', '.jsx', '.mjs', '.cjs'],
    queries: {
      functions: `
        (function_declaration name: (identifier) @name) @func
        (arrow_function) @func
        (method_definition name: (property_identifier) @name) @func
      `,
      classes: `
        (class_declaration name: (identifier) @name) @class
      `,
      imports: `
        (import_statement) @import
      `,
      exports: `
        (export_statement) @export
      `
    }
  },
  python: {
    wasmFile: 'tree-sitter-python.wasm',
    extensions: ['.py'],
    queries: {
      functions: `
        (function_definition name: (identifier) @name) @func
      `,
      classes: `
        (class_definition name: (identifier) @name) @class
      `,
      imports: `
        (import_statement) @import
        (import_from_statement) @import
      `
    }
  }
};
85
+
86
/**
 * Extracts symbols, imports and exports from source files.
 *
 * Although a tree-sitter parser is initialised, all extraction is currently
 * done by the line-oriented, regex-based fallback in this class. Because
 * matching is performed one line at a time, statements that span several
 * lines (for example a multi-line `import { ... }`) are not recognised.
 */
export class ASTParser {
  private parser: Parser | null = null;
  private languages: Map<string, Parser.Language> = new Map();
  private initialized = false;
  // Set once parseFile() has seen initialize() fail, so the failing
  // initialisation is not retried (and re-logged) for every file.
  private initFailed = false;
  private dataDir: string;

  /**
   * @param dataDir Directory under which a `wasm/` folder with grammar
   *   files is expected.
   */
  constructor(dataDir: string) {
    this.dataDir = dataDir;
  }

  /**
   * Initialises the tree-sitter runtime. Safe to call repeatedly; once it
   * has succeeded further calls are no-ops.
   *
   * @throws Re-throws whatever `Parser.init()` or the constructor throws,
   *   after logging it.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    try {
      await Parser.init();
      this.parser = new Parser();
      this.initialized = true;
      this.initFailed = false;
      console.error('AST Parser initialized');
    } catch (error) {
      console.error('Failed to initialize AST parser:', error);
      throw error;
    }
  }

  /**
   * Placeholder for loading a tree-sitter grammar. Grammar loading is not
   * implemented yet, so for every known language this logs a notice and
   * returns `null`.
   *
   * @returns A cached grammar if one exists, otherwise `null`.
   */
  private async loadLanguage(langName: string): Promise<Parser.Language | null> {
    const cached = this.languages.get(langName);
    if (cached) {
      return cached;
    }

    const config = LANGUAGE_CONFIGS[langName];
    if (!config) {
      return null;
    }

    try {
      // Where the grammar is expected to live once loading is implemented.
      const wasmDir = join(this.dataDir, 'wasm');
      const wasmPath = join(wasmDir, config.wasmFile);

      // For now, we'll use a simplified approach without external WASM files
      // In production, you'd download these from tree-sitter releases
      console.error(`Language ${langName} WASM not available yet`);
      return null;
    } catch (error) {
      console.error(`Failed to load language ${langName}:`, error);
      return null;
    }
  }

  /**
   * Maps a file path to a configured language name using its extension
   * (case-insensitive).
   *
   * @returns The language key, or `null` when the path has no extension or
   *   the extension is not configured.
   */
  getLanguageForFile(filePath: string): string | null {
    const dot = filePath.lastIndexOf('.');
    if (dot === -1) {
      return null;
    }
    const ext = filePath.slice(dot).toLowerCase();

    for (const [lang, config] of Object.entries(LANGUAGE_CONFIGS)) {
      if (config.extensions.includes(ext)) {
        return lang;
      }
    }
    return null;
  }

  /**
   * Parses one file and returns what was found in it.
   *
   * A failure to initialise tree-sitter does not fail the parse: extraction
   * is regex-based and does not need the WASM runtime, so the failure (which
   * initialize() has already logged) is remembered and parsing continues.
   *
   * @param filePath Path of the file; used for language detection and
   *   copied into every returned record.
   * @param content Full text of the file.
   */
  async parseFile(filePath: string, content: string): Promise<{
    symbols: CodeSymbol[];
    imports: Import[];
    exports: Export[];
  } | null> {
    if (!this.initialized && !this.initFailed) {
      try {
        await this.initialize();
      } catch {
        this.initFailed = true;
      }
    }

    // Use regex-based parsing as fallback since WASM loading is complex
    return this.parseWithRegex(filePath, content);
  }

  /**
   * Regex-based parsing fallback (works without WASM). Files in an
   * unrecognised language yield three empty lists.
   */
  private parseWithRegex(filePath: string, content: string): {
    symbols: CodeSymbol[];
    imports: Import[];
    exports: Export[];
  } {
    const symbols: CodeSymbol[] = [];
    const imports: Import[] = [];
    const exports: Export[] = [];
    const lines = content.split('\n');
    const lang = this.getLanguageForFile(filePath);

    if (lang === 'typescript' || lang === 'javascript') {
      this.parseTypeScriptJS(filePath, content, lines, symbols, imports, exports);
    } else if (lang === 'python') {
      this.parsePython(filePath, content, lines, symbols, imports, exports);
    }

    return { symbols, imports, exports };
  }

  /**
   * Line-by-line extraction for TypeScript and JavaScript. Results are
   * appended to `symbols`, `imports` and `exports`.
   *
   * Every pattern is applied to the trimmed line, so indented (nested)
   * declarations are picked up as well. Export statements are recorded in
   * `exports` and then still examined as declarations, so
   * `export function f() {}` yields both an export entry and a symbol.
   */
  private parseTypeScriptJS(
    filePath: string,
    content: string,
    lines: string[],
    symbols: CodeSymbol[],
    imports: Import[],
    exports: Export[]
  ): void {
    const patterns = {
      // Functions: function name(), const name = () =>
      function: /^(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s+(\w+)/,
      // Parameters are either parenthesised (optionally followed by a return
      // type) or a single bare identifier.
      arrowFunc: /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)(?:\s*:\s*[^=]+?)?|[\w$]+)\s*=>/,
      // Classes
      class: /^(?:export\s+(?:default\s+)?)?(?:abstract\s+)?class\s+(\w+)/,
      // Interfaces (TS only)
      interface: /^(?:export\s+)?interface\s+(\w+)/,
      // Types (TS only), with optional type parameters
      type: /^(?:export\s+)?type\s+(\w+)\s*(?:<.*?>)?\s*=/,
      // Enums (TS)
      enum: /^(?:export\s+)?(?:const\s+)?enum\s+(\w+)/,
      // Methods inside classes (simplified). Applied to the trimmed line, so
      // it must not require leading whitespace; modifiers may come in any order.
      method: /^(?:(?:public|private|protected|static|async|override|readonly|abstract)\s+)*(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*[:{]/,
      // Imports
      import: /^import\s+(?:type\s+)?(?:(\w+)(?:\s*,\s*)?)?(?:\{([^}]+)\})?\s*from\s*['"]([^'"]+)['"]/,
      importAll: /^import\s+\*\s+as\s+(\w+)\s+from\s*['"]([^'"]+)['"]/,
      importSideEffect: /^import\s*['"]([^'"]+)['"]/,
      // Exports
      exportNamed: /^export\s+(?:type\s+)?\{([^}]+)\}/,
      exportDefault: /^export\s+default\s+(?:async\s+)?(?:abstract\s+)?(?:(?:class|function|const|let|var)\b)?\s*(\w+)?/,
      exportDirect: /^export\s+(?:declare\s+)?(?:abstract\s+)?(?:async\s+)?(?:const\s+enum|const|let|var|function|class|interface|type|enum)\s+(\w+)/,
    };

    // Control-flow keywords (and a few others) that look like method calls
    // followed by a block but are not methods.
    const notMethodNames = new Set(['if', 'for', 'while', 'switch', 'catch', 'constructor', 'function']);

    // The class whose body contains the current line, if any. Its extent is
    // taken from findBlockEnd() so the opening brace may sit on a later line.
    let currentClass: { name: string; endLine: number } | null = null;

    for (let i = 0; i < lines.length; i++) {
      const trimmed = (lines[i] ?? '').trim();
      const lineNum = i + 1;

      if (currentClass && lineNum > currentClass.endLine) {
        currentClass = null;
      }

      // Skip blank lines and comments
      if (trimmed === '' || trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('*')) {
        continue;
      }

      // ---- Imports (an import line is never anything else) ----
      let match = trimmed.match(patterns.importAll);
      if (match) {
        imports.push({
          fileId: 0,
          filePath,
          importedFrom: match[2] || '',
          importedSymbols: ['*'],
          isDefault: false,
          isNamespace: true,
          lineNumber: lineNum
        });
        continue;
      }

      match = trimmed.match(patterns.import);
      if (match) {
        const defaultImport = match[1];
        const namedImports = (match[2] ?? '')
          .split(',')
          // Drop an inline `type` modifier and any `as alias` suffix.
          .map(s => s.trim().replace(/^type\s+/, '').split(/\s+as\s+/)[0]?.trim())
          .filter((s): s is string => !!s);

        imports.push({
          fileId: 0,
          filePath,
          importedFrom: match[3] || '',
          importedSymbols: defaultImport ? [defaultImport, ...namedImports] : namedImports,
          isDefault: !!defaultImport,
          isNamespace: false,
          lineNumber: lineNum
        });
        continue;
      }

      match = trimmed.match(patterns.importSideEffect);
      if (match) {
        // `import './polyfill'` binds no names.
        imports.push({
          fileId: 0,
          filePath,
          importedFrom: match[1] || '',
          importedSymbols: [],
          isDefault: false,
          isNamespace: false,
          lineNumber: lineNum
        });
        continue;
      }

      // ---- Exports ----
      match = trimmed.match(patterns.exportNamed);
      if (match && match[1]) {
        const names = match[1].split(',').map(s => {
          const parts = s.trim().split(/\s+as\s+/);
          // The exported name is the alias when one is given.
          return parts[parts.length - 1]?.trim();
        }).filter((n): n is string => !!n);

        for (const name of names) {
          exports.push({
            fileId: 0,
            filePath,
            exportedName: name,
            isDefault: false,
            lineNumber: lineNum
          });
        }
        continue;
      }

      // No `continue` for the two cases below: the same line may also
      // declare a symbol that has to be recorded.
      match = trimmed.match(patterns.exportDefault);
      if (match) {
        exports.push({
          fileId: 0,
          filePath,
          exportedName: match[1] || 'default',
          isDefault: true,
          lineNumber: lineNum
        });
      } else {
        match = trimmed.match(patterns.exportDirect);
        if (match && match[1]) {
          exports.push({
            fileId: 0,
            filePath,
            exportedName: match[1],
            isDefault: false,
            lineNumber: lineNum
          });
        }
      }

      // ---- Symbols ----
      const exported = trimmed.startsWith('export');

      // Functions and arrow functions
      match = trimmed.match(patterns.function) ?? trimmed.match(patterns.arrowFunc);
      if (match && match[1]) {
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'function',
          name: match[1],
          lineStart: lineNum,
          lineEnd: this.findBlockEnd(lines, i),
          exported,
          signature: this.extractSignature(trimmed)
        });
        continue;
      }

      // Classes
      match = trimmed.match(patterns.class);
      if (match && match[1]) {
        const endLine = this.findBlockEnd(lines, i);
        currentClass = { name: match[1], endLine };
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'class',
          name: match[1],
          lineStart: lineNum,
          lineEnd: endLine,
          exported
        });
        continue;
      }

      // Interfaces
      match = trimmed.match(patterns.interface);
      if (match && match[1]) {
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'interface',
          name: match[1],
          lineStart: lineNum,
          lineEnd: this.findBlockEnd(lines, i),
          exported
        });
        continue;
      }

      // Types
      match = trimmed.match(patterns.type);
      if (match && match[1]) {
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'type',
          name: match[1],
          lineStart: lineNum,
          lineEnd: this.findStatementEnd(lines, i),
          exported
        });
        continue;
      }

      // Enums
      match = trimmed.match(patterns.enum);
      if (match && match[1]) {
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'enum',
          name: match[1],
          lineStart: lineNum,
          lineEnd: this.findBlockEnd(lines, i),
          exported
        });
        continue;
      }

      // Methods (when inside a class)
      if (currentClass) {
        match = trimmed.match(patterns.method);
        if (match && match[1] && !notMethodNames.has(match[1])) {
          symbols.push({
            fileId: 0,
            filePath,
            kind: 'method',
            name: `${currentClass.name}.${match[1]}`,
            lineStart: lineNum,
            lineEnd: this.findBlockEnd(lines, i),
            exported: false // Methods inherit class export status
          });
        }
      }
    }
  }

  /**
   * Line-by-line extraction for Python. Results are appended to `symbols`
   * and `imports`; `exports` is left untouched. A name is treated as
   * exported when it does not start with an underscore.
   */
  private parsePython(
    filePath: string,
    content: string,
    lines: string[],
    symbols: CodeSymbol[],
    imports: Import[],
    exports: Export[]
  ): void {
    const patterns = {
      function: /^(?:async\s+)?def\s+(\w+)\s*\(/,
      class: /^class\s+(\w+)/,
      import: /^import\s+(.+)/,
      // `[.\w]+` also accepts relative modules such as `.` or `..pkg.mod`.
      fromImport: /^from\s+([.\w]+)\s+import\s+(.+)/,
    };

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i] ?? '';
      const trimmed = line.trim();
      const lineNum = i + 1;
      const indent = line.length - line.trimStart().length;

      // Skip comments
      if (trimmed.startsWith('#')) continue;

      // Functions
      let match = trimmed.match(patterns.function);
      if (match && match[1]) {
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'function',
          name: match[1],
          lineStart: lineNum,
          lineEnd: this.findPythonBlockEnd(lines, i, indent),
          exported: !match[1].startsWith('_'),
          signature: this.extractPythonSignature(trimmed)
        });
        continue;
      }

      // Classes
      match = trimmed.match(patterns.class);
      if (match && match[1]) {
        symbols.push({
          fileId: 0,
          filePath,
          kind: 'class',
          name: match[1],
          lineStart: lineNum,
          lineEnd: this.findPythonBlockEnd(lines, i, indent),
          exported: !match[1].startsWith('_')
        });
        continue;
      }

      // Imports: `import a, b.c as d` yields one record per module.
      match = trimmed.match(patterns.import);
      if (match && match[1]) {
        const modules = match[1]
          .replace(/#.*$/, '')
          .split(',')
          .map(s => s.trim().split(/\s+as\s+/)[0]?.trim())
          .filter((s): s is string => !!s && /^\w+(?:\.\w+)*$/.test(s));

        for (const moduleName of modules) {
          imports.push({
            fileId: 0,
            filePath,
            importedFrom: moduleName,
            importedSymbols: [moduleName.split('.').pop() || moduleName],
            isDefault: false,
            isNamespace: true,
            lineNumber: lineNum
          });
        }
        continue;
      }

      match = trimmed.match(patterns.fromImport);
      if (match) {
        const from = match[1] || '';
        const imported = (match[2] ?? '')
          .replace(/#.*$/, '')
          // Parentheses and line continuations are punctuation, not names.
          .replace(/[()\\]/g, '')
          .split(',')
          .map(s => s.trim().split(/\s+as\s+/)[0]?.trim())
          .filter((s): s is string => !!s);
        imports.push({
          fileId: 0,
          filePath,
          importedFrom: from,
          importedSymbols: imported,
          isDefault: false,
          isNamespace: imported.includes('*'),
          lineNumber: lineNum
        });
      }
    }
  }

  /**
   * Finds the 1-based line on which the brace-delimited block starting at
   * `startIndex` closes.
   *
   * Braces are counted textually (string and comment contents included).
   * When a line ends with `;` before any `{` has been seen, the declaration
   * has no block (overload signature, expression-bodied arrow function) and
   * ends on that line. Falls back to the start line when no end is found.
   */
  private findBlockEnd(lines: string[], startIndex: number): number {
    let depth = 0;
    let opened = false;

    for (let i = startIndex; i < lines.length; i++) {
      const line = lines[i] ?? '';
      for (const char of line) {
        if (char === '{') {
          depth++;
          opened = true;
        } else if (char === '}') {
          depth--;
          if (opened && depth === 0) {
            return i + 1;
          }
        }
      }
      if (!opened && line.trimEnd().endsWith(';')) {
        return i + 1;
      }
    }
    return startIndex + 1;
  }

  /**
   * Finds the 1-based line on which the statement starting at `startIndex`
   * ends. Used for type aliases.
   *
   * The statement continues while a bracket is still open, while the line
   * ends in an operator that needs a right-hand side, or while the next line
   * begins with a continuation operator. Falls back to the start line when
   * no end is found.
   */
  private findStatementEnd(lines: string[], startIndex: number): number {
    let depth = 0;

    for (let i = startIndex; i < lines.length; i++) {
      const text = (lines[i] ?? '').trim();
      for (const char of text) {
        if (char === '{' || char === '(' || char === '[') {
          depth++;
        } else if (char === '}' || char === ')' || char === ']') {
          depth--;
        }
      }

      if (depth > 0) continue;
      if (text.endsWith(';')) return i + 1;
      // A trailing operator means the right-hand side is on a later line.
      if (/(?:[=|&?:,<]|=>|\bextends)$/.test(text)) continue;
      // A leading operator on the next line continues this statement.
      const next = (lines[i + 1] ?? '').trim();
      if (/^[|&?:]/.test(next)) continue;
      return i + 1;
    }
    return startIndex + 1;
  }

  /**
   * Finds the 1-based line of the last non-blank line belonging to the
   * Python block that starts at `startIndex`. The block ends before the
   * first non-blank line indented no deeper than `baseIndent`.
   */
  private findPythonBlockEnd(lines: string[], startIndex: number, baseIndent: number): number {
    let lastContentIndex = startIndex;

    for (let i = startIndex + 1; i < lines.length; i++) {
      const line = lines[i] ?? '';
      if (line.trim() === '') continue;

      const indent = line.length - line.trimStart().length;
      if (indent <= baseIndent) {
        break;
      }
      lastContentIndex = i;
    }
    return lastContentIndex + 1;
  }

  /**
   * Extracts the parenthesised parameter list from a function declaration
   * or a `const`/`let`/`var` function assignment.
   *
   * @returns The parameter list including parentheses, or `''` when the
   *   line has none in a recognised position.
   */
  private extractSignature(line: string): string {
    const match = line.match(
      /(?:function\s+\w+|(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?)\s*(?:<[^>]*>\s*)?(\([^)]*\))/
    );
    return match?.[1] || '';
  }

  /**
   * Returns a Python `def` line up to, but not including, the colon that
   * opens the body. Colons inside brackets (parameter annotations, default
   * values) are skipped. If no such colon is on the line, for example
   * because the signature continues on the next line, the whole line is
   * returned.
   */
  private extractPythonSignature(line: string): string {
    let depth = 0;
    for (let i = 0; i < line.length; i++) {
      const char = line[i];
      if (char === '(' || char === '[' || char === '{') {
        depth++;
      } else if (char === ')' || char === ']' || char === '}') {
        depth--;
      } else if (char === ':' && depth === 0) {
        return line.slice(0, i).trimEnd();
      }
    }
    return line;
  }
}
@@ -0,0 +1,85 @@
1
+ import { pipeline, type FeatureExtractionPipeline } from '@xenova/transformers';
2
+
3
/**
 * Lazily loads a feature-extraction pipeline and turns text into embedding
 * vectors.
 */
export class EmbeddingGenerator {
  private model: FeatureExtractionPipeline | null = null;
  private initialized = false;
  // The in-flight model load, shared by every caller that arrives while the
  // load is running. Cleared again once the load settles.
  private initPromise: Promise<void> | null = null;
  private modelName: string;
  private dimension = 384; // Default for MiniLM

  /**
   * @param modelName Identifier of the model handed to `pipeline()`.
   */
  constructor(modelName: string = 'Xenova/all-MiniLM-L6-v2') {
    this.modelName = modelName;
  }

  /**
   * Loads the model if it is not loaded yet.
   *
   * Concurrent calls share a single load: all of them resolve when it
   * succeeds and all of them reject with the same error when it fails.
   * After a failure the next call starts a fresh attempt.
   *
   * @throws Whatever `pipeline()` throws while loading the model.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    if (!this.initPromise) {
      this.initPromise = this.loadModel().finally(() => {
        this.initPromise = null;
      });
    }
    return this.initPromise;
  }

  /** Performs the actual model load and records success. */
  private async loadModel(): Promise<void> {
    try {
      console.error(`Loading embedding model: ${this.modelName}...`);

      this.model = await pipeline('feature-extraction', this.modelName, {
        quantized: true
      });

      this.initialized = true;
      console.error('Embedding model loaded successfully');
    } catch (error) {
      console.error('Failed to load embedding model:', error);
      throw error;
    }
  }

  /**
   * Embeds a single text with mean pooling and normalisation. Text longer
   * than 8000 characters is truncated first.
   *
   * @returns A fresh copy of the embedding vector.
   * @throws If the model could not be loaded.
   */
  async embed(text: string): Promise<Float32Array> {
    await this.initialize();

    if (!this.model) {
      throw new Error('Embedding model not initialized');
    }

    // Truncate very long texts
    const maxChars = 8000; // ~2000 tokens
    const truncatedText = text.length > maxChars ? text.slice(0, maxChars) : text;

    const output = await this.model(truncatedText, {
      pooling: 'mean',
      normalize: true
    });

    // Extract the embedding data
    const data = output.data as Float32Array;
    this.dimension = data.length;

    return new Float32Array(data);
  }

  /**
   * Embeds many texts, `batchSize` at a time, preserving input order.
   *
   * @param texts Texts to embed.
   * @param batchSize Number of texts embedded concurrently. Values below 1
   *   (or `NaN`) are treated as 1 so the loop always advances.
   */
  async embedBatch(texts: string[], batchSize: number = 8): Promise<Float32Array[]> {
    const step = batchSize >= 1 ? Math.floor(batchSize) : 1;
    const results: Float32Array[] = [];

    for (let i = 0; i < texts.length; i += step) {
      const batch = texts.slice(i, i + step);
      const embeddings = await Promise.all(batch.map(t => this.embed(t)));
      results.push(...embeddings);
    }

    return results;
  }

  /**
   * @returns The length of the most recently produced embedding, or 384
   *   before any embedding has been produced.
   */
  getDimension(): number {
    return this.dimension;
  }

  /** @returns Whether the model has been loaded successfully. */
  isInitialized(): boolean {
    return this.initialized;
  }
}