@timmeck/brain 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/BRAIN_PLAN.md +3324 -3324
  2. package/LICENSE +21 -21
  3. package/dist/api/server.d.ts +4 -0
  4. package/dist/api/server.js +73 -0
  5. package/dist/api/server.js.map +1 -1
  6. package/dist/brain.js +2 -1
  7. package/dist/brain.js.map +1 -1
  8. package/dist/cli/commands/dashboard.js +606 -572
  9. package/dist/cli/commands/dashboard.js.map +1 -1
  10. package/dist/dashboard/server.js +25 -25
  11. package/dist/db/migrations/001_core_schema.js +115 -115
  12. package/dist/db/migrations/002_learning_schema.js +33 -33
  13. package/dist/db/migrations/003_code_schema.js +48 -48
  14. package/dist/db/migrations/004_synapses_schema.js +52 -52
  15. package/dist/db/migrations/005_fts_indexes.js +73 -73
  16. package/dist/db/migrations/007_feedback.js +8 -8
  17. package/dist/db/migrations/008_git_integration.js +33 -33
  18. package/dist/db/migrations/009_embeddings.js +3 -3
  19. package/dist/db/repositories/antipattern.repository.js +3 -3
  20. package/dist/db/repositories/code-module.repository.js +32 -32
  21. package/dist/db/repositories/notification.repository.js +3 -3
  22. package/dist/db/repositories/project.repository.js +21 -21
  23. package/dist/db/repositories/rule.repository.js +24 -24
  24. package/dist/db/repositories/solution.repository.js +50 -50
  25. package/dist/db/repositories/synapse.repository.js +18 -18
  26. package/dist/db/repositories/terminal.repository.js +24 -24
  27. package/dist/embeddings/engine.d.ts +2 -2
  28. package/dist/embeddings/engine.js +17 -4
  29. package/dist/embeddings/engine.js.map +1 -1
  30. package/dist/index.js +1 -1
  31. package/dist/ipc/server.d.ts +8 -0
  32. package/dist/ipc/server.js +67 -1
  33. package/dist/ipc/server.js.map +1 -1
  34. package/dist/matching/error-matcher.js +5 -5
  35. package/dist/matching/fingerprint.js +6 -1
  36. package/dist/matching/fingerprint.js.map +1 -1
  37. package/dist/mcp/http-server.js +8 -2
  38. package/dist/mcp/http-server.js.map +1 -1
  39. package/dist/services/code.service.d.ts +3 -0
  40. package/dist/services/code.service.js +33 -4
  41. package/dist/services/code.service.js.map +1 -1
  42. package/dist/services/error.service.js +4 -3
  43. package/dist/services/error.service.js.map +1 -1
  44. package/dist/services/git.service.js +14 -14
  45. package/package.json +49 -49
  46. package/src/api/server.ts +395 -321
  47. package/src/brain.ts +266 -265
  48. package/src/cli/colors.ts +116 -116
  49. package/src/cli/commands/config.ts +169 -169
  50. package/src/cli/commands/dashboard.ts +755 -720
  51. package/src/cli/commands/doctor.ts +118 -118
  52. package/src/cli/commands/explain.ts +83 -83
  53. package/src/cli/commands/export.ts +31 -31
  54. package/src/cli/commands/import.ts +199 -199
  55. package/src/cli/commands/insights.ts +65 -65
  56. package/src/cli/commands/learn.ts +24 -24
  57. package/src/cli/commands/modules.ts +53 -53
  58. package/src/cli/commands/network.ts +67 -67
  59. package/src/cli/commands/projects.ts +42 -42
  60. package/src/cli/commands/query.ts +120 -120
  61. package/src/cli/commands/start.ts +62 -62
  62. package/src/cli/commands/status.ts +75 -75
  63. package/src/cli/commands/stop.ts +34 -34
  64. package/src/cli/ipc-helper.ts +22 -22
  65. package/src/cli/update-check.ts +63 -63
  66. package/src/code/fingerprint.ts +87 -87
  67. package/src/code/parsers/generic.ts +29 -29
  68. package/src/code/parsers/python.ts +54 -54
  69. package/src/code/parsers/typescript.ts +65 -65
  70. package/src/code/registry.ts +60 -60
  71. package/src/dashboard/server.ts +142 -142
  72. package/src/db/connection.ts +22 -22
  73. package/src/db/migrations/001_core_schema.ts +120 -120
  74. package/src/db/migrations/002_learning_schema.ts +38 -38
  75. package/src/db/migrations/003_code_schema.ts +53 -53
  76. package/src/db/migrations/004_synapses_schema.ts +57 -57
  77. package/src/db/migrations/005_fts_indexes.ts +78 -78
  78. package/src/db/migrations/006_synapses_phase3.ts +17 -17
  79. package/src/db/migrations/007_feedback.ts +13 -13
  80. package/src/db/migrations/008_git_integration.ts +38 -38
  81. package/src/db/migrations/009_embeddings.ts +8 -8
  82. package/src/db/repositories/antipattern.repository.ts +66 -66
  83. package/src/db/repositories/code-module.repository.ts +142 -142
  84. package/src/db/repositories/notification.repository.ts +66 -66
  85. package/src/db/repositories/project.repository.ts +93 -93
  86. package/src/db/repositories/rule.repository.ts +108 -108
  87. package/src/db/repositories/solution.repository.ts +154 -154
  88. package/src/db/repositories/synapse.repository.ts +153 -153
  89. package/src/db/repositories/terminal.repository.ts +101 -101
  90. package/src/embeddings/engine.ts +238 -217
  91. package/src/index.ts +63 -63
  92. package/src/ipc/client.ts +118 -118
  93. package/src/ipc/protocol.ts +35 -35
  94. package/src/ipc/router.ts +133 -133
  95. package/src/ipc/server.ts +176 -110
  96. package/src/learning/decay.ts +46 -46
  97. package/src/learning/pattern-extractor.ts +90 -90
  98. package/src/learning/rule-generator.ts +74 -74
  99. package/src/matching/error-matcher.ts +5 -5
  100. package/src/matching/fingerprint.ts +34 -29
  101. package/src/matching/similarity.ts +61 -61
  102. package/src/matching/tfidf.ts +74 -74
  103. package/src/matching/tokenizer.ts +41 -41
  104. package/src/mcp/auto-detect.ts +93 -93
  105. package/src/mcp/http-server.ts +140 -137
  106. package/src/mcp/server.ts +73 -73
  107. package/src/parsing/error-parser.ts +28 -28
  108. package/src/parsing/parsers/compiler.ts +93 -93
  109. package/src/parsing/parsers/generic.ts +28 -28
  110. package/src/parsing/parsers/go.ts +97 -97
  111. package/src/parsing/parsers/node.ts +69 -69
  112. package/src/parsing/parsers/python.ts +62 -62
  113. package/src/parsing/parsers/rust.ts +50 -50
  114. package/src/parsing/parsers/shell.ts +42 -42
  115. package/src/parsing/types.ts +47 -47
  116. package/src/research/gap-analyzer.ts +135 -135
  117. package/src/research/insight-generator.ts +123 -123
  118. package/src/research/research-engine.ts +116 -116
  119. package/src/research/synergy-detector.ts +126 -126
  120. package/src/research/template-extractor.ts +130 -130
  121. package/src/research/trend-analyzer.ts +127 -127
  122. package/src/services/code.service.ts +271 -238
  123. package/src/services/error.service.ts +4 -3
  124. package/src/services/git.service.ts +132 -132
  125. package/src/services/notification.service.ts +41 -41
  126. package/src/services/synapse.service.ts +59 -59
  127. package/src/services/terminal.service.ts +81 -81
  128. package/src/synapses/activation.ts +80 -80
  129. package/src/synapses/decay.ts +38 -38
  130. package/src/synapses/hebbian.ts +69 -69
  131. package/src/synapses/pathfinder.ts +81 -81
  132. package/src/synapses/synapse-manager.ts +109 -109
  133. package/src/types/code.types.ts +52 -52
  134. package/src/types/error.types.ts +67 -67
  135. package/src/types/ipc.types.ts +8 -8
  136. package/src/types/mcp.types.ts +53 -53
  137. package/src/types/research.types.ts +28 -28
  138. package/src/types/solution.types.ts +30 -30
  139. package/src/utils/events.ts +45 -45
  140. package/src/utils/hash.ts +5 -5
  141. package/src/utils/logger.ts +48 -48
  142. package/src/utils/paths.ts +19 -19
  143. package/tests/e2e/test_code_intelligence.py +1015 -0
  144. package/tests/e2e/test_error_memory.py +451 -0
  145. package/tests/e2e/test_full_integration.py +534 -0
  146. package/tests/fixtures/code-modules/modules.ts +83 -83
  147. package/tests/fixtures/errors/go.ts +9 -9
  148. package/tests/fixtures/errors/node.ts +24 -24
  149. package/tests/fixtures/errors/python.ts +21 -21
  150. package/tests/fixtures/errors/rust.ts +25 -25
  151. package/tests/fixtures/errors/shell.ts +15 -15
  152. package/tests/fixtures/solutions/solutions.ts +27 -27
  153. package/tests/helpers/setup-db.ts +52 -52
  154. package/tests/integration/code-flow.test.ts +86 -86
  155. package/tests/integration/error-flow.test.ts +83 -83
  156. package/tests/integration/ipc-flow.test.ts +166 -166
  157. package/tests/integration/learning-cycle.test.ts +82 -82
  158. package/tests/integration/synapse-flow.test.ts +117 -117
  159. package/tests/unit/code/analyzer.test.ts +58 -58
  160. package/tests/unit/code/fingerprint.test.ts +51 -51
  161. package/tests/unit/code/scorer.test.ts +55 -55
  162. package/tests/unit/learning/confidence-scorer.test.ts +60 -60
  163. package/tests/unit/learning/decay.test.ts +45 -45
  164. package/tests/unit/learning/pattern-extractor.test.ts +50 -50
  165. package/tests/unit/matching/error-matcher.test.ts +69 -69
  166. package/tests/unit/matching/fingerprint.test.ts +47 -47
  167. package/tests/unit/matching/similarity.test.ts +65 -65
  168. package/tests/unit/matching/tfidf.test.ts +71 -71
  169. package/tests/unit/matching/tokenizer.test.ts +83 -83
  170. package/tests/unit/parsing/parsers.test.ts +113 -113
  171. package/tests/unit/research/gap-analyzer.test.ts +45 -45
  172. package/tests/unit/research/trend-analyzer.test.ts +45 -45
  173. package/tests/unit/synapses/activation.test.ts +80 -80
  174. package/tests/unit/synapses/decay.test.ts +27 -27
  175. package/tests/unit/synapses/hebbian.test.ts +96 -96
  176. package/tests/unit/synapses/pathfinder.test.ts +72 -72
  177. package/tsconfig.json +18 -18
@@ -1,101 +1,101 @@
1
- import type Database from 'better-sqlite3';
2
- import type { Statement } from 'better-sqlite3';
3
-
4
- export interface TerminalRecord {
5
- id: number;
6
- uuid: string;
7
- project_id: number | null;
8
- pid: number | null;
9
- shell: string | null;
10
- cwd: string | null;
11
- connected_at: string;
12
- last_seen: string;
13
- disconnected_at: string | null;
14
- }
15
-
16
- type CreateTerminalData = Omit<TerminalRecord, 'id' | 'connected_at' | 'last_seen' | 'disconnected_at'>;
17
- type UpdateTerminalData = Partial<Omit<TerminalRecord, 'id' | 'connected_at'>>;
18
-
19
- export class TerminalRepository {
20
- private stmts: Record<string, Statement>;
21
-
22
- constructor(private db: Database.Database) {
23
- this.stmts = {
24
- create: this.db.prepare(`
25
- INSERT INTO terminals (uuid, project_id, pid, shell, cwd)
26
- VALUES (@uuid, @project_id, @pid, @shell, @cwd)
27
- `),
28
- getById: this.db.prepare(`
29
- SELECT * FROM terminals WHERE id = ?
30
- `),
31
- update: this.db.prepare(`
32
- UPDATE terminals
33
- SET project_id = COALESCE(@project_id, project_id),
34
- pid = COALESCE(@pid, pid),
35
- shell = COALESCE(@shell, shell),
36
- cwd = COALESCE(@cwd, cwd),
37
- last_seen = COALESCE(@last_seen, last_seen),
38
- disconnected_at = COALESCE(@disconnected_at, disconnected_at)
39
- WHERE id = @id
40
- `),
41
- delete: this.db.prepare(`
42
- DELETE FROM terminals WHERE id = ?
43
- `),
44
- findByUuid: this.db.prepare(`
45
- SELECT * FROM terminals WHERE uuid = ?
46
- `),
47
- findConnected: this.db.prepare(`
48
- SELECT * FROM terminals WHERE disconnected_at IS NULL ORDER BY connected_at DESC
49
- `),
50
- cleanupStale: this.db.prepare(`
51
- UPDATE terminals
52
- SET disconnected_at = datetime('now')
53
- WHERE disconnected_at IS NULL AND last_seen < ?
54
- `),
55
- };
56
- }
57
-
58
- create(data: CreateTerminalData): number {
59
- const result = this.stmts.create.run({
60
- uuid: data.uuid,
61
- project_id: data.project_id ?? null,
62
- pid: data.pid ?? null,
63
- shell: data.shell ?? null,
64
- cwd: data.cwd ?? null,
65
- });
66
- return result.lastInsertRowid as number;
67
- }
68
-
69
- getById(id: number): TerminalRecord | undefined {
70
- return this.stmts.getById.get(id) as TerminalRecord | undefined;
71
- }
72
-
73
- update(id: number, data: UpdateTerminalData): void {
74
- this.stmts.update.run({
75
- id,
76
- project_id: data.project_id ?? null,
77
- pid: data.pid ?? null,
78
- shell: data.shell ?? null,
79
- cwd: data.cwd ?? null,
80
- last_seen: data.last_seen ?? null,
81
- disconnected_at: data.disconnected_at ?? null,
82
- });
83
- }
84
-
85
- delete(id: number): void {
86
- this.stmts.delete.run(id);
87
- }
88
-
89
- findByUuid(uuid: string): TerminalRecord | undefined {
90
- return this.stmts.findByUuid.get(uuid) as TerminalRecord | undefined;
91
- }
92
-
93
- findConnected(): TerminalRecord[] {
94
- return this.stmts.findConnected.all() as TerminalRecord[];
95
- }
96
-
97
- cleanupStale(olderThan: string): number {
98
- const result = this.stmts.cleanupStale.run(olderThan);
99
- return result.changes;
100
- }
101
- }
1
+ import type Database from 'better-sqlite3';
2
+ import type { Statement } from 'better-sqlite3';
3
+
/**
 * A row of the `terminals` table, as returned by the `SELECT *` queries below.
 */
export interface TerminalRecord {
  id: number;
  uuid: string;
  project_id: number | null;
  pid: number | null;
  shell: string | null;
  cwd: string | null;
  // connected_at / last_seen are not supplied by create(); presumably the
  // table schema defaults them — confirm against the core schema migration.
  connected_at: string;
  last_seen: string;
  /** NULL while the terminal counts as connected (see `findConnected`). */
  disconnected_at: string | null;
}

/** Fields a caller provides when registering a terminal. */
type CreateTerminalData = Omit<TerminalRecord, 'id' | 'connected_at' | 'last_seen' | 'disconnected_at'>;

/** Fields a caller may pass to `update()`; every one of them is optional. */
type UpdateTerminalData = Partial<Omit<TerminalRecord, 'id' | 'connected_at'>>;

/** Names of the statements prepared once in the repository constructor. */
type TerminalStatementName =
  | 'create'
  | 'getById'
  | 'update'
  | 'delete'
  | 'findByUuid'
  | 'findConnected'
  | 'cleanupStale';

/**
 * Data-access layer for the `terminals` table.
 *
 * All SQL is prepared once in the constructor and reused by the methods.
 */
export class TerminalRepository {
  // Keyed by the exact statement names so a misspelled lookup is a compile
  // error instead of an `undefined` statement at runtime.
  private readonly stmts: Record<TerminalStatementName, Statement>;

  /**
   * @param db Open better-sqlite3 connection used to prepare every statement.
   */
  constructor(private db: Database.Database) {
    this.stmts = {
      create: this.db.prepare(`
        INSERT INTO terminals (uuid, project_id, pid, shell, cwd)
        VALUES (@uuid, @project_id, @pid, @shell, @cwd)
      `),
      getById: this.db.prepare(`
        SELECT * FROM terminals WHERE id = ?
      `),
      // COALESCE keeps the stored value whenever the bound parameter is NULL.
      update: this.db.prepare(`
        UPDATE terminals
        SET project_id = COALESCE(@project_id, project_id),
            pid = COALESCE(@pid, pid),
            shell = COALESCE(@shell, shell),
            cwd = COALESCE(@cwd, cwd),
            last_seen = COALESCE(@last_seen, last_seen),
            disconnected_at = COALESCE(@disconnected_at, disconnected_at)
        WHERE id = @id
      `),
      delete: this.db.prepare(`
        DELETE FROM terminals WHERE id = ?
      `),
      findByUuid: this.db.prepare(`
        SELECT * FROM terminals WHERE uuid = ?
      `),
      findConnected: this.db.prepare(`
        SELECT * FROM terminals WHERE disconnected_at IS NULL ORDER BY connected_at DESC
      `),
      cleanupStale: this.db.prepare(`
        UPDATE terminals
        SET disconnected_at = datetime('now')
        WHERE disconnected_at IS NULL AND last_seen < ?
      `),
    };
  }

  /**
   * Inserts a terminal row.
   *
   * @param data Terminal attributes; missing nullable fields are stored as NULL.
   * @returns The rowid of the inserted row, always as a JS `number`.
   */
  create(data: CreateTerminalData): number {
    const { lastInsertRowid } = this.stmts.create.run({
      uuid: data.uuid,
      project_id: data.project_id ?? null,
      pid: data.pid ?? null,
      shell: data.shell ?? null,
      cwd: data.cwd ?? null,
    });
    // The driver types lastInsertRowid as `number | bigint`; convert instead of
    // asserting so a bigint can never reach callers that expect a number.
    return Number(lastInsertRowid);
  }

  /**
   * @param id Primary key of the terminal.
   * @returns The matching row, or `undefined` when none exists.
   */
  getById(id: number): TerminalRecord | undefined {
    return this.stmts.getById.get(id) as TerminalRecord | undefined;
  }

  /**
   * Updates the provided fields of one terminal.
   *
   * Omitted (or null) fields are bound as NULL and therefore left untouched by
   * the COALESCE expressions — which also means a column cannot be reset to
   * NULL through this method. `uuid` is accepted by the parameter type but is
   * not written by the statement.
   *
   * @param id Primary key of the terminal to update.
   * @param data Fields to overwrite.
   */
  update(id: number, data: UpdateTerminalData): void {
    this.stmts.update.run({
      id,
      project_id: data.project_id ?? null,
      pid: data.pid ?? null,
      shell: data.shell ?? null,
      cwd: data.cwd ?? null,
      last_seen: data.last_seen ?? null,
      disconnected_at: data.disconnected_at ?? null,
    });
  }

  /**
   * Deletes one terminal row.
   *
   * @param id Primary key of the terminal to delete.
   */
  delete(id: number): void {
    this.stmts.delete.run(id);
  }

  /**
   * @param uuid Terminal UUID to look up.
   * @returns The matching row, or `undefined` when none exists.
   */
  findByUuid(uuid: string): TerminalRecord | undefined {
    return this.stmts.findByUuid.get(uuid) as TerminalRecord | undefined;
  }

  /**
   * @returns Every terminal whose `disconnected_at` is NULL, newest connection first.
   */
  findConnected(): TerminalRecord[] {
    return this.stmts.findConnected.all() as TerminalRecord[];
  }

  /**
   * Marks still-connected terminals as disconnected when their `last_seen`
   * sorts before the given value.
   *
   * @param olderThan Cut-off compared against `last_seen` by SQLite.
   * @returns Number of rows that were marked as disconnected.
   */
  cleanupStale(olderThan: string): number {
    return this.stmts.cleanupStale.run(olderThan).changes;
  }
}
@@ -1,217 +1,238 @@
1
- import path from 'node:path';
2
- import type Database from 'better-sqlite3';
3
- import { getLogger } from '../utils/logger.js';
4
-
5
- export interface EmbeddingConfig {
6
- enabled: boolean;
7
- modelName: string;
8
- cacheDir: string;
9
- sweepIntervalMs: number;
10
- batchSize: number;
11
- }
12
-
13
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
14
- type Pipeline = any;
15
-
16
- export class EmbeddingEngine {
17
- private pipeline: Pipeline = null;
18
- private ready = false;
19
- private loading = false;
20
- private logger = getLogger();
21
- private sweepTimer: ReturnType<typeof setInterval> | null = null;
22
-
23
- constructor(
24
- private config: EmbeddingConfig,
25
- private db: Database.Database,
26
- ) {}
27
-
28
- async initialize(): Promise<void> {
29
- if (!this.config.enabled || this.loading || this.ready) return;
30
-
31
- this.loading = true;
32
- try {
33
- const { pipeline, env } = await import('@huggingface/transformers');
34
- env.cacheDir = this.config.cacheDir;
35
-
36
- this.pipeline = await pipeline(
37
- 'feature-extraction',
38
- this.config.modelName,
39
- { dtype: 'q8' },
40
- );
41
-
42
- this.ready = true;
43
- this.logger.info(`Embedding model loaded: ${this.config.modelName}`);
44
- } catch (err) {
45
- this.logger.warn(`Failed to load embedding model (will retry): ${err}`);
46
- this.ready = false;
47
- } finally {
48
- this.loading = false;
49
- }
50
- }
51
-
52
- /** Start background embedding sweep */
53
- start(): void {
54
- if (!this.config.enabled) return;
55
-
56
- // Initialize model in background
57
- this.initialize().then(() => {
58
- if (this.ready) {
59
- // Run initial sweep
60
- this.sweep().catch(err => this.logger.error('Embedding sweep error:', err));
61
- }
62
- });
63
-
64
- // Periodic sweep for new entries
65
- this.sweepTimer = setInterval(() => {
66
- if (this.ready) {
67
- this.sweep().catch(err => this.logger.error('Embedding sweep error:', err));
68
- }
69
- }, this.config.sweepIntervalMs);
70
- }
71
-
72
- stop(): void {
73
- if (this.sweepTimer) {
74
- clearInterval(this.sweepTimer);
75
- this.sweepTimer = null;
76
- }
77
- }
78
-
79
- isReady(): boolean {
80
- return this.ready;
81
- }
82
-
83
- /** Generate embedding for a single text */
84
- async embed(text: string): Promise<Float32Array | null> {
85
- if (!this.ready || !this.pipeline) return null;
86
-
87
- try {
88
- const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
89
- const data = output.tolist()[0] as number[];
90
- return new Float32Array(data);
91
- } catch (err) {
92
- this.logger.error(`Embedding error: ${err}`);
93
- return null;
94
- }
95
- }
96
-
97
- /** Generate embeddings for a batch of texts */
98
- async embedBatch(texts: string[]): Promise<(Float32Array | null)[]> {
99
- if (!this.ready || !this.pipeline || texts.length === 0) return texts.map(() => null);
100
-
101
- try {
102
- const output = await this.pipeline(texts, { pooling: 'mean', normalize: true });
103
- const list = output.tolist() as number[][];
104
- return list.map(v => new Float32Array(v));
105
- } catch (err) {
106
- this.logger.error(`Batch embedding error: ${err}`);
107
- return texts.map(() => null);
108
- }
109
- }
110
-
111
- /** Compute and store embeddings for entries that don't have them yet */
112
- async sweep(): Promise<{ errors: number; modules: number }> {
113
- let errorsProcessed = 0;
114
- let modulesProcessed = 0;
115
-
116
- // Process errors without embeddings
117
- const pendingErrors = this.db.prepare(
118
- 'SELECT id, type, message, context FROM errors WHERE embedding IS NULL ORDER BY id DESC LIMIT ?'
119
- ).all(this.config.batchSize) as Array<{ id: number; type: string; message: string; context: string | null }>;
120
-
121
- if (pendingErrors.length > 0) {
122
- const texts = pendingErrors.map(e =>
123
- [e.type, e.message, e.context].filter(Boolean).join(' ')
124
- );
125
- const embeddings = await this.embedBatch(texts);
126
-
127
- const updateStmt = this.db.prepare('UPDATE errors SET embedding = ? WHERE id = ?');
128
- for (let i = 0; i < pendingErrors.length; i++) {
129
- const emb = embeddings[i];
130
- if (emb) {
131
- updateStmt.run(EmbeddingEngine.serialize(emb), pendingErrors[i]!.id);
132
- errorsProcessed++;
133
- }
134
- }
135
- }
136
-
137
- // Process code modules without embeddings
138
- const pendingModules = this.db.prepare(
139
- 'SELECT id, name, description, file_path FROM code_modules WHERE embedding IS NULL ORDER BY id DESC LIMIT ?'
140
- ).all(this.config.batchSize) as Array<{ id: number; name: string; description: string | null; file_path: string }>;
141
-
142
- if (pendingModules.length > 0) {
143
- const texts = pendingModules.map(m =>
144
- [m.name, m.description, m.file_path].filter(Boolean).join(' ')
145
- );
146
- const embeddings = await this.embedBatch(texts);
147
-
148
- const updateStmt = this.db.prepare('UPDATE code_modules SET embedding = ? WHERE id = ?');
149
- for (let i = 0; i < pendingModules.length; i++) {
150
- const emb = embeddings[i];
151
- if (emb) {
152
- updateStmt.run(EmbeddingEngine.serialize(emb), pendingModules[i]!.id);
153
- modulesProcessed++;
154
- }
155
- }
156
- }
157
-
158
- if (errorsProcessed > 0 || modulesProcessed > 0) {
159
- this.logger.info(`Embedding sweep: ${errorsProcessed} errors, ${modulesProcessed} modules processed`);
160
- }
161
-
162
- return { errors: errorsProcessed, modules: modulesProcessed };
163
- }
164
-
165
- /** Load vector scores for error matching (sync - reads pre-computed embeddings from DB) */
166
- computeErrorVectorScores(errorId: number, projectId: number): Map<number, number> {
167
- const scores = new Map<number, number>();
168
-
169
- const errorRow = this.db.prepare(
170
- 'SELECT embedding FROM errors WHERE id = ?'
171
- ).get(errorId) as { embedding: Buffer | null } | undefined;
172
-
173
- if (!errorRow?.embedding) return scores;
174
-
175
- const incoming = EmbeddingEngine.deserialize(errorRow.embedding);
176
-
177
- const candidates = this.db.prepare(
178
- 'SELECT id, embedding FROM errors WHERE project_id = ? AND id != ? AND embedding IS NOT NULL'
179
- ).all(projectId, errorId) as Array<{ id: number; embedding: Buffer }>;
180
-
181
- for (const c of candidates) {
182
- const candidate = EmbeddingEngine.deserialize(c.embedding);
183
- scores.set(c.id, EmbeddingEngine.similarity(incoming, candidate));
184
- }
185
-
186
- return scores;
187
- }
188
-
189
- /** Load vector scores for code module matching (sync) */
190
- computeModuleVectorScores(query: string, language?: string): Map<number, number> {
191
- const scores = new Map<number, number>();
192
- // This needs async embedding — use cached if available
193
- // For now, return empty (vector search for modules is done during sweep)
194
- return scores;
195
- }
196
-
197
- /** Serialize Float32Array to Buffer for SQLite BLOB storage */
198
- static serialize(embedding: Float32Array): Buffer {
199
- return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
200
- }
201
-
202
- /** Deserialize SQLite BLOB to Float32Array */
203
- static deserialize(buffer: Buffer): Float32Array {
204
- const copy = Buffer.from(buffer);
205
- return new Float32Array(copy.buffer, copy.byteOffset, copy.length / 4);
206
- }
207
-
208
- /** Cosine similarity (embeddings are L2-normalized, so dot product = cosine) */
209
- static similarity(a: Float32Array, b: Float32Array): number {
210
- if (a.length !== b.length) return 0;
211
- let dot = 0;
212
- for (let i = 0; i < a.length; i++) {
213
- dot += a[i]! * b[i]!;
214
- }
215
- return Math.max(0, Math.min(1, dot));
216
- }
217
- }
1
+ import path from 'node:path';
2
+ import type Database from 'better-sqlite3';
3
+ import { getLogger } from '../utils/logger.js';
4
+
/** Settings that control the embedding model and the background sweep. */
export interface EmbeddingConfig {
  /** When false, `initialize()` and `start()` return without doing anything. */
  enabled: boolean;
  /** Model identifier handed to the transformers `pipeline()` factory. */
  modelName: string;
  /** Directory assigned to the transformers `env.cacheDir`. */
  cacheDir: string;
  /** Delay between periodic sweeps, in milliseconds. */
  sweepIntervalMs: number;
  /** Maximum number of rows fetched per table in a single sweep. */
  batchSize: number;
}

// The pipeline comes from a dynamically imported module and is left untyped.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type Pipeline = any;

/**
 * Computes text embeddings, stores them as BLOBs on the `errors` and
 * `code_modules` tables, and scores stored embeddings against each other.
 */
export class EmbeddingEngine {
  private pipeline: Pipeline = null;
  private ready = false;
  private loading = false;
  // True while a sweep is in flight; stops the interval from overlapping sweeps.
  private sweeping = false;
  private logger = getLogger();
  private sweepTimer: ReturnType<typeof setInterval> | null = null;

  /**
   * @param config Model and sweep settings.
   * @param db Open better-sqlite3 connection holding the embedding columns.
   */
  constructor(
    private config: EmbeddingConfig,
    private db: Database.Database,
  ) {}

  /**
   * Loads the feature-extraction pipeline.
   *
   * Does nothing when embeddings are disabled, a load is already running, or
   * the model is already loaded. A failure is logged and leaves the engine
   * not-ready; it never rejects.
   */
  async initialize(): Promise<void> {
    if (!this.config.enabled || this.loading || this.ready) return;

    this.loading = true;
    try {
      const { pipeline, env } = await import('@huggingface/transformers');
      env.cacheDir = this.config.cacheDir;

      this.pipeline = await pipeline(
        'feature-extraction',
        this.config.modelName,
        { dtype: 'q8' },
      );

      this.ready = true;
      this.logger.info(`Embedding model loaded: ${this.config.modelName}`);
    } catch (err) {
      this.logger.warn(`Failed to load embedding model (will retry): ${err}`);
      this.ready = false;
    } finally {
      this.loading = false;
    }
  }

  /**
   * Starts the background work: loads the model, runs a first sweep, then
   * sweeps every `sweepIntervalMs`.
   *
   * Calling it again while already started is a no-op, so no second interval
   * is created. If the model failed to load, each tick retries the load.
   */
  start(): void {
    if (!this.config.enabled || this.sweepTimer) return;

    // Initialize model in background, then run the initial sweep.
    void this.initialize().then(() => this.sweepIfReady());

    // Periodic sweep for new entries.
    this.sweepTimer = setInterval(() => {
      if (this.ready) {
        this.sweepIfReady();
      } else if (!this.loading) {
        // Honor the "will retry" promise made by initialize() on failure.
        void this.initialize().then(() => this.sweepIfReady());
      }
    }, this.config.sweepIntervalMs);
  }

  /** Stops the periodic sweep; a sweep that is already running is not interrupted. */
  stop(): void {
    if (this.sweepTimer) {
      clearInterval(this.sweepTimer);
      this.sweepTimer = null;
    }
  }

  /** @returns Whether the model has been loaded successfully. */
  isReady(): boolean {
    return this.ready;
  }

  /**
   * Generates the embedding for a single text.
   *
   * @returns The embedding, or `null` when the model is not ready or the
   *   pipeline throws (the error is logged).
   */
  async embed(text: string): Promise<Float32Array | null> {
    if (!this.ready || !this.pipeline) return null;

    try {
      const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
      const data = output.tolist()[0] as number[];
      return new Float32Array(data);
    } catch (err) {
      this.logger.error(`Embedding error: ${err}`);
      return null;
    }
  }

  /**
   * Generates embeddings for a batch of texts in one pipeline call.
   *
   * @returns One entry per input text; all entries are `null` when the model
   *   is not ready or the pipeline throws (the error is logged).
   */
  async embedBatch(texts: string[]): Promise<(Float32Array | null)[]> {
    if (!this.ready || !this.pipeline || texts.length === 0) return texts.map(() => null);

    try {
      const output = await this.pipeline(texts, { pooling: 'mean', normalize: true });
      const list = output.tolist() as number[][];
      return list.map(v => new Float32Array(v));
    } catch (err) {
      this.logger.error(`Batch embedding error: ${err}`);
      return texts.map(() => null);
    }
  }

  /**
   * Computes and stores embeddings for up to `batchSize` errors and up to
   * `batchSize` code modules that do not have one yet (highest ids first).
   *
   * If a sweep is already in flight the call returns zero counts immediately,
   * so the same rows are never embedded twice concurrently.
   *
   * @returns How many rows of each kind received an embedding.
   */
  async sweep(): Promise<{ errors: number; modules: number }> {
    if (this.sweeping) return { errors: 0, modules: 0 };

    this.sweeping = true;
    try {
      // Process errors without embeddings
      const pendingErrors = this.db.prepare(
        'SELECT id, type, message, context FROM errors WHERE embedding IS NULL ORDER BY id DESC LIMIT ?'
      ).all(this.config.batchSize) as Array<{ id: number; type: string; message: string; context: string | null }>;

      const errorsProcessed = await this.storeEmbeddings(
        pendingErrors.map(e => ({
          id: e.id,
          text: [e.type, e.message, e.context].filter(Boolean).join(' '),
        })),
        'UPDATE errors SET embedding = ? WHERE id = ?',
      );

      // Process code modules without embeddings
      const pendingModules = this.db.prepare(
        'SELECT id, name, description, file_path FROM code_modules WHERE embedding IS NULL ORDER BY id DESC LIMIT ?'
      ).all(this.config.batchSize) as Array<{ id: number; name: string; description: string | null; file_path: string }>;

      const modulesProcessed = await this.storeEmbeddings(
        pendingModules.map(m => ({
          id: m.id,
          text: [m.name, m.description, m.file_path].filter(Boolean).join(' '),
        })),
        'UPDATE code_modules SET embedding = ? WHERE id = ?',
      );

      if (errorsProcessed > 0 || modulesProcessed > 0) {
        this.logger.info(`Embedding sweep: ${errorsProcessed} errors, ${modulesProcessed} modules processed`);
      }

      return { errors: errorsProcessed, modules: modulesProcessed };
    } finally {
      this.sweeping = false;
    }
  }

  /**
   * Scores every other embedded error of a project against one error.
   * Synchronous: only reads embeddings already stored in the database.
   *
   * @param errorId Error whose stored embedding is the reference vector.
   * @param projectId Project whose errors are the candidates.
   * @returns Map of candidate error id to similarity in [0, 1]; empty when the
   *   reference error has no embedding.
   */
  computeErrorVectorScores(errorId: number, projectId: number): Map<number, number> {
    const errorRow = this.db.prepare(
      'SELECT embedding FROM errors WHERE id = ?'
    ).get(errorId) as { embedding: Buffer | null } | undefined;

    if (!errorRow?.embedding) return new Map<number, number>();

    const candidates = this.db.prepare(
      'SELECT id, embedding FROM errors WHERE project_id = ? AND id != ? AND embedding IS NOT NULL'
    ).all(projectId, errorId) as Array<{ id: number; embedding: Buffer }>;

    return EmbeddingEngine.scoreCandidates(errorRow.embedding, candidates);
  }

  /**
   * Scores every other embedded code module against one module.
   * Synchronous: only reads embeddings already stored in the database.
   *
   * @param moduleId Module whose stored embedding is the reference vector.
   * @param language When given, only modules of this language are candidates.
   * @returns Map of candidate module id to similarity in [0, 1]; empty when
   *   the reference module has no embedding.
   */
  computeModuleVectorScores(moduleId: number, language?: string): Map<number, number> {
    const moduleRow = this.db.prepare(
      'SELECT embedding FROM code_modules WHERE id = ?'
    ).get(moduleId) as { embedding: Buffer | null } | undefined;

    if (!moduleRow?.embedding) return new Map<number, number>();

    let sql = 'SELECT id, embedding FROM code_modules WHERE id != ? AND embedding IS NOT NULL';
    const params: unknown[] = [moduleId];
    if (language) {
      sql += ' AND language = ?';
      params.push(language);
    }

    const candidates = this.db.prepare(sql).all(...params) as Array<{ id: number; embedding: Buffer }>;

    return EmbeddingEngine.scoreCandidates(moduleRow.embedding, candidates);
  }

  /** Serialize Float32Array to Buffer for SQLite BLOB storage (shares the array's memory). */
  static serialize(embedding: Float32Array): Buffer {
    return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
  }

  /**
   * Deserialize SQLite BLOB to Float32Array.
   *
   * The bytes are copied into a freshly allocated array, so the result is
   * always 4-byte aligned and owns exactly its own memory regardless of how
   * the input Buffer was allocated. Trailing bytes that do not fill a whole
   * float are ignored.
   */
  static deserialize(buffer: Buffer): Float32Array {
    const floats = new Float32Array(Math.floor(buffer.byteLength / Float32Array.BYTES_PER_ELEMENT));
    new Uint8Array(floats.buffer).set(buffer.subarray(0, floats.byteLength));
    return floats;
  }

  /**
   * Cosine similarity (embeddings are L2-normalized, so dot product = cosine).
   *
   * @returns The dot product clamped to [0, 1]; 0 when the lengths differ.
   */
  static similarity(a: Float32Array, b: Float32Array): number {
    if (a.length !== b.length) return 0;
    let dot = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i]! * b[i]!;
    }
    return Math.max(0, Math.min(1, dot));
  }

  /** Scores each candidate's stored embedding against the reference BLOB. */
  private static scoreCandidates(
    reference: Buffer,
    candidates: ReadonlyArray<{ id: number; embedding: Buffer }>,
  ): Map<number, number> {
    const incoming = EmbeddingEngine.deserialize(reference);
    const scores = new Map<number, number>();
    for (const c of candidates) {
      scores.set(c.id, EmbeddingEngine.similarity(incoming, EmbeddingEngine.deserialize(c.embedding)));
    }
    return scores;
  }

  /** Fires a sweep when the model is ready; failures are logged, never thrown. */
  private sweepIfReady(): void {
    if (!this.ready) return;
    this.sweep().catch(err => this.logger.error('Embedding sweep error:', err));
  }

  /** Embeds the given texts and writes each successful embedding with `updateSql`. */
  private async storeEmbeddings(
    items: ReadonlyArray<{ id: number; text: string }>,
    updateSql: string,
  ): Promise<number> {
    if (items.length === 0) return 0;

    const embeddings = await this.embedBatch(items.map(item => item.text));
    const updateStmt = this.db.prepare(updateSql);

    let stored = 0;
    items.forEach((item, index) => {
      const emb = embeddings[index];
      if (emb) {
        updateStmt.run(EmbeddingEngine.serialize(emb), item.id);
        stored++;
      }
    });
    return stored;
  }
}
+ }