@timmeck/brain 1.8.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BRAIN_PLAN.md +3324 -3324
- package/LICENSE +21 -21
- package/dist/api/server.d.ts +4 -0
- package/dist/api/server.js +73 -0
- package/dist/api/server.js.map +1 -1
- package/dist/brain.js +2 -1
- package/dist/brain.js.map +1 -1
- package/dist/cli/commands/dashboard.js +606 -572
- package/dist/cli/commands/dashboard.js.map +1 -1
- package/dist/dashboard/server.js +25 -25
- package/dist/db/migrations/001_core_schema.js +115 -115
- package/dist/db/migrations/002_learning_schema.js +33 -33
- package/dist/db/migrations/003_code_schema.js +48 -48
- package/dist/db/migrations/004_synapses_schema.js +52 -52
- package/dist/db/migrations/005_fts_indexes.js +73 -73
- package/dist/db/migrations/007_feedback.js +8 -8
- package/dist/db/migrations/008_git_integration.js +33 -33
- package/dist/db/migrations/009_embeddings.js +3 -3
- package/dist/db/repositories/antipattern.repository.js +3 -3
- package/dist/db/repositories/code-module.repository.js +32 -32
- package/dist/db/repositories/notification.repository.js +3 -3
- package/dist/db/repositories/project.repository.js +21 -21
- package/dist/db/repositories/rule.repository.js +24 -24
- package/dist/db/repositories/solution.repository.js +50 -50
- package/dist/db/repositories/synapse.repository.js +18 -18
- package/dist/db/repositories/terminal.repository.js +24 -24
- package/dist/embeddings/engine.d.ts +2 -2
- package/dist/embeddings/engine.js +17 -4
- package/dist/embeddings/engine.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/ipc/server.d.ts +8 -0
- package/dist/ipc/server.js +67 -1
- package/dist/ipc/server.js.map +1 -1
- package/dist/matching/error-matcher.js +5 -5
- package/dist/matching/fingerprint.js +6 -1
- package/dist/matching/fingerprint.js.map +1 -1
- package/dist/mcp/http-server.js +8 -2
- package/dist/mcp/http-server.js.map +1 -1
- package/dist/services/code.service.d.ts +3 -0
- package/dist/services/code.service.js +33 -4
- package/dist/services/code.service.js.map +1 -1
- package/dist/services/error.service.js +4 -3
- package/dist/services/error.service.js.map +1 -1
- package/dist/services/git.service.js +14 -14
- package/package.json +49 -49
- package/src/api/server.ts +395 -321
- package/src/brain.ts +266 -265
- package/src/cli/colors.ts +116 -116
- package/src/cli/commands/config.ts +169 -169
- package/src/cli/commands/dashboard.ts +755 -720
- package/src/cli/commands/doctor.ts +118 -118
- package/src/cli/commands/explain.ts +83 -83
- package/src/cli/commands/export.ts +31 -31
- package/src/cli/commands/import.ts +199 -199
- package/src/cli/commands/insights.ts +65 -65
- package/src/cli/commands/learn.ts +24 -24
- package/src/cli/commands/modules.ts +53 -53
- package/src/cli/commands/network.ts +67 -67
- package/src/cli/commands/projects.ts +42 -42
- package/src/cli/commands/query.ts +120 -120
- package/src/cli/commands/start.ts +62 -62
- package/src/cli/commands/status.ts +75 -75
- package/src/cli/commands/stop.ts +34 -34
- package/src/cli/ipc-helper.ts +22 -22
- package/src/cli/update-check.ts +63 -63
- package/src/code/fingerprint.ts +87 -87
- package/src/code/parsers/generic.ts +29 -29
- package/src/code/parsers/python.ts +54 -54
- package/src/code/parsers/typescript.ts +65 -65
- package/src/code/registry.ts +60 -60
- package/src/dashboard/server.ts +142 -142
- package/src/db/connection.ts +22 -22
- package/src/db/migrations/001_core_schema.ts +120 -120
- package/src/db/migrations/002_learning_schema.ts +38 -38
- package/src/db/migrations/003_code_schema.ts +53 -53
- package/src/db/migrations/004_synapses_schema.ts +57 -57
- package/src/db/migrations/005_fts_indexes.ts +78 -78
- package/src/db/migrations/006_synapses_phase3.ts +17 -17
- package/src/db/migrations/007_feedback.ts +13 -13
- package/src/db/migrations/008_git_integration.ts +38 -38
- package/src/db/migrations/009_embeddings.ts +8 -8
- package/src/db/repositories/antipattern.repository.ts +66 -66
- package/src/db/repositories/code-module.repository.ts +142 -142
- package/src/db/repositories/notification.repository.ts +66 -66
- package/src/db/repositories/project.repository.ts +93 -93
- package/src/db/repositories/rule.repository.ts +108 -108
- package/src/db/repositories/solution.repository.ts +154 -154
- package/src/db/repositories/synapse.repository.ts +153 -153
- package/src/db/repositories/terminal.repository.ts +101 -101
- package/src/embeddings/engine.ts +238 -217
- package/src/index.ts +63 -63
- package/src/ipc/client.ts +118 -118
- package/src/ipc/protocol.ts +35 -35
- package/src/ipc/router.ts +133 -133
- package/src/ipc/server.ts +176 -110
- package/src/learning/decay.ts +46 -46
- package/src/learning/pattern-extractor.ts +90 -90
- package/src/learning/rule-generator.ts +74 -74
- package/src/matching/error-matcher.ts +5 -5
- package/src/matching/fingerprint.ts +34 -29
- package/src/matching/similarity.ts +61 -61
- package/src/matching/tfidf.ts +74 -74
- package/src/matching/tokenizer.ts +41 -41
- package/src/mcp/auto-detect.ts +93 -93
- package/src/mcp/http-server.ts +140 -137
- package/src/mcp/server.ts +73 -73
- package/src/parsing/error-parser.ts +28 -28
- package/src/parsing/parsers/compiler.ts +93 -93
- package/src/parsing/parsers/generic.ts +28 -28
- package/src/parsing/parsers/go.ts +97 -97
- package/src/parsing/parsers/node.ts +69 -69
- package/src/parsing/parsers/python.ts +62 -62
- package/src/parsing/parsers/rust.ts +50 -50
- package/src/parsing/parsers/shell.ts +42 -42
- package/src/parsing/types.ts +47 -47
- package/src/research/gap-analyzer.ts +135 -135
- package/src/research/insight-generator.ts +123 -123
- package/src/research/research-engine.ts +116 -116
- package/src/research/synergy-detector.ts +126 -126
- package/src/research/template-extractor.ts +130 -130
- package/src/research/trend-analyzer.ts +127 -127
- package/src/services/code.service.ts +271 -238
- package/src/services/error.service.ts +4 -3
- package/src/services/git.service.ts +132 -132
- package/src/services/notification.service.ts +41 -41
- package/src/services/synapse.service.ts +59 -59
- package/src/services/terminal.service.ts +81 -81
- package/src/synapses/activation.ts +80 -80
- package/src/synapses/decay.ts +38 -38
- package/src/synapses/hebbian.ts +69 -69
- package/src/synapses/pathfinder.ts +81 -81
- package/src/synapses/synapse-manager.ts +109 -109
- package/src/types/code.types.ts +52 -52
- package/src/types/error.types.ts +67 -67
- package/src/types/ipc.types.ts +8 -8
- package/src/types/mcp.types.ts +53 -53
- package/src/types/research.types.ts +28 -28
- package/src/types/solution.types.ts +30 -30
- package/src/utils/events.ts +45 -45
- package/src/utils/hash.ts +5 -5
- package/src/utils/logger.ts +48 -48
- package/src/utils/paths.ts +19 -19
- package/tests/e2e/test_code_intelligence.py +1015 -0
- package/tests/e2e/test_error_memory.py +451 -0
- package/tests/e2e/test_full_integration.py +534 -0
- package/tests/fixtures/code-modules/modules.ts +83 -83
- package/tests/fixtures/errors/go.ts +9 -9
- package/tests/fixtures/errors/node.ts +24 -24
- package/tests/fixtures/errors/python.ts +21 -21
- package/tests/fixtures/errors/rust.ts +25 -25
- package/tests/fixtures/errors/shell.ts +15 -15
- package/tests/fixtures/solutions/solutions.ts +27 -27
- package/tests/helpers/setup-db.ts +52 -52
- package/tests/integration/code-flow.test.ts +86 -86
- package/tests/integration/error-flow.test.ts +83 -83
- package/tests/integration/ipc-flow.test.ts +166 -166
- package/tests/integration/learning-cycle.test.ts +82 -82
- package/tests/integration/synapse-flow.test.ts +117 -117
- package/tests/unit/code/analyzer.test.ts +58 -58
- package/tests/unit/code/fingerprint.test.ts +51 -51
- package/tests/unit/code/scorer.test.ts +55 -55
- package/tests/unit/learning/confidence-scorer.test.ts +60 -60
- package/tests/unit/learning/decay.test.ts +45 -45
- package/tests/unit/learning/pattern-extractor.test.ts +50 -50
- package/tests/unit/matching/error-matcher.test.ts +69 -69
- package/tests/unit/matching/fingerprint.test.ts +47 -47
- package/tests/unit/matching/similarity.test.ts +65 -65
- package/tests/unit/matching/tfidf.test.ts +71 -71
- package/tests/unit/matching/tokenizer.test.ts +83 -83
- package/tests/unit/parsing/parsers.test.ts +113 -113
- package/tests/unit/research/gap-analyzer.test.ts +45 -45
- package/tests/unit/research/trend-analyzer.test.ts +45 -45
- package/tests/unit/synapses/activation.test.ts +80 -80
- package/tests/unit/synapses/decay.test.ts +27 -27
- package/tests/unit/synapses/hebbian.test.ts +96 -96
- package/tests/unit/synapses/pathfinder.test.ts +72 -72
- package/tsconfig.json +18 -18
|
@@ -1,101 +1,101 @@
|
|
|
1
|
-
import type Database from 'better-sqlite3';
|
|
2
|
-
import type { Statement } from 'better-sqlite3';
|
|
3
|
-
|
|
4
|
-
/**
 * One row of the `terminals` table as returned by `SELECT *`.
 */
export interface TerminalRecord {
  /** Row id; the value `WHERE id = ?` is matched against. */
  id: number;
  /** Identifier used for lookups via `WHERE uuid = ?`. */
  uuid: string;
  /** Nullable project reference. */
  project_id: number | null;
  /** Presumably the OS process id of the terminal — confirm against the caller. */
  pid: number | null;
  /** Presumably the shell name or path — confirm against the caller. */
  shell: string | null;
  /** Presumably the terminal's working directory — confirm against the caller. */
  cwd: string | null;
  /** Not supplied on insert, so filled in by the database; sort key for connected terminals. */
  connected_at: string;
  /** Timestamp compared against the cutoff when stale terminals are cleaned up. */
  last_seen: string;
  /** `null` while the terminal counts as connected; set when it is marked disconnected. */
  disconnected_at: string | null;
}
|
|
15
|
-
|
|
16
|
-
/** Fields a caller supplies on insert: everything except the row id and the three timestamps. */
type CreateTerminalData = Omit<
  TerminalRecord,
  'id' | 'connected_at' | 'last_seen' | 'disconnected_at'
>;

/** Patch object for an update: any subset of the columns other than `id` and `connected_at`. */
type UpdateTerminalData = Partial<Omit<TerminalRecord, 'id' | 'connected_at'>>;
|
|
18
|
-
|
|
19
|
-
/**
 * Data-access object for the `terminals` table.
 *
 * Every SQL statement is compiled once in the constructor and reused by the
 * methods below.
 */
export class TerminalRepository {
  private stmts: Record<string, Statement>;

  constructor(private db: Database.Database) {
    // SQL text keyed by the name the methods use to look the statement up.
    const sqlByName: Record<string, string> = {
      create: `
        INSERT INTO terminals (uuid, project_id, pid, shell, cwd)
        VALUES (@uuid, @project_id, @pid, @shell, @cwd)
      `,
      getById: `
        SELECT * FROM terminals WHERE id = ?
      `,
      update: `
        UPDATE terminals
        SET project_id = COALESCE(@project_id, project_id),
            pid = COALESCE(@pid, pid),
            shell = COALESCE(@shell, shell),
            cwd = COALESCE(@cwd, cwd),
            last_seen = COALESCE(@last_seen, last_seen),
            disconnected_at = COALESCE(@disconnected_at, disconnected_at)
        WHERE id = @id
      `,
      delete: `
        DELETE FROM terminals WHERE id = ?
      `,
      findByUuid: `
        SELECT * FROM terminals WHERE uuid = ?
      `,
      findConnected: `
        SELECT * FROM terminals WHERE disconnected_at IS NULL ORDER BY connected_at DESC
      `,
      cleanupStale: `
        UPDATE terminals
        SET disconnected_at = datetime('now')
        WHERE disconnected_at IS NULL AND last_seen < ?
      `,
    };

    this.stmts = Object.fromEntries(
      Object.entries(sqlByName).map(([name, sql]) => [name, this.db.prepare(sql)]),
    );
  }

  /** Inserts a terminal and returns the id of the new row. */
  create(data: CreateTerminalData): number {
    const { uuid, project_id, pid, shell, cwd } = data;
    const { lastInsertRowid } = this.stmts.create.run({
      uuid,
      project_id: project_id ?? null,
      pid: pid ?? null,
      shell: shell ?? null,
      cwd: cwd ?? null,
    });
    return lastInsertRowid as number;
  }

  /** Fetches a terminal by row id, or `undefined` when no row matches. */
  getById(id: number): TerminalRecord | undefined {
    const row = this.stmts.getById.get(id);
    return row as TerminalRecord | undefined;
  }

  /**
   * Patches the given columns of one terminal.
   * Fields that are missing or `null` are bound as NULL, which the
   * `COALESCE` in the statement turns into "keep the stored value".
   */
  update(id: number, data: UpdateTerminalData): void {
    const { project_id, pid, shell, cwd, last_seen, disconnected_at } = data;
    this.stmts.update.run({
      id,
      project_id: project_id ?? null,
      pid: pid ?? null,
      shell: shell ?? null,
      cwd: cwd ?? null,
      last_seen: last_seen ?? null,
      disconnected_at: disconnected_at ?? null,
    });
  }

  /** Deletes the terminal with the given row id. */
  delete(id: number): void {
    this.stmts.delete.run(id);
  }

  /** Fetches a terminal by uuid, or `undefined` when no row matches. */
  findByUuid(uuid: string): TerminalRecord | undefined {
    const row = this.stmts.findByUuid.get(uuid);
    return row as TerminalRecord | undefined;
  }

  /** Lists terminals whose `disconnected_at` is NULL, most recently connected first. */
  findConnected(): TerminalRecord[] {
    const rows = this.stmts.findConnected.all();
    return rows as TerminalRecord[];
  }

  /**
   * Marks every still-connected terminal whose `last_seen` is before
   * `olderThan` as disconnected and returns how many rows were changed.
   */
  cleanupStale(olderThan: string): number {
    const { changes } = this.stmts.cleanupStale.run(olderThan);
    return changes;
  }
}
|
|
1
|
+
import type Database from 'better-sqlite3';
|
|
2
|
+
import type { Statement } from 'better-sqlite3';
|
|
3
|
+
|
|
4
|
+
/**
 * Shape of a `terminals` row as read back with `SELECT *`.
 *
 * Timestamps are plain strings; `connected_at` is never part of the INSERT
 * column list, so its value comes from the database.
 */
export interface TerminalRecord {
  // Identity
  id: number; // row id used by getById / update / delete
  uuid: string; // lookup key for findByUuid

  // Descriptive columns (all nullable)
  project_id: number | null;
  pid: number | null; // presumably the terminal's process id — verify with caller
  shell: string | null; // presumably the shell in use — verify with caller
  cwd: string | null; // presumably the working directory — verify with caller

  // Lifecycle timestamps
  connected_at: string; // ordering key for findConnected (newest first)
  last_seen: string; // compared with the cutoff in cleanupStale
  disconnected_at: string | null; // NULL means the terminal is still connected
}
|
|
15
|
+
|
|
16
|
+
/** Insert payload: a `TerminalRecord` without the row id and the three timestamp columns. */
type CreateTerminalData = Omit<TerminalRecord, 'id' | 'connected_at' | 'last_seen' | 'disconnected_at'>;

/** Update payload: every column except `id` and `connected_at`, each one optional. */
type UpdateTerminalData = Partial<Omit<TerminalRecord, 'id' | 'connected_at'>>;
|
|
18
|
+
|
|
19
|
+
/**
 * Repository for the `terminals` table.
 *
 * All statements are prepared once in the constructor; the public methods
 * only bind parameters and run them.
 */
export class TerminalRepository {
  private readonly stmts: Record<string, Statement>;

  constructor(private db: Database.Database) {
    this.stmts = {
      create: this.db.prepare(`
        INSERT INTO terminals (uuid, project_id, pid, shell, cwd)
        VALUES (@uuid, @project_id, @pid, @shell, @cwd)
      `),
      getById: this.db.prepare(`
        SELECT * FROM terminals WHERE id = ?
      `),
      // COALESCE(@x, x): a NULL parameter keeps the value already stored.
      update: this.db.prepare(`
        UPDATE terminals
        SET project_id = COALESCE(@project_id, project_id),
            pid = COALESCE(@pid, pid),
            shell = COALESCE(@shell, shell),
            cwd = COALESCE(@cwd, cwd),
            last_seen = COALESCE(@last_seen, last_seen),
            disconnected_at = COALESCE(@disconnected_at, disconnected_at)
        WHERE id = @id
      `),
      delete: this.db.prepare(`
        DELETE FROM terminals WHERE id = ?
      `),
      findByUuid: this.db.prepare(`
        SELECT * FROM terminals WHERE uuid = ?
      `),
      findConnected: this.db.prepare(`
        SELECT * FROM terminals WHERE disconnected_at IS NULL ORDER BY connected_at DESC
      `),
      cleanupStale: this.db.prepare(`
        UPDATE terminals
        SET disconnected_at = datetime('now')
        WHERE disconnected_at IS NULL AND last_seen < ?
      `),
    };
  }

  /**
   * Inserts a new terminal row.
   *
   * @param data Column values for the new row; missing optional values are stored as NULL.
   * @returns The row id of the inserted terminal, always as a `number`.
   */
  create(data: CreateTerminalData): number {
    const result = this.stmts.create.run({
      uuid: data.uuid,
      project_id: data.project_id ?? null,
      pid: data.pid ?? null,
      shell: data.shell ?? null,
      cwd: data.cwd ?? null,
    });
    // better-sqlite3 reports lastInsertRowid as number | bigint; convert
    // instead of asserting so callers never receive a bigint typed as number.
    return Number(result.lastInsertRowid);
  }

  /** Returns the terminal with the given row id, or `undefined` if there is none. */
  getById(id: number): TerminalRecord | undefined {
    return this.stmts.getById.get(id) as TerminalRecord | undefined;
  }

  /**
   * Updates the supplied columns of one terminal.
   *
   * Omitted and `null` fields are both bound as NULL, and the statement's
   * `COALESCE` then keeps the stored value. As a consequence this method
   * cannot reset a column (for example `disconnected_at`) back to NULL.
   *
   * @param id Row id of the terminal to update.
   * @param data Columns to overwrite.
   */
  update(id: number, data: UpdateTerminalData): void {
    this.stmts.update.run({
      id,
      project_id: data.project_id ?? null,
      pid: data.pid ?? null,
      shell: data.shell ?? null,
      cwd: data.cwd ?? null,
      last_seen: data.last_seen ?? null,
      disconnected_at: data.disconnected_at ?? null,
    });
  }

  /** Deletes the terminal with the given row id. */
  delete(id: number): void {
    this.stmts.delete.run(id);
  }

  /** Returns the terminal with the given uuid, or `undefined` if there is none. */
  findByUuid(uuid: string): TerminalRecord | undefined {
    return this.stmts.findByUuid.get(uuid) as TerminalRecord | undefined;
  }

  /** Returns all terminals with `disconnected_at IS NULL`, newest `connected_at` first. */
  findConnected(): TerminalRecord[] {
    return this.stmts.findConnected.all() as TerminalRecord[];
  }

  /**
   * Marks connected terminals whose `last_seen` is earlier than the cutoff as disconnected.
   *
   * @param olderThan Cutoff compared against `last_seen` by the database.
   * @returns Number of rows that were updated.
   */
  cleanupStale(olderThan: string): number {
    const result = this.stmts.cleanupStale.run(olderThan);
    return result.changes;
  }
}
|
package/src/embeddings/engine.ts
CHANGED
|
@@ -1,217 +1,238 @@
|
|
|
1
|
-
import path from 'node:path';
|
|
2
|
-
import type Database from 'better-sqlite3';
|
|
3
|
-
import { getLogger } from '../utils/logger.js';
|
|
4
|
-
|
|
5
|
-
/** Settings consumed by `EmbeddingEngine`. */
export interface EmbeddingConfig {
  /** When false, model loading and the background sweep are skipped. */
  enabled: boolean;
  /** Model identifier handed to the transformers `pipeline()` factory. */
  modelName: string;
  /** Directory assigned to the transformers `env.cacheDir`. */
  cacheDir: string;
  /** Delay, in milliseconds, between periodic sweeps. */
  sweepIntervalMs: number;
  /** Maximum number of rows per table embedded in one sweep. */
  batchSize: number;
}
|
|
12
|
-
|
|
13
|
-
/**
 * Callable produced by the transformers `pipeline()` factory.
 * Kept as `any` because the library is only loaded through a dynamic import,
 * so no static type for it is in scope here.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type Pipeline = any;
|
|
15
|
-
|
|
16
|
-
export class EmbeddingEngine {
|
|
17
|
-
private pipeline: Pipeline = null;
|
|
18
|
-
private ready = false;
|
|
19
|
-
private loading = false;
|
|
20
|
-
private logger = getLogger();
|
|
21
|
-
private sweepTimer: ReturnType<typeof setInterval> | null = null;
|
|
22
|
-
|
|
23
|
-
constructor(
|
|
24
|
-
private config: EmbeddingConfig,
|
|
25
|
-
private db: Database.Database,
|
|
26
|
-
) {}
|
|
27
|
-
|
|
28
|
-
async initialize(): Promise<void> {
|
|
29
|
-
if (!this.config.enabled || this.loading || this.ready) return;
|
|
30
|
-
|
|
31
|
-
this.loading = true;
|
|
32
|
-
try {
|
|
33
|
-
const { pipeline, env } = await import('@huggingface/transformers');
|
|
34
|
-
env.cacheDir = this.config.cacheDir;
|
|
35
|
-
|
|
36
|
-
this.pipeline = await pipeline(
|
|
37
|
-
'feature-extraction',
|
|
38
|
-
this.config.modelName,
|
|
39
|
-
{ dtype: 'q8' },
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
this.ready = true;
|
|
43
|
-
this.logger.info(`Embedding model loaded: ${this.config.modelName}`);
|
|
44
|
-
} catch (err) {
|
|
45
|
-
this.logger.warn(`Failed to load embedding model (will retry): ${err}`);
|
|
46
|
-
this.ready = false;
|
|
47
|
-
} finally {
|
|
48
|
-
this.loading = false;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/** Start background embedding sweep */
|
|
53
|
-
start(): void {
|
|
54
|
-
if (!this.config.enabled) return;
|
|
55
|
-
|
|
56
|
-
// Initialize model in background
|
|
57
|
-
this.initialize().then(() => {
|
|
58
|
-
if (this.ready) {
|
|
59
|
-
// Run initial sweep
|
|
60
|
-
this.sweep().catch(err => this.logger.error('Embedding sweep error:', err));
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
// Periodic sweep for new entries
|
|
65
|
-
this.sweepTimer = setInterval(() => {
|
|
66
|
-
if (this.ready) {
|
|
67
|
-
this.sweep().catch(err => this.logger.error('Embedding sweep error:', err));
|
|
68
|
-
}
|
|
69
|
-
}, this.config.sweepIntervalMs);
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
stop(): void {
|
|
73
|
-
if (this.sweepTimer) {
|
|
74
|
-
clearInterval(this.sweepTimer);
|
|
75
|
-
this.sweepTimer = null;
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
isReady(): boolean {
|
|
80
|
-
return this.ready;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
/** Generate embedding for a single text */
|
|
84
|
-
async embed(text: string): Promise<Float32Array | null> {
|
|
85
|
-
if (!this.ready || !this.pipeline) return null;
|
|
86
|
-
|
|
87
|
-
try {
|
|
88
|
-
const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
|
|
89
|
-
const data = output.tolist()[0] as number[];
|
|
90
|
-
return new Float32Array(data);
|
|
91
|
-
} catch (err) {
|
|
92
|
-
this.logger.error(`Embedding error: ${err}`);
|
|
93
|
-
return null;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
/** Generate embeddings for a batch of texts */
|
|
98
|
-
async embedBatch(texts: string[]): Promise<(Float32Array | null)[]> {
|
|
99
|
-
if (!this.ready || !this.pipeline || texts.length === 0) return texts.map(() => null);
|
|
100
|
-
|
|
101
|
-
try {
|
|
102
|
-
const output = await this.pipeline(texts, { pooling: 'mean', normalize: true });
|
|
103
|
-
const list = output.tolist() as number[][];
|
|
104
|
-
return list.map(v => new Float32Array(v));
|
|
105
|
-
} catch (err) {
|
|
106
|
-
this.logger.error(`Batch embedding error: ${err}`);
|
|
107
|
-
return texts.map(() => null);
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/** Compute and store embeddings for entries that don't have them yet */
|
|
112
|
-
async sweep(): Promise<{ errors: number; modules: number }> {
|
|
113
|
-
let errorsProcessed = 0;
|
|
114
|
-
let modulesProcessed = 0;
|
|
115
|
-
|
|
116
|
-
// Process errors without embeddings
|
|
117
|
-
const pendingErrors = this.db.prepare(
|
|
118
|
-
'SELECT id, type, message, context FROM errors WHERE embedding IS NULL ORDER BY id DESC LIMIT ?'
|
|
119
|
-
).all(this.config.batchSize) as Array<{ id: number; type: string; message: string; context: string | null }>;
|
|
120
|
-
|
|
121
|
-
if (pendingErrors.length > 0) {
|
|
122
|
-
const texts = pendingErrors.map(e =>
|
|
123
|
-
[e.type, e.message, e.context].filter(Boolean).join(' ')
|
|
124
|
-
);
|
|
125
|
-
const embeddings = await this.embedBatch(texts);
|
|
126
|
-
|
|
127
|
-
const updateStmt = this.db.prepare('UPDATE errors SET embedding = ? WHERE id = ?');
|
|
128
|
-
for (let i = 0; i < pendingErrors.length; i++) {
|
|
129
|
-
const emb = embeddings[i];
|
|
130
|
-
if (emb) {
|
|
131
|
-
updateStmt.run(EmbeddingEngine.serialize(emb), pendingErrors[i]!.id);
|
|
132
|
-
errorsProcessed++;
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
// Process code modules without embeddings
|
|
138
|
-
const pendingModules = this.db.prepare(
|
|
139
|
-
'SELECT id, name, description, file_path FROM code_modules WHERE embedding IS NULL ORDER BY id DESC LIMIT ?'
|
|
140
|
-
).all(this.config.batchSize) as Array<{ id: number; name: string; description: string | null; file_path: string }>;
|
|
141
|
-
|
|
142
|
-
if (pendingModules.length > 0) {
|
|
143
|
-
const texts = pendingModules.map(m =>
|
|
144
|
-
[m.name, m.description, m.file_path].filter(Boolean).join(' ')
|
|
145
|
-
);
|
|
146
|
-
const embeddings = await this.embedBatch(texts);
|
|
147
|
-
|
|
148
|
-
const updateStmt = this.db.prepare('UPDATE code_modules SET embedding = ? WHERE id = ?');
|
|
149
|
-
for (let i = 0; i < pendingModules.length; i++) {
|
|
150
|
-
const emb = embeddings[i];
|
|
151
|
-
if (emb) {
|
|
152
|
-
updateStmt.run(EmbeddingEngine.serialize(emb), pendingModules[i]!.id);
|
|
153
|
-
modulesProcessed++;
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
if (errorsProcessed > 0 || modulesProcessed > 0) {
|
|
159
|
-
this.logger.info(`Embedding sweep: ${errorsProcessed} errors, ${modulesProcessed} modules processed`);
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
return { errors: errorsProcessed, modules: modulesProcessed };
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
/** Load vector scores for error matching (sync - reads pre-computed embeddings from DB) */
|
|
166
|
-
computeErrorVectorScores(errorId: number, projectId: number): Map<number, number> {
|
|
167
|
-
const scores = new Map<number, number>();
|
|
168
|
-
|
|
169
|
-
const errorRow = this.db.prepare(
|
|
170
|
-
'SELECT embedding FROM errors WHERE id = ?'
|
|
171
|
-
).get(errorId) as { embedding: Buffer | null } | undefined;
|
|
172
|
-
|
|
173
|
-
if (!errorRow?.embedding) return scores;
|
|
174
|
-
|
|
175
|
-
const incoming = EmbeddingEngine.deserialize(errorRow.embedding);
|
|
176
|
-
|
|
177
|
-
const candidates = this.db.prepare(
|
|
178
|
-
'SELECT id, embedding FROM errors WHERE project_id = ? AND id != ? AND embedding IS NOT NULL'
|
|
179
|
-
).all(projectId, errorId) as Array<{ id: number; embedding: Buffer }>;
|
|
180
|
-
|
|
181
|
-
for (const c of candidates) {
|
|
182
|
-
const candidate = EmbeddingEngine.deserialize(c.embedding);
|
|
183
|
-
scores.set(c.id, EmbeddingEngine.similarity(incoming, candidate));
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
return scores;
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
/** Load vector scores for
|
|
190
|
-
computeModuleVectorScores(
|
|
191
|
-
const scores = new Map<number, number>();
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
return
|
|
216
|
-
}
|
|
217
|
-
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import type Database from 'better-sqlite3';
|
|
3
|
+
import { getLogger } from '../utils/logger.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Configuration for the embedding engine.
 *
 * - `enabled`: master switch; nothing is loaded or scheduled when false.
 * - `modelName`: model passed to the `feature-extraction` pipeline.
 * - `cacheDir`: written to the transformers `env.cacheDir` before the model loads.
 * - `sweepIntervalMs`: period of the background sweep timer, in milliseconds.
 * - `batchSize`: `LIMIT` applied to each "rows without embedding" query.
 */
export interface EmbeddingConfig {
  enabled: boolean;
  modelName: string;
  cacheDir: string;
  sweepIntervalMs: number;
  batchSize: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Type of the feature-extraction pipeline instance.
 * Deliberately `any`: the transformers package is imported dynamically at
 * runtime, so its types are not referenced statically.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type Pipeline = any;
|
|
15
|
+
|
|
16
|
+
/**
 * Generates text embeddings with a transformers feature-extraction pipeline,
 * stores them as BLOBs on `errors` and `code_modules` rows, and compares
 * stored embeddings with each other.
 *
 * The model is loaded lazily; until it is ready, `embed` / `embedBatch`
 * return `null` results instead of throwing.
 */
export class EmbeddingEngine {
  private pipeline: Pipeline = null;
  private ready = false;
  private loading = false;
  /** True while a timer-driven load-and-sweep cycle is in flight. */
  private cycleRunning = false;
  private logger = getLogger();
  private sweepTimer: ReturnType<typeof setInterval> | null = null;

  constructor(
    private config: EmbeddingConfig,
    private db: Database.Database,
  ) {}

  /**
   * Loads the embedding model. Does nothing when embeddings are disabled,
   * a load is already running, or the model is already ready.
   * Load failures are logged and leave the engine in the not-ready state.
   */
  async initialize(): Promise<void> {
    if (!this.config.enabled || this.loading || this.ready) return;

    this.loading = true;
    try {
      const { pipeline, env } = await import('@huggingface/transformers');
      env.cacheDir = this.config.cacheDir;

      this.pipeline = await pipeline(
        'feature-extraction',
        this.config.modelName,
        { dtype: 'q8' },
      );

      this.ready = true;
      this.logger.info(`Embedding model loaded: ${this.config.modelName}`);
    } catch (err) {
      this.logger.warn(`Failed to load embedding model (will retry): ${err}`);
      this.ready = false;
    } finally {
      this.loading = false;
    }
  }

  /**
   * Starts the background work: one immediate load-and-sweep cycle, then one
   * cycle every `sweepIntervalMs`. Calling it again while running is a no-op,
   * so a second call can no longer leak the first interval.
   */
  start(): void {
    if (!this.config.enabled || this.sweepTimer) return;

    void this.runCycle();
    this.sweepTimer = setInterval(() => {
      void this.runCycle();
    }, this.config.sweepIntervalMs);
  }

  /** Stops the periodic sweep. A cycle that is already running is not interrupted. */
  stop(): void {
    if (this.sweepTimer) {
      clearInterval(this.sweepTimer);
      this.sweepTimer = null;
    }
  }

  /** Whether the model has been loaded successfully. */
  isReady(): boolean {
    return this.ready;
  }

  /**
   * Generates the embedding for a single text.
   *
   * @returns The embedding, or `null` when the model is not ready or the pipeline throws.
   */
  async embed(text: string): Promise<Float32Array | null> {
    if (!this.ready || !this.pipeline) return null;

    try {
      const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
      const data = output.tolist()[0] as number[];
      return new Float32Array(data);
    } catch (err) {
      this.logger.error(`Embedding error: ${err}`);
      return null;
    }
  }

  /**
   * Generates embeddings for several texts in one pipeline call.
   *
   * @returns One entry per input text; every entry is `null` when the model
   *          is not ready or the pipeline throws.
   */
  async embedBatch(texts: string[]): Promise<(Float32Array | null)[]> {
    if (!this.ready || !this.pipeline || texts.length === 0) return texts.map(() => null);

    try {
      const output = await this.pipeline(texts, { pooling: 'mean', normalize: true });
      const list = output.tolist() as number[][];
      return list.map(v => new Float32Array(v));
    } catch (err) {
      this.logger.error(`Batch embedding error: ${err}`);
      return texts.map(() => null);
    }
  }

  /**
   * Computes and stores embeddings for up to `batchSize` errors and up to
   * `batchSize` code modules that do not have one yet (newest ids first).
   *
   * @returns How many rows of each kind received an embedding.
   */
  async sweep(): Promise<{ errors: number; modules: number }> {
    const errorsProcessed = await this.embedPending<{
      id: number;
      type: string;
      message: string;
      context: string | null;
    }>(
      'SELECT id, type, message, context FROM errors WHERE embedding IS NULL ORDER BY id DESC LIMIT ?',
      'UPDATE errors SET embedding = ? WHERE id = ?',
      e => [e.type, e.message, e.context],
    );

    const modulesProcessed = await this.embedPending<{
      id: number;
      name: string;
      description: string | null;
      file_path: string;
    }>(
      'SELECT id, name, description, file_path FROM code_modules WHERE embedding IS NULL ORDER BY id DESC LIMIT ?',
      'UPDATE code_modules SET embedding = ? WHERE id = ?',
      m => [m.name, m.description, m.file_path],
    );

    if (errorsProcessed > 0 || modulesProcessed > 0) {
      this.logger.info(`Embedding sweep: ${errorsProcessed} errors, ${modulesProcessed} modules processed`);
    }

    return { errors: errorsProcessed, modules: modulesProcessed };
  }

  /**
   * Similarity of one error's stored embedding to every other embedded error
   * in the same project (sync - reads pre-computed embeddings from DB).
   *
   * @returns Map from candidate error id to similarity; empty when the error
   *          has no stored embedding.
   */
  computeErrorVectorScores(errorId: number, projectId: number): Map<number, number> {
    const errorRow = this.db.prepare(
      'SELECT embedding FROM errors WHERE id = ?'
    ).get(errorId) as { embedding: Buffer | null } | undefined;

    if (!errorRow?.embedding) return new Map<number, number>();

    const candidates = this.db.prepare(
      'SELECT id, embedding FROM errors WHERE project_id = ? AND id != ? AND embedding IS NOT NULL'
    ).all(projectId, errorId) as Array<{ id: number; embedding: Buffer }>;

    return EmbeddingEngine.scoreCandidates(errorRow.embedding, candidates);
  }

  /**
   * Similarity of one module's stored embedding to every other embedded
   * module, optionally restricted to one language (sync - reads pre-computed
   * embeddings from DB).
   *
   * @returns Map from candidate module id to similarity; empty when the
   *          module has no stored embedding.
   */
  computeModuleVectorScores(moduleId: number, language?: string): Map<number, number> {
    const moduleRow = this.db.prepare(
      'SELECT embedding FROM code_modules WHERE id = ?'
    ).get(moduleId) as { embedding: Buffer | null } | undefined;

    if (!moduleRow?.embedding) return new Map<number, number>();

    // The language filter is appended as a bound parameter, never interpolated.
    let sql = 'SELECT id, embedding FROM code_modules WHERE id != ? AND embedding IS NOT NULL';
    const params: unknown[] = [moduleId];
    if (language) {
      sql += ' AND language = ?';
      params.push(language);
    }

    const candidates = this.db.prepare(sql).all(...params) as Array<{ id: number; embedding: Buffer }>;

    return EmbeddingEngine.scoreCandidates(moduleRow.embedding, candidates);
  }

  /** Serialize Float32Array to Buffer for SQLite BLOB storage */
  static serialize(embedding: Float32Array): Buffer {
    return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
  }

  /**
   * Deserialize SQLite BLOB to Float32Array.
   *
   * The bytes are copied into a freshly allocated array, so the result does
   * not depend on the source buffer's offset being 4-byte aligned. Trailing
   * bytes that do not form a whole float are ignored.
   */
  static deserialize(buffer: Buffer): Float32Array {
    const count = Math.floor(buffer.byteLength / 4);
    const out = new Float32Array(count);
    new Uint8Array(out.buffer).set(buffer.subarray(0, count * 4));
    return out;
  }

  /**
   * Cosine similarity (embeddings are L2-normalized, so dot product = cosine),
   * clamped to [0, 1]. Returns 0 for vectors of different length and for a
   * non-finite dot product.
   */
  static similarity(a: Float32Array, b: Float32Array): number {
    if (a.length !== b.length) return 0;
    let dot = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i]! * b[i]!;
    }
    if (!Number.isFinite(dot)) return 0;
    return Math.max(0, Math.min(1, dot));
  }

  /**
   * One background cycle: load the model if an earlier attempt failed, then
   * sweep. Skipped while the previous cycle is still running so that two
   * sweeps never embed the same pending rows at the same time.
   */
  private async runCycle(): Promise<void> {
    if (this.cycleRunning) return;
    this.cycleRunning = true;
    try {
      if (!this.ready) await this.initialize();
      if (this.ready) await this.sweep();
    } catch (err) {
      this.logger.error('Embedding sweep error:', err);
    } finally {
      this.cycleRunning = false;
    }
  }

  /**
   * Embeds one batch of rows selected by `selectSql` and writes each
   * embedding back with `updateSql`; returns how many rows were updated.
   */
  private async embedPending<TRow extends { id: number }>(
    selectSql: string,
    updateSql: string,
    textParts: (row: TRow) => Array<string | null>,
  ): Promise<number> {
    const pending = this.db.prepare(selectSql).all(this.config.batchSize) as TRow[];
    if (pending.length === 0) return 0;

    // Empty and null parts are dropped before joining.
    const texts = pending.map(row => textParts(row).filter(Boolean).join(' '));
    const embeddings = await this.embedBatch(texts);

    const updateStmt = this.db.prepare(updateSql);
    let stored = 0;
    pending.forEach((row, i) => {
      const emb = embeddings[i];
      if (emb) {
        updateStmt.run(EmbeddingEngine.serialize(emb), row.id);
        stored++;
      }
    });
    return stored;
  }

  /** Scores every candidate embedding against the serialized source embedding. */
  private static scoreCandidates(
    source: Buffer,
    candidates: Array<{ id: number; embedding: Buffer }>,
  ): Map<number, number> {
    const scores = new Map<number, number>();
    const incoming = EmbeddingEngine.deserialize(source);
    for (const c of candidates) {
      scores.set(c.id, EmbeddingEngine.similarity(incoming, EmbeddingEngine.deserialize(c.embedding)));
    }
    return scores;
  }
}
|