trellis 1.0.8 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +533 -82
- package/bin/trellis.mjs +2 -0
- package/dist/cli/index.js +4718 -0
- package/dist/core/index.js +12 -0
- package/dist/decisions/index.js +19 -0
- package/dist/embeddings/index.js +43 -0
- package/dist/index-1j1anhmr.js +4038 -0
- package/dist/index-3s0eak0p.js +1556 -0
- package/dist/index-8pce39mh.js +272 -0
- package/dist/index-a76rekgs.js +67 -0
- package/dist/index-cy9k1g6v.js +684 -0
- package/dist/index-fd4e26s4.js +69 -0
- package/dist/{store/eav-store.js → index-gkvhzm9f.js} +4 -6
- package/dist/index-gnw8d7d6.js +51 -0
- package/dist/index-vkpkfwhq.js +817 -0
- package/dist/index.js +118 -2876
- package/dist/links/index.js +55 -0
- package/dist/transformers-m9je15kg.js +32491 -0
- package/dist/vcs/index.js +110 -0
- package/logo.png +0 -0
- package/logo.svg +9 -0
- package/package.json +79 -76
- package/src/cli/index.ts +2340 -0
- package/src/core/index.ts +35 -0
- package/src/core/kernel/middleware.ts +44 -0
- package/src/core/persist/backend.ts +64 -0
- package/src/core/store/eav-store.ts +467 -0
- package/src/decisions/auto-capture.ts +136 -0
- package/src/decisions/hooks.ts +163 -0
- package/src/decisions/index.ts +261 -0
- package/src/decisions/types.ts +103 -0
- package/src/embeddings/chunker.ts +327 -0
- package/src/embeddings/index.ts +41 -0
- package/src/embeddings/model.ts +95 -0
- package/src/embeddings/search.ts +305 -0
- package/src/embeddings/store.ts +313 -0
- package/src/embeddings/types.ts +85 -0
- package/src/engine.ts +1083 -0
- package/src/garden/cluster.ts +330 -0
- package/src/garden/garden.ts +306 -0
- package/src/garden/index.ts +29 -0
- package/src/git/git-exporter.ts +286 -0
- package/src/git/git-importer.ts +329 -0
- package/src/git/git-reader.ts +189 -0
- package/src/git/index.ts +22 -0
- package/src/identity/governance.ts +211 -0
- package/src/identity/identity.ts +224 -0
- package/src/identity/index.ts +30 -0
- package/src/identity/signing-middleware.ts +97 -0
- package/src/index.ts +20 -0
- package/src/links/index.ts +49 -0
- package/src/links/lifecycle.ts +400 -0
- package/src/links/parser.ts +484 -0
- package/src/links/ref-index.ts +186 -0
- package/src/links/resolver.ts +314 -0
- package/src/links/types.ts +108 -0
- package/src/mcp/index.ts +22 -0
- package/src/mcp/server.ts +1278 -0
- package/src/semantic/csharp-parser.ts +493 -0
- package/src/semantic/go-parser.ts +585 -0
- package/src/semantic/index.ts +34 -0
- package/src/semantic/java-parser.ts +456 -0
- package/src/semantic/python-parser.ts +659 -0
- package/src/semantic/ruby-parser.ts +446 -0
- package/src/semantic/rust-parser.ts +784 -0
- package/src/semantic/semantic-merge.ts +210 -0
- package/src/semantic/ts-parser.ts +681 -0
- package/src/semantic/types.ts +175 -0
- package/src/sync/index.ts +32 -0
- package/src/sync/memory-transport.ts +66 -0
- package/src/sync/reconciler.ts +237 -0
- package/src/sync/sync-engine.ts +258 -0
- package/src/sync/types.ts +104 -0
- package/src/vcs/blob-store.ts +124 -0
- package/src/vcs/branch.ts +150 -0
- package/src/vcs/checkpoint.ts +64 -0
- package/src/vcs/decompose.ts +469 -0
- package/src/vcs/diff.ts +409 -0
- package/src/vcs/engine-context.ts +26 -0
- package/src/vcs/index.ts +23 -0
- package/src/vcs/issue.ts +800 -0
- package/src/vcs/merge.ts +425 -0
- package/src/vcs/milestone.ts +124 -0
- package/src/vcs/ops.ts +59 -0
- package/src/vcs/types.ts +213 -0
- package/src/vcs/vcs-middleware.ts +81 -0
- package/src/watcher/fs-watcher.ts +217 -0
- package/src/watcher/index.ts +9 -0
- package/src/watcher/ingestion.ts +116 -0
- package/dist/ai/index.js +0 -688
- package/dist/cli/server.js +0 -3321
- package/dist/cli/tql.js +0 -5282
- package/dist/client/tql-client.js +0 -108
- package/dist/graph/index.js +0 -2248
- package/dist/kernel/logic-middleware.js +0 -179
- package/dist/kernel/middleware.js +0 -0
- package/dist/kernel/operations.js +0 -32
- package/dist/kernel/schema-middleware.js +0 -34
- package/dist/kernel/security-middleware.js +0 -53
- package/dist/kernel/trellis-kernel.js +0 -2239
- package/dist/kernel/workspace.js +0 -91
- package/dist/persist/backend.js +0 -0
- package/dist/persist/sqlite-backend.js +0 -123
- package/dist/query/index.js +0 -1643
- package/dist/server/index.js +0 -3309
- package/dist/workflows/index.js +0 -3160
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Search Integration
|
|
3
|
+
*
|
|
4
|
+
* Connects the TrellisVcsEngine to the embedding system.
|
|
5
|
+
* Provides reindex (full rebuild) and search (query → ranked results).
|
|
6
|
+
* The embedder function is pluggable for testing with mock vectors.
|
|
7
|
+
*
|
|
8
|
+
* @see TRL-20
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { join } from 'path';
|
|
12
|
+
import { readFileSync, existsSync } from 'fs';
|
|
13
|
+
import { VectorStore } from './store.js';
|
|
14
|
+
import { embed } from './model.js';
|
|
15
|
+
import {
|
|
16
|
+
chunkIssue,
|
|
17
|
+
chunkMilestone,
|
|
18
|
+
chunkDecision,
|
|
19
|
+
chunkMarkdown,
|
|
20
|
+
chunkCodeEntities,
|
|
21
|
+
chunkFile,
|
|
22
|
+
} from './chunker.js';
|
|
23
|
+
import type {
|
|
24
|
+
ChunkMeta,
|
|
25
|
+
EmbeddingRecord,
|
|
26
|
+
SearchOptions,
|
|
27
|
+
SearchResult,
|
|
28
|
+
} from './types.js';
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Types
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
/** Minimal engine interface — avoids importing the full engine for testability */
export interface SearchableEngine {
  /** Workspace root; tracked-file paths below are resolved relative to it. */
  getRootPath(): string;
  /** Files currently tracked by the engine (chunked as markdown/file content). */
  trackedFiles(): Array<{ path: string; contentHash?: string }>;
  /** Issues to index — title and description are chunked. */
  listIssues(filters?: any): Array<{
    id: string;
    title?: string;
    description?: string;
  }>;
  /** Milestones to index — message is chunked. */
  listMilestones(): Array<{ id: string; message?: string }>;
  /** Optional capability: decision records to index. */
  queryDecisions?(): Array<{
    id: string;
    toolName: string;
    rationale?: string;
    context?: string;
    outputSummary?: string;
  }>;
  /** Optional capability: parse a source file; expected to yield `{ entities: [...] }`. */
  parseFile?(filePath: string): any;
}

/** Embedder function type — maps text → vector. Pluggable for testing. */
export type Embedder = (text: string) => Promise<Float32Array>;
|
|
56
|
+
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// EmbeddingManager
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
export class EmbeddingManager {
|
|
62
|
+
private store: VectorStore;
|
|
63
|
+
private embedFn: Embedder;
|
|
64
|
+
|
|
65
|
+
constructor(dbPath: string, embedFn?: Embedder) {
|
|
66
|
+
this.store = new VectorStore(dbPath);
|
|
67
|
+
this.embedFn = embedFn ?? embed;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Full reindex: clear store, re-chunk all entities, embed, and insert.
|
|
72
|
+
*/
|
|
73
|
+
async reindex(engine: SearchableEngine): Promise<{ chunks: number }> {
|
|
74
|
+
this.store.clear();
|
|
75
|
+
|
|
76
|
+
const allChunks: ChunkMeta[] = [];
|
|
77
|
+
|
|
78
|
+
// 1. Issues
|
|
79
|
+
const issues = engine.listIssues();
|
|
80
|
+
for (const issue of issues) {
|
|
81
|
+
allChunks.push(...chunkIssue(issue));
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// 2. Milestones
|
|
85
|
+
const milestones = engine.listMilestones();
|
|
86
|
+
for (const ms of milestones) {
|
|
87
|
+
allChunks.push(...chunkMilestone(ms));
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// 3. Decisions
|
|
91
|
+
if (engine.queryDecisions) {
|
|
92
|
+
const decisions = engine.queryDecisions();
|
|
93
|
+
for (const dec of decisions) {
|
|
94
|
+
allChunks.push(...chunkDecision(dec));
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// 4. Files (markdown, summaries)
|
|
99
|
+
const rootPath = engine.getRootPath();
|
|
100
|
+
const trackedFiles = engine.trackedFiles();
|
|
101
|
+
for (const tf of trackedFiles) {
|
|
102
|
+
try {
|
|
103
|
+
const absPath = join(rootPath, tf.path);
|
|
104
|
+
if (!existsSync(absPath)) continue;
|
|
105
|
+
const content = readFileSync(absPath, 'utf-8');
|
|
106
|
+
allChunks.push(...chunkFile(tf.path, content));
|
|
107
|
+
} catch {}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// 5. Code entities (from parsed files)
|
|
111
|
+
if (engine.parseFile) {
|
|
112
|
+
for (const tf of trackedFiles) {
|
|
113
|
+
const ext = tf.path.split('.').pop()?.toLowerCase() ?? '';
|
|
114
|
+
if (
|
|
115
|
+
![
|
|
116
|
+
'ts',
|
|
117
|
+
'js',
|
|
118
|
+
'tsx',
|
|
119
|
+
'jsx',
|
|
120
|
+
'py',
|
|
121
|
+
'go',
|
|
122
|
+
'rs',
|
|
123
|
+
'rb',
|
|
124
|
+
'java',
|
|
125
|
+
'cs',
|
|
126
|
+
].includes(ext)
|
|
127
|
+
) {
|
|
128
|
+
continue;
|
|
129
|
+
}
|
|
130
|
+
try {
|
|
131
|
+
const parsed = engine.parseFile(tf.path);
|
|
132
|
+
if (parsed && Array.isArray(parsed.entities)) {
|
|
133
|
+
const declarations = parsed.entities.map((e: any) => ({
|
|
134
|
+
id: e.id ?? e.name,
|
|
135
|
+
name: e.name,
|
|
136
|
+
kind: e.kind,
|
|
137
|
+
signature: e.signature ?? e.rawText?.split('\n')[0] ?? '',
|
|
138
|
+
docComment: e.docComment,
|
|
139
|
+
}));
|
|
140
|
+
allChunks.push(...chunkCodeEntities(tf.path, declarations));
|
|
141
|
+
}
|
|
142
|
+
} catch {}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Embed and insert all chunks
|
|
147
|
+
const records: EmbeddingRecord[] = [];
|
|
148
|
+
for (const chunk of allChunks) {
|
|
149
|
+
try {
|
|
150
|
+
const vector = await this.embedFn(chunk.content);
|
|
151
|
+
records.push({ ...chunk, embedding: vector });
|
|
152
|
+
} catch {}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
this.store.upsertBatch(records);
|
|
156
|
+
|
|
157
|
+
return { chunks: records.length };
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Incrementally index a single file (on file change).
|
|
162
|
+
*/
|
|
163
|
+
async indexFile(
|
|
164
|
+
filePath: string,
|
|
165
|
+
content: string,
|
|
166
|
+
engine?: SearchableEngine,
|
|
167
|
+
): Promise<number> {
|
|
168
|
+
// Remove old chunks for this file
|
|
169
|
+
this.store.deleteByFile(filePath);
|
|
170
|
+
|
|
171
|
+
const chunks = chunkFile(filePath, content);
|
|
172
|
+
|
|
173
|
+
// Also index code entities if engine is available
|
|
174
|
+
if (engine?.parseFile) {
|
|
175
|
+
const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
|
|
176
|
+
if (
|
|
177
|
+
[
|
|
178
|
+
'ts',
|
|
179
|
+
'js',
|
|
180
|
+
'tsx',
|
|
181
|
+
'jsx',
|
|
182
|
+
'py',
|
|
183
|
+
'go',
|
|
184
|
+
'rs',
|
|
185
|
+
'rb',
|
|
186
|
+
'java',
|
|
187
|
+
'cs',
|
|
188
|
+
].includes(ext)
|
|
189
|
+
) {
|
|
190
|
+
try {
|
|
191
|
+
const parsed = engine.parseFile(filePath);
|
|
192
|
+
if (parsed && Array.isArray(parsed.entities)) {
|
|
193
|
+
const declarations = parsed.entities.map((e: any) => ({
|
|
194
|
+
id: e.id ?? e.name,
|
|
195
|
+
name: e.name,
|
|
196
|
+
kind: e.kind,
|
|
197
|
+
signature: e.signature ?? e.rawText?.split('\n')[0] ?? '',
|
|
198
|
+
docComment: e.docComment,
|
|
199
|
+
}));
|
|
200
|
+
chunks.push(...chunkCodeEntities(filePath, declarations));
|
|
201
|
+
}
|
|
202
|
+
} catch {}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const records: EmbeddingRecord[] = [];
|
|
207
|
+
for (const chunk of chunks) {
|
|
208
|
+
try {
|
|
209
|
+
const vector = await this.embedFn(chunk.content);
|
|
210
|
+
records.push({ ...chunk, embedding: vector });
|
|
211
|
+
} catch {}
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (records.length > 0) {
|
|
215
|
+
this.store.upsertBatch(records);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
return records.length;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* Index an issue (on create/update).
|
|
223
|
+
*/
|
|
224
|
+
async indexIssue(issue: {
|
|
225
|
+
id: string;
|
|
226
|
+
title?: string;
|
|
227
|
+
description?: string;
|
|
228
|
+
}): Promise<number> {
|
|
229
|
+
this.store.deleteByEntity(`issue:${issue.id}`);
|
|
230
|
+
|
|
231
|
+
const chunks = chunkIssue(issue);
|
|
232
|
+
const records: EmbeddingRecord[] = [];
|
|
233
|
+
|
|
234
|
+
for (const chunk of chunks) {
|
|
235
|
+
try {
|
|
236
|
+
const vector = await this.embedFn(chunk.content);
|
|
237
|
+
records.push({ ...chunk, embedding: vector });
|
|
238
|
+
} catch {}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
if (records.length > 0) {
|
|
242
|
+
this.store.upsertBatch(records);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
return records.length;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* Index a milestone (on create).
|
|
250
|
+
*/
|
|
251
|
+
async indexMilestone(milestone: {
|
|
252
|
+
id: string;
|
|
253
|
+
message?: string;
|
|
254
|
+
}): Promise<number> {
|
|
255
|
+
this.store.deleteByEntity(`milestone:${milestone.id}`);
|
|
256
|
+
|
|
257
|
+
const chunks = chunkMilestone(milestone);
|
|
258
|
+
const records: EmbeddingRecord[] = [];
|
|
259
|
+
|
|
260
|
+
for (const chunk of chunks) {
|
|
261
|
+
try {
|
|
262
|
+
const vector = await this.embedFn(chunk.content);
|
|
263
|
+
records.push({ ...chunk, embedding: vector });
|
|
264
|
+
} catch {}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
if (records.length > 0) {
|
|
268
|
+
this.store.upsertBatch(records);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
return records.length;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Semantic search: embed query → vector search → ranked results.
|
|
276
|
+
*/
|
|
277
|
+
async search(query: string, opts?: SearchOptions): Promise<SearchResult[]> {
|
|
278
|
+
const queryVector = await this.embedFn(query);
|
|
279
|
+
return this.store.search(queryVector, opts);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Remove all data for a file.
|
|
284
|
+
*/
|
|
285
|
+
removeFile(filePath: string): void {
|
|
286
|
+
this.store.deleteByFile(filePath);
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
/**
|
|
290
|
+
* Get store statistics.
|
|
291
|
+
*/
|
|
292
|
+
stats(): { total: number; byType: Record<string, number> } {
|
|
293
|
+
return {
|
|
294
|
+
total: this.store.count(),
|
|
295
|
+
byType: this.store.countByType(),
|
|
296
|
+
};
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* Close the store.
|
|
301
|
+
*/
|
|
302
|
+
close(): void {
|
|
303
|
+
this.store.close();
|
|
304
|
+
}
|
|
305
|
+
}
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Vector Store
|
|
3
|
+
*
|
|
4
|
+
* Persistent storage for embedding vectors using bun:sqlite.
|
|
5
|
+
* Vectors are stored as Float32Array blobs; cosine similarity
|
|
6
|
+
* is computed in JavaScript for cross-platform portability.
|
|
7
|
+
*
|
|
8
|
+
* @see TRL-18
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { Database } from 'bun:sqlite';
|
|
12
|
+
import type {
|
|
13
|
+
ChunkMeta,
|
|
14
|
+
ChunkType,
|
|
15
|
+
EmbeddingRecord,
|
|
16
|
+
SearchOptions,
|
|
17
|
+
SearchResult,
|
|
18
|
+
} from './types.js';
|
|
19
|
+
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Schema
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
/**
 * DDL for the two-table layout: `chunks` holds text + metadata, `vectors`
 * holds the raw Float32Array bytes keyed by chunk id. The ON DELETE CASCADE
 * keeps vectors in sync when chunks are deleted (effective only because the
 * constructor sets PRAGMA foreign_keys=ON). Indexes back the entity/type/file
 * filters used by deleteByEntity, deleteByFile, and search.
 */
const SCHEMA_SQL = `
CREATE TABLE IF NOT EXISTS chunks (
  id TEXT PRIMARY KEY,
  entity_id TEXT NOT NULL,
  content TEXT NOT NULL,
  chunk_type TEXT NOT NULL,
  file_path TEXT,
  updated_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS vectors (
  id TEXT PRIMARY KEY,
  embedding BLOB NOT NULL,
  FOREIGN KEY (id) REFERENCES chunks(id) ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_chunks_entity ON chunks(entity_id);
CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type);
CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_path);
`;
|
|
44
|
+
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
// Vector Store
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
export class VectorStore {
|
|
50
|
+
private db: Database;
|
|
51
|
+
|
|
52
|
+
constructor(dbPath: string) {
|
|
53
|
+
this.db = new Database(dbPath);
|
|
54
|
+
this.db.exec('PRAGMA journal_mode=WAL;');
|
|
55
|
+
this.db.exec('PRAGMA foreign_keys=ON;');
|
|
56
|
+
this.db.exec(SCHEMA_SQL);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Insert or update a chunk with its embedding vector.
|
|
61
|
+
*/
|
|
62
|
+
upsert(record: EmbeddingRecord): void {
|
|
63
|
+
const insertChunk = this.db.prepare(`
|
|
64
|
+
INSERT OR REPLACE INTO chunks (id, entity_id, content, chunk_type, file_path, updated_at)
|
|
65
|
+
VALUES ($id, $entityId, $content, $chunkType, $filePath, $updatedAt)
|
|
66
|
+
`);
|
|
67
|
+
const insertVector = this.db.prepare(`
|
|
68
|
+
INSERT OR REPLACE INTO vectors (id, embedding)
|
|
69
|
+
VALUES ($id, $embedding)
|
|
70
|
+
`);
|
|
71
|
+
|
|
72
|
+
const embeddingBlob = Buffer.from(record.embedding.buffer);
|
|
73
|
+
|
|
74
|
+
this.db.transaction(() => {
|
|
75
|
+
insertChunk.run({
|
|
76
|
+
$id: record.id,
|
|
77
|
+
$entityId: record.entityId,
|
|
78
|
+
$content: record.content,
|
|
79
|
+
$chunkType: record.chunkType,
|
|
80
|
+
$filePath: record.filePath ?? null,
|
|
81
|
+
$updatedAt: record.updatedAt,
|
|
82
|
+
});
|
|
83
|
+
insertVector.run({
|
|
84
|
+
$id: record.id,
|
|
85
|
+
$embedding: embeddingBlob,
|
|
86
|
+
});
|
|
87
|
+
})();
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Batch upsert multiple records.
|
|
92
|
+
*/
|
|
93
|
+
upsertBatch(records: EmbeddingRecord[]): void {
|
|
94
|
+
if (records.length === 0) return;
|
|
95
|
+
|
|
96
|
+
const insertChunk = this.db.prepare(`
|
|
97
|
+
INSERT OR REPLACE INTO chunks (id, entity_id, content, chunk_type, file_path, updated_at)
|
|
98
|
+
VALUES ($id, $entityId, $content, $chunkType, $filePath, $updatedAt)
|
|
99
|
+
`);
|
|
100
|
+
const insertVector = this.db.prepare(`
|
|
101
|
+
INSERT OR REPLACE INTO vectors (id, embedding)
|
|
102
|
+
VALUES ($id, $embedding)
|
|
103
|
+
`);
|
|
104
|
+
|
|
105
|
+
this.db.transaction(() => {
|
|
106
|
+
for (const record of records) {
|
|
107
|
+
const embeddingBlob = Buffer.from(record.embedding.buffer);
|
|
108
|
+
insertChunk.run({
|
|
109
|
+
$id: record.id,
|
|
110
|
+
$entityId: record.entityId,
|
|
111
|
+
$content: record.content,
|
|
112
|
+
$chunkType: record.chunkType,
|
|
113
|
+
$filePath: record.filePath ?? null,
|
|
114
|
+
$updatedAt: record.updatedAt,
|
|
115
|
+
});
|
|
116
|
+
insertVector.run({
|
|
117
|
+
$id: record.id,
|
|
118
|
+
$embedding: embeddingBlob,
|
|
119
|
+
});
|
|
120
|
+
}
|
|
121
|
+
})();
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Delete a chunk and its vector by ID.
|
|
126
|
+
*/
|
|
127
|
+
delete(id: string): void {
|
|
128
|
+
this.db.prepare('DELETE FROM vectors WHERE id = ?').run(id);
|
|
129
|
+
this.db.prepare('DELETE FROM chunks WHERE id = ?').run(id);
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Delete all chunks for an entity.
|
|
134
|
+
*/
|
|
135
|
+
deleteByEntity(entityId: string): void {
|
|
136
|
+
const ids = this.db
|
|
137
|
+
.prepare('SELECT id FROM chunks WHERE entity_id = ?')
|
|
138
|
+
.all(entityId) as Array<{ id: string }>;
|
|
139
|
+
|
|
140
|
+
if (ids.length === 0) return;
|
|
141
|
+
|
|
142
|
+
this.db.transaction(() => {
|
|
143
|
+
for (const { id } of ids) {
|
|
144
|
+
this.db.prepare('DELETE FROM vectors WHERE id = ?').run(id);
|
|
145
|
+
this.db.prepare('DELETE FROM chunks WHERE id = ?').run(id);
|
|
146
|
+
}
|
|
147
|
+
})();
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Delete all chunks associated with a file path.
|
|
152
|
+
*/
|
|
153
|
+
deleteByFile(filePath: string): void {
|
|
154
|
+
const ids = this.db
|
|
155
|
+
.prepare('SELECT id FROM chunks WHERE file_path = ?')
|
|
156
|
+
.all(filePath) as Array<{ id: string }>;
|
|
157
|
+
|
|
158
|
+
if (ids.length === 0) return;
|
|
159
|
+
|
|
160
|
+
this.db.transaction(() => {
|
|
161
|
+
for (const { id } of ids) {
|
|
162
|
+
this.db.prepare('DELETE FROM vectors WHERE id = ?').run(id);
|
|
163
|
+
this.db.prepare('DELETE FROM chunks WHERE id = ?').run(id);
|
|
164
|
+
}
|
|
165
|
+
})();
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/**
|
|
169
|
+
* Get a chunk by ID (without vector).
|
|
170
|
+
*/
|
|
171
|
+
getChunk(id: string): ChunkMeta | null {
|
|
172
|
+
const row = this.db
|
|
173
|
+
.prepare('SELECT * FROM chunks WHERE id = ?')
|
|
174
|
+
.get(id) as any;
|
|
175
|
+
if (!row) return null;
|
|
176
|
+
return rowToChunkMeta(row);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Search for chunks similar to the query vector.
|
|
181
|
+
* Uses brute-force cosine similarity scan.
|
|
182
|
+
*/
|
|
183
|
+
search(queryVector: Float32Array, opts: SearchOptions = {}): SearchResult[] {
|
|
184
|
+
const limit = opts.limit ?? 10;
|
|
185
|
+
const minScore = opts.minScore ?? 0.0;
|
|
186
|
+
|
|
187
|
+
// Build SQL filter
|
|
188
|
+
const conditions: string[] = [];
|
|
189
|
+
const params: Record<string, unknown> = {};
|
|
190
|
+
|
|
191
|
+
if (opts.types && opts.types.length > 0) {
|
|
192
|
+
const placeholders = opts.types.map((_, i) => `$type${i}`).join(', ');
|
|
193
|
+
conditions.push(`c.chunk_type IN (${placeholders})`);
|
|
194
|
+
opts.types.forEach((t, i) => {
|
|
195
|
+
params[`$type${i}`] = t;
|
|
196
|
+
});
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
if (opts.filePrefix) {
|
|
200
|
+
conditions.push('c.file_path LIKE $filePrefix');
|
|
201
|
+
params.$filePrefix = `${opts.filePrefix}%`;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
const where =
|
|
205
|
+
conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
|
206
|
+
|
|
207
|
+
const sql = `
|
|
208
|
+
SELECT c.id, c.entity_id, c.content, c.chunk_type, c.file_path, c.updated_at,
|
|
209
|
+
v.embedding
|
|
210
|
+
FROM chunks c
|
|
211
|
+
JOIN vectors v ON c.id = v.id
|
|
212
|
+
${where}
|
|
213
|
+
`;
|
|
214
|
+
|
|
215
|
+
const rows = this.db.prepare(sql).all(params) as any[];
|
|
216
|
+
|
|
217
|
+
// Compute cosine similarity for each row
|
|
218
|
+
const scored: SearchResult[] = [];
|
|
219
|
+
for (const row of rows) {
|
|
220
|
+
const storedVec = new Float32Array(
|
|
221
|
+
(row.embedding as Buffer).buffer,
|
|
222
|
+
(row.embedding as Buffer).byteOffset,
|
|
223
|
+
(row.embedding as Buffer).byteLength / 4,
|
|
224
|
+
);
|
|
225
|
+
const score = cosineSimilarity(queryVector, storedVec);
|
|
226
|
+
if (score >= minScore) {
|
|
227
|
+
scored.push({
|
|
228
|
+
chunk: rowToChunkMeta(row),
|
|
229
|
+
score,
|
|
230
|
+
});
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// Sort by score descending and limit
|
|
235
|
+
scored.sort((a, b) => b.score - a.score);
|
|
236
|
+
return scored.slice(0, limit);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Get total count of chunks in the store.
|
|
241
|
+
*/
|
|
242
|
+
count(): number {
|
|
243
|
+
const row = this.db
|
|
244
|
+
.prepare('SELECT COUNT(*) as cnt FROM chunks')
|
|
245
|
+
.get() as any;
|
|
246
|
+
return row?.cnt ?? 0;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* Get count by chunk type.
|
|
251
|
+
*/
|
|
252
|
+
countByType(): Record<string, number> {
|
|
253
|
+
const rows = this.db
|
|
254
|
+
.prepare(
|
|
255
|
+
'SELECT chunk_type, COUNT(*) as cnt FROM chunks GROUP BY chunk_type',
|
|
256
|
+
)
|
|
257
|
+
.all() as any[];
|
|
258
|
+
const result: Record<string, number> = {};
|
|
259
|
+
for (const row of rows) {
|
|
260
|
+
result[row.chunk_type] = row.cnt;
|
|
261
|
+
}
|
|
262
|
+
return result;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
/**
|
|
266
|
+
* Clear all data from the store.
|
|
267
|
+
*/
|
|
268
|
+
clear(): void {
|
|
269
|
+
this.db.exec('DELETE FROM vectors');
|
|
270
|
+
this.db.exec('DELETE FROM chunks');
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* Close the database connection.
|
|
275
|
+
*/
|
|
276
|
+
close(): void {
|
|
277
|
+
this.db.close();
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// ---------------------------------------------------------------------------
|
|
282
|
+
// Helpers
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
|
|
285
|
+
function rowToChunkMeta(row: any): ChunkMeta {
|
|
286
|
+
return {
|
|
287
|
+
id: row.id,
|
|
288
|
+
entityId: row.entity_id,
|
|
289
|
+
content: row.content,
|
|
290
|
+
chunkType: row.chunk_type as ChunkType,
|
|
291
|
+
filePath: row.file_path ?? undefined,
|
|
292
|
+
updatedAt: row.updated_at,
|
|
293
|
+
};
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* Compute cosine similarity between two vectors.
|
|
298
|
+
* Both vectors should already be normalized (output of mean pooling + normalize).
|
|
299
|
+
* For normalized vectors, cosine similarity = dot product.
|
|
300
|
+
*/
|
|
301
|
+
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
|
302
|
+
if (a.length !== b.length) return 0;
|
|
303
|
+
let dot = 0;
|
|
304
|
+
let normA = 0;
|
|
305
|
+
let normB = 0;
|
|
306
|
+
for (let i = 0; i < a.length; i++) {
|
|
307
|
+
dot += a[i] * b[i];
|
|
308
|
+
normA += a[i] * a[i];
|
|
309
|
+
normB += b[i] * b[i];
|
|
310
|
+
}
|
|
311
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
312
|
+
return denom === 0 ? 0 : dot / denom;
|
|
313
|
+
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Types
|
|
3
|
+
*
|
|
4
|
+
* Types for the semantic embedding and vector search system.
|
|
5
|
+
*
|
|
6
|
+
* @see TRL-18
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Chunk types — what gets embedded
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
/** Classification of an embedded chunk — what kind of source text it came from. */
export type ChunkType =
  | 'issue_title'
  | 'issue_desc'
  | 'milestone_msg'
  | 'decision_rationale'
  | 'summary_md'
  | 'code_entity'
  | 'doc_comment'
  | 'markdown';

/** Metadata for one embeddable text chunk (the vector itself lives in EmbeddingRecord). */
export interface ChunkMeta {
  /** Unique chunk ID, e.g. "issue:TRL-5:title", "file:src/engine.ts:chunk:0" */
  id: string;
  /** EAV entity ID this chunk belongs to */
  entityId: string;
  /** Original text content */
  content: string;
  /** Chunk classification */
  chunkType: ChunkType;
  /** Source file path (undefined for non-file entities) */
  filePath?: string;
  /** When this chunk was last updated */
  updatedAt: string;
}
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Embedding record — chunk + vector
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/** A chunk together with its embedding vector — the unit stored in VectorStore. */
export interface EmbeddingRecord extends ChunkMeta {
  /** Embedding vector for `content` (the default model emits 384 dimensions). */
  embedding: Float32Array;
}

// ---------------------------------------------------------------------------
// Search
// ---------------------------------------------------------------------------

/** A single ranked hit from a vector search. */
export interface SearchResult {
  /** Chunk metadata */
  chunk: ChunkMeta;
  /** Cosine similarity score (0..1) */
  score: number;
}

/** Options controlling a vector search. */
export interface SearchOptions {
  /** Max results to return (default: 10) */
  limit?: number;
  /** Filter by chunk type(s) */
  types?: ChunkType[];
  /** Filter by file path prefix */
  filePrefix?: string;
  /** Minimum similarity threshold (default: 0.0) */
  minScore?: number;
}
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// Embedding model config
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
/** Configuration for the embedding model backend. */
export interface EmbeddingModelConfig {
  /** Model name for @xenova/transformers (default: "Xenova/all-MiniLM-L6-v2") */
  modelName: string;
  /** Embedding dimension (default: 384) */
  dimension: number;
  /** Cache directory for model files */
  cacheDir?: string;
}

/**
 * Default model configuration: the all-MiniLM-L6-v2 sentence transformer,
 * producing 384-dimensional embeddings. No cacheDir — the model library's
 * own default cache location applies.
 */
export const DEFAULT_MODEL_CONFIG: EmbeddingModelConfig = {
  modelName: 'Xenova/all-MiniLM-L6-v2',
  dimension: 384,
};
|