trellis 1.0.8 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/LICENSE +21 -0
  2. package/README.md +533 -82
  3. package/bin/trellis.mjs +2 -0
  4. package/dist/cli/index.js +4718 -0
  5. package/dist/core/index.js +12 -0
  6. package/dist/decisions/index.js +19 -0
  7. package/dist/embeddings/index.js +43 -0
  8. package/dist/index-1j1anhmr.js +4038 -0
  9. package/dist/index-3s0eak0p.js +1556 -0
  10. package/dist/index-8pce39mh.js +272 -0
  11. package/dist/index-a76rekgs.js +67 -0
  12. package/dist/index-cy9k1g6v.js +684 -0
  13. package/dist/index-fd4e26s4.js +69 -0
  14. package/dist/{store/eav-store.js → index-gkvhzm9f.js} +4 -6
  15. package/dist/index-gnw8d7d6.js +51 -0
  16. package/dist/index-vkpkfwhq.js +817 -0
  17. package/dist/index.js +118 -2876
  18. package/dist/links/index.js +55 -0
  19. package/dist/transformers-m9je15kg.js +32491 -0
  20. package/dist/vcs/index.js +110 -0
  21. package/logo.png +0 -0
  22. package/logo.svg +9 -0
  23. package/package.json +79 -76
  24. package/src/cli/index.ts +2340 -0
  25. package/src/core/index.ts +35 -0
  26. package/src/core/kernel/middleware.ts +44 -0
  27. package/src/core/persist/backend.ts +64 -0
  28. package/src/core/store/eav-store.ts +467 -0
  29. package/src/decisions/auto-capture.ts +136 -0
  30. package/src/decisions/hooks.ts +163 -0
  31. package/src/decisions/index.ts +261 -0
  32. package/src/decisions/types.ts +103 -0
  33. package/src/embeddings/chunker.ts +327 -0
  34. package/src/embeddings/index.ts +41 -0
  35. package/src/embeddings/model.ts +95 -0
  36. package/src/embeddings/search.ts +305 -0
  37. package/src/embeddings/store.ts +313 -0
  38. package/src/embeddings/types.ts +85 -0
  39. package/src/engine.ts +1083 -0
  40. package/src/garden/cluster.ts +330 -0
  41. package/src/garden/garden.ts +306 -0
  42. package/src/garden/index.ts +29 -0
  43. package/src/git/git-exporter.ts +286 -0
  44. package/src/git/git-importer.ts +329 -0
  45. package/src/git/git-reader.ts +189 -0
  46. package/src/git/index.ts +22 -0
  47. package/src/identity/governance.ts +211 -0
  48. package/src/identity/identity.ts +224 -0
  49. package/src/identity/index.ts +30 -0
  50. package/src/identity/signing-middleware.ts +97 -0
  51. package/src/index.ts +20 -0
  52. package/src/links/index.ts +49 -0
  53. package/src/links/lifecycle.ts +400 -0
  54. package/src/links/parser.ts +484 -0
  55. package/src/links/ref-index.ts +186 -0
  56. package/src/links/resolver.ts +314 -0
  57. package/src/links/types.ts +108 -0
  58. package/src/mcp/index.ts +22 -0
  59. package/src/mcp/server.ts +1278 -0
  60. package/src/semantic/csharp-parser.ts +493 -0
  61. package/src/semantic/go-parser.ts +585 -0
  62. package/src/semantic/index.ts +34 -0
  63. package/src/semantic/java-parser.ts +456 -0
  64. package/src/semantic/python-parser.ts +659 -0
  65. package/src/semantic/ruby-parser.ts +446 -0
  66. package/src/semantic/rust-parser.ts +784 -0
  67. package/src/semantic/semantic-merge.ts +210 -0
  68. package/src/semantic/ts-parser.ts +681 -0
  69. package/src/semantic/types.ts +175 -0
  70. package/src/sync/index.ts +32 -0
  71. package/src/sync/memory-transport.ts +66 -0
  72. package/src/sync/reconciler.ts +237 -0
  73. package/src/sync/sync-engine.ts +258 -0
  74. package/src/sync/types.ts +104 -0
  75. package/src/vcs/blob-store.ts +124 -0
  76. package/src/vcs/branch.ts +150 -0
  77. package/src/vcs/checkpoint.ts +64 -0
  78. package/src/vcs/decompose.ts +469 -0
  79. package/src/vcs/diff.ts +409 -0
  80. package/src/vcs/engine-context.ts +26 -0
  81. package/src/vcs/index.ts +23 -0
  82. package/src/vcs/issue.ts +800 -0
  83. package/src/vcs/merge.ts +425 -0
  84. package/src/vcs/milestone.ts +124 -0
  85. package/src/vcs/ops.ts +59 -0
  86. package/src/vcs/types.ts +213 -0
  87. package/src/vcs/vcs-middleware.ts +81 -0
  88. package/src/watcher/fs-watcher.ts +217 -0
  89. package/src/watcher/index.ts +9 -0
  90. package/src/watcher/ingestion.ts +116 -0
  91. package/dist/ai/index.js +0 -688
  92. package/dist/cli/server.js +0 -3321
  93. package/dist/cli/tql.js +0 -5282
  94. package/dist/client/tql-client.js +0 -108
  95. package/dist/graph/index.js +0 -2248
  96. package/dist/kernel/logic-middleware.js +0 -179
  97. package/dist/kernel/middleware.js +0 -0
  98. package/dist/kernel/operations.js +0 -32
  99. package/dist/kernel/schema-middleware.js +0 -34
  100. package/dist/kernel/security-middleware.js +0 -53
  101. package/dist/kernel/trellis-kernel.js +0 -2239
  102. package/dist/kernel/workspace.js +0 -91
  103. package/dist/persist/backend.js +0 -0
  104. package/dist/persist/sqlite-backend.js +0 -123
  105. package/dist/query/index.js +0 -1643
  106. package/dist/server/index.js +0 -3309
  107. package/dist/workflows/index.js +0 -3160
@@ -0,0 +1,305 @@
1
+ /**
2
+ * Semantic Search Integration
3
+ *
4
+ * Connects the TrellisVcsEngine to the embedding system.
5
+ * Provides reindex (full rebuild) and search (query → ranked results).
6
+ * The embedder function is pluggable for testing with mock vectors.
7
+ *
8
+ * @see TRL-20
9
+ */
10
+
11
+ import { join } from 'path';
12
+ import { readFileSync, existsSync } from 'fs';
13
+ import { VectorStore } from './store.js';
14
+ import { embed } from './model.js';
15
+ import {
16
+ chunkIssue,
17
+ chunkMilestone,
18
+ chunkDecision,
19
+ chunkMarkdown,
20
+ chunkCodeEntities,
21
+ chunkFile,
22
+ } from './chunker.js';
23
+ import type {
24
+ ChunkMeta,
25
+ EmbeddingRecord,
26
+ SearchOptions,
27
+ SearchResult,
28
+ } from './types.js';
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Types
32
+ // ---------------------------------------------------------------------------
33
+
34
+ /**
+ * Minimal engine interface — avoids importing the full engine for testability.
+ *
+ * Declares only the members EmbeddingManager calls. `queryDecisions` and
+ * `parseFile` are optional: the manager checks for their presence and skips
+ * decision / code-entity indexing when they are missing.
+ */
35
+ export interface SearchableEngine {
36
+ getRootPath(): string; // joined with each tracked `path` to locate the file on disk
37
+ trackedFiles(): Array<{ path: string; contentHash?: string }>; // only `path` is read during reindex
38
+ listIssues(filters?: any): Array<{ // reindex calls this without filters
39
+ id: string;
40
+ title?: string;
41
+ description?: string;
42
+ }>;
43
+ listMilestones(): Array<{ id: string; message?: string }>;
44
+ queryDecisions?(): Array<{
45
+ id: string;
46
+ toolName: string;
47
+ rationale?: string;
48
+ context?: string;
49
+ outputSummary?: string;
50
+ }>;
51
+ parseFile?(filePath: string): any; // receives the tracked path as-is; result is used only if it has an array `entities`
52
+ }
53
+
54
+ /**
+ * Embedder function type — maps text → vector. Pluggable for testing.
+ * EmbeddingManager falls back to `embed` from './model.js' when none is given.
+ */
55
+ export type Embedder = (text: string) => Promise<Float32Array>;
56
+
57
+ // ---------------------------------------------------------------------------
58
+ // EmbeddingManager
59
+ // ---------------------------------------------------------------------------
60
+
61
/**
 * Coordinates chunking, embedding and persistence for semantic search.
 *
 * Text is split into chunks by the chunker helpers, each chunk is embedded
 * with the configured {@link Embedder}, and the resulting records are written
 * to a {@link VectorStore}.
 *
 * Indexing is best-effort: a file that cannot be read or parsed, or a chunk
 * that cannot be embedded, is skipped rather than aborting the whole run.
 * Skipped items are reported through the optional `onError` callback so that
 * failures are no longer invisible.
 */
export class EmbeddingManager {
  /** Lower-case file extensions (without the dot) that are handed to `engine.parseFile`. */
  private static readonly CODE_EXTENSIONS: ReadonlySet<string> = new Set([
    'ts',
    'js',
    'tsx',
    'jsx',
    'py',
    'go',
    'rs',
    'rb',
    'java',
    'cs',
  ]);

  private store: VectorStore;
  private embedFn: Embedder;
  private onError: (error: unknown, context: string) => void;

  /**
   * @param dbPath  Path handed to the underlying VectorStore.
   * @param embedFn Text → vector function; defaults to `embed` from './model.js'.
   * @param onError Optional observer for skipped work. Receives the thrown value
   *                and a short description of what was being attempted. Defaults
   *                to a no-op, which matches the previous silent behaviour. If
   *                the callback itself throws, the error propagates to the caller.
   */
  constructor(
    dbPath: string,
    embedFn?: Embedder,
    onError?: (error: unknown, context: string) => void,
  ) {
    this.store = new VectorStore(dbPath);
    this.embedFn = embedFn ?? embed;
    this.onError = onError ?? (() => {});
  }

  /**
   * Full reindex: clear store, re-chunk all entities, embed, and insert.
   *
   * The store is cleared up front, so it stays empty until embedding finishes.
   *
   * @returns The number of chunks that were embedded and stored.
   */
  async reindex(engine: SearchableEngine): Promise<{ chunks: number }> {
    this.store.clear();

    const allChunks: ChunkMeta[] = [];

    // 1. Issues
    for (const issue of engine.listIssues()) {
      allChunks.push(...chunkIssue(issue));
    }

    // 2. Milestones
    for (const ms of engine.listMilestones()) {
      allChunks.push(...chunkMilestone(ms));
    }

    // 3. Decisions (optional engine capability)
    if (engine.queryDecisions) {
      for (const dec of engine.queryDecisions()) {
        allChunks.push(...chunkDecision(dec));
      }
    }

    // 4. Files (markdown, summaries)
    const rootPath = engine.getRootPath();
    const trackedFiles = engine.trackedFiles();
    for (const tf of trackedFiles) {
      try {
        const absPath = join(rootPath, tf.path);
        // Tracked-but-deleted files are expected; skip them without reporting.
        if (!existsSync(absPath)) continue;
        const content = readFileSync(absPath, 'utf-8');
        allChunks.push(...chunkFile(tf.path, content));
      } catch (error) {
        this.onError(error, `reindex: read/chunk ${tf.path}`);
      }
    }

    // 5. Code entities (from parsed files) — appended after all file chunks.
    for (const tf of trackedFiles) {
      allChunks.push(...this.codeEntityChunks(tf.path, engine));
    }

    // Embed and insert all chunks
    const records = await this.embedChunks(allChunks);
    this.store.upsertBatch(records);

    return { chunks: records.length };
  }

  /**
   * Incrementally index a single file (on file change).
   *
   * Existing chunks for the path are removed first, then the new content is
   * chunked, embedded and stored.
   *
   * @returns The number of chunks stored for the file.
   */
  async indexFile(
    filePath: string,
    content: string,
    engine?: SearchableEngine,
  ): Promise<number> {
    // Remove old chunks for this file
    this.store.deleteByFile(filePath);

    const chunks: ChunkMeta[] = [
      ...chunkFile(filePath, content),
      // Also index code entities if an engine with parseFile is available
      ...this.codeEntityChunks(filePath, engine),
    ];

    return this.embedAndStore(chunks);
  }

  /**
   * Index an issue (on create/update), replacing any chunks previously stored
   * under `issue:<id>`.
   *
   * @returns The number of chunks stored.
   */
  async indexIssue(issue: {
    id: string;
    title?: string;
    description?: string;
  }): Promise<number> {
    this.store.deleteByEntity(`issue:${issue.id}`);
    return this.embedAndStore(chunkIssue(issue));
  }

  /**
   * Index a milestone (on create), replacing any chunks previously stored
   * under `milestone:<id>`.
   *
   * @returns The number of chunks stored.
   */
  async indexMilestone(milestone: {
    id: string;
    message?: string;
  }): Promise<number> {
    this.store.deleteByEntity(`milestone:${milestone.id}`);
    return this.embedAndStore(chunkMilestone(milestone));
  }

  /**
   * Semantic search: embed query → vector search → ranked results.
   * Unlike indexing, an embedding failure here is not swallowed.
   */
  async search(query: string, opts?: SearchOptions): Promise<SearchResult[]> {
    const queryVector = await this.embedFn(query);
    return this.store.search(queryVector, opts);
  }

  /**
   * Remove all data for a file.
   */
  removeFile(filePath: string): void {
    this.store.deleteByFile(filePath);
  }

  /**
   * Get store statistics.
   */
  stats(): { total: number; byType: Record<string, number> } {
    return {
      total: this.store.count(),
      byType: this.store.countByType(),
    };
  }

  /**
   * Close the store.
   */
  close(): void {
    this.store.close();
  }

  /**
   * True when the path's final segment has one of the known code extensions.
   * Only the basename is inspected, and a leading dot does not count as an
   * extension separator, so files literally named `go` or `.js` are not
   * mistaken for source files.
   */
  private static isCodeFile(filePath: string): boolean {
    const lastSeparator = Math.max(
      filePath.lastIndexOf('/'),
      filePath.lastIndexOf('\\'),
    );
    const baseName = filePath.slice(lastSeparator + 1);
    const dot = baseName.lastIndexOf('.');
    if (dot <= 0) return false;
    return EmbeddingManager.CODE_EXTENSIONS.has(
      baseName.slice(dot + 1).toLowerCase(),
    );
  }

  /**
   * Parse a code file through the engine and turn its entities into chunks.
   * Returns an empty list when the engine cannot parse, the file is not a
   * recognised code file, the parse result has no `entities` array, or
   * parsing throws.
   */
  private codeEntityChunks(
    filePath: string,
    engine?: SearchableEngine,
  ): ChunkMeta[] {
    if (!engine?.parseFile || !EmbeddingManager.isCodeFile(filePath)) {
      return [];
    }
    try {
      const parsed = engine.parseFile(filePath);
      if (!parsed || !Array.isArray(parsed.entities)) return [];

      // parseFile is untyped (`any`), so the entity shape is read defensively.
      const declarations = parsed.entities.map((e: any) => ({
        id: e.id ?? e.name,
        name: e.name,
        kind: e.kind,
        // Fall back to the first line of the raw text when no signature is given.
        signature: e.signature ?? e.rawText?.split('\n')[0] ?? '',
        docComment: e.docComment,
      }));
      return [...chunkCodeEntities(filePath, declarations)];
    } catch (error) {
      this.onError(error, `parse ${filePath}`);
      return [];
    }
  }

  /**
   * Embed chunks one at a time, in order. Chunks whose embedding fails are
   * reported via `onError` and left out of the result.
   */
  private async embedChunks(
    chunks: readonly ChunkMeta[],
  ): Promise<EmbeddingRecord[]> {
    const records: EmbeddingRecord[] = [];
    for (const chunk of chunks) {
      try {
        const embedding = await this.embedFn(chunk.content);
        records.push({ ...chunk, embedding });
      } catch (error) {
        this.onError(error, `embed ${chunk.id}`);
      }
    }
    return records;
  }

  /** Embed the chunks, persist whatever succeeded, and return that count. */
  private async embedAndStore(chunks: readonly ChunkMeta[]): Promise<number> {
    const records = await this.embedChunks(chunks);
    if (records.length > 0) {
      this.store.upsertBatch(records);
    }
    return records.length;
  }
}
@@ -0,0 +1,313 @@
1
+ /**
2
+ * Embedding Vector Store
3
+ *
4
+ * Persistent storage for embedding vectors using bun:sqlite.
5
+ * Vectors are stored as Float32Array blobs; cosine similarity
6
+ * is computed in JavaScript for cross-platform portability.
7
+ *
8
+ * @see TRL-18
9
+ */
10
+
11
+ import { Database } from 'bun:sqlite';
12
+ import type {
13
+ ChunkMeta,
14
+ ChunkType,
15
+ EmbeddingRecord,
16
+ SearchOptions,
17
+ SearchResult,
18
+ } from './types.js';
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Schema
22
+ // ---------------------------------------------------------------------------
23
+
24
/*
 * DDL executed by the VectorStore constructor on every open; every statement
 * uses IF NOT EXISTS. `chunks` holds the text and metadata, `vectors` holds
 * the embedding blob under the same id and references `chunks(id)` with
 * ON DELETE CASCADE. Lookups by entity, chunk type and file path are indexed.
 */
+ const SCHEMA_SQL = `
25
+ CREATE TABLE IF NOT EXISTS chunks (
26
+ id TEXT PRIMARY KEY,
27
+ entity_id TEXT NOT NULL,
28
+ content TEXT NOT NULL,
29
+ chunk_type TEXT NOT NULL,
30
+ file_path TEXT,
31
+ updated_at TEXT NOT NULL
32
+ );
33
+
34
+ CREATE TABLE IF NOT EXISTS vectors (
35
+ id TEXT PRIMARY KEY,
36
+ embedding BLOB NOT NULL,
37
+ FOREIGN KEY (id) REFERENCES chunks(id) ON DELETE CASCADE
38
+ );
39
+
40
+ CREATE INDEX IF NOT EXISTS idx_chunks_entity ON chunks(entity_id);
41
+ CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type);
42
+ CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_path);
43
+ `;
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // Vector Store
47
+ // ---------------------------------------------------------------------------
48
+
49
/**
 * SQLite-backed store for text chunks and their embedding vectors.
 *
 * Chunk metadata lives in `chunks`, the raw Float32 bytes in `vectors`.
 * Similarity search is a brute-force scan: every candidate row is loaded and
 * scored in JavaScript with `cosineSimilarity`.
 */
export class VectorStore {
  private db: Database;

  /**
   * Open (or create) the database at `dbPath`, switch it to WAL journaling,
   * enable foreign-key enforcement and make sure the schema exists.
   */
  constructor(dbPath: string) {
    this.db = new Database(dbPath);
    this.db.exec('PRAGMA journal_mode=WAL;');
    this.db.exec('PRAGMA foreign_keys=ON;');
    this.db.exec(SCHEMA_SQL);
  }

  /**
   * Insert or update a chunk with its embedding vector.
   */
  upsert(record: EmbeddingRecord): void {
    this.upsertBatch([record]);
  }

  /**
   * Batch upsert multiple records inside a single transaction.
   * An empty batch is a no-op.
   */
  upsertBatch(records: EmbeddingRecord[]): void {
    if (records.length === 0) return;

    const insertChunk = this.db.prepare(`
      INSERT OR REPLACE INTO chunks (id, entity_id, content, chunk_type, file_path, updated_at)
      VALUES ($id, $entityId, $content, $chunkType, $filePath, $updatedAt)
    `);
    const insertVector = this.db.prepare(`
      INSERT OR REPLACE INTO vectors (id, embedding)
      VALUES ($id, $embedding)
    `);

    this.db.transaction(() => {
      for (const record of records) {
        insertChunk.run({
          $id: record.id,
          $entityId: record.entityId,
          $content: record.content,
          $chunkType: record.chunkType,
          $filePath: record.filePath ?? null,
          $updatedAt: record.updatedAt,
        });
        insertVector.run({
          $id: record.id,
          $embedding: VectorStore.encodeEmbedding(record.embedding),
        });
      }
    })();
  }

  /**
   * Delete a chunk and its vector by ID. Both rows go in one transaction so
   * a failure cannot leave a chunk without its vector or vice versa.
   */
  delete(id: string): void {
    this.db.transaction(() => {
      this.db.prepare('DELETE FROM vectors WHERE id = ?').run(id);
      this.db.prepare('DELETE FROM chunks WHERE id = ?').run(id);
    })();
  }

  /**
   * Delete all chunks (and their vectors) for an entity.
   */
  deleteByEntity(entityId: string): void {
    this.db.transaction(() => {
      // Vectors first, while the chunk rows that identify them still exist.
      this.db
        .prepare(
          'DELETE FROM vectors WHERE id IN (SELECT id FROM chunks WHERE entity_id = ?)',
        )
        .run(entityId);
      this.db.prepare('DELETE FROM chunks WHERE entity_id = ?').run(entityId);
    })();
  }

  /**
   * Delete all chunks (and their vectors) associated with a file path.
   */
  deleteByFile(filePath: string): void {
    this.db.transaction(() => {
      // Vectors first, while the chunk rows that identify them still exist.
      this.db
        .prepare(
          'DELETE FROM vectors WHERE id IN (SELECT id FROM chunks WHERE file_path = ?)',
        )
        .run(filePath);
      this.db.prepare('DELETE FROM chunks WHERE file_path = ?').run(filePath);
    })();
  }

  /**
   * Get a chunk by ID (without vector).
   *
   * @returns The chunk metadata, or `null` when no such chunk exists.
   */
  getChunk(id: string): ChunkMeta | null {
    const row = this.db
      .prepare('SELECT * FROM chunks WHERE id = ?')
      .get(id) as any;
    if (!row) return null;
    return rowToChunkMeta(row);
  }

  /**
   * Search for chunks similar to the query vector.
   * Uses brute-force cosine similarity scan.
   *
   * @param queryVector Vector to compare against every stored embedding.
   * @param opts        Optional limit, chunk-type filter, file-path prefix
   *                    filter and minimum score.
   * @returns Matches with `score >= minScore`, best first, at most `limit`
   *          entries. A non-positive limit yields an empty list.
   */
  search(queryVector: Float32Array, opts: SearchOptions = {}): SearchResult[] {
    const limit = opts.limit ?? 10;
    const minScore = opts.minScore ?? 0.0;

    // Negative limits would make slice() drop results from the tail instead.
    if (!(limit > 0)) return [];

    // Build SQL filter
    const conditions: string[] = [];
    const params: Record<string, string> = {};

    if (opts.types && opts.types.length > 0) {
      const placeholders = opts.types.map((_, i) => `$type${i}`).join(', ');
      conditions.push(`c.chunk_type IN (${placeholders})`);
      opts.types.forEach((t, i) => {
        params[`$type${i}`] = t;
      });
    }

    if (opts.filePrefix) {
      // Escape LIKE metacharacters so '_' and '%' in real paths match literally.
      conditions.push("c.file_path LIKE $filePrefix ESCAPE '\\'");
      params.$filePrefix = `${opts.filePrefix.replace(/[\\%_]/g, '\\$&')}%`;
    }

    const where =
      conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

    const sql = `
      SELECT c.id, c.entity_id, c.content, c.chunk_type, c.file_path, c.updated_at,
             v.embedding
      FROM chunks c
      JOIN vectors v ON c.id = v.id
      ${where}
    `;

    const rows = this.db.prepare(sql).all(params) as any[];

    // Compute cosine similarity for each row
    const scored: SearchResult[] = [];
    for (const row of rows) {
      const storedVec = VectorStore.decodeEmbedding(row.embedding as Uint8Array);
      const score = cosineSimilarity(queryVector, storedVec);
      if (score >= minScore) {
        scored.push({
          chunk: rowToChunkMeta(row),
          score,
        });
      }
    }

    // Sort by score descending and limit
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, limit);
  }

  /**
   * Get total count of chunks in the store.
   */
  count(): number {
    const row = this.db
      .prepare('SELECT COUNT(*) as cnt FROM chunks')
      .get() as any;
    return row?.cnt ?? 0;
  }

  /**
   * Get count by chunk type.
   */
  countByType(): Record<string, number> {
    const rows = this.db
      .prepare(
        'SELECT chunk_type, COUNT(*) as cnt FROM chunks GROUP BY chunk_type',
      )
      .all() as any[];
    const result: Record<string, number> = {};
    for (const row of rows) {
      result[row.chunk_type] = row.cnt;
    }
    return result;
  }

  /**
   * Clear all data from the store. Both tables are emptied in one
   * transaction so readers never observe vectors without chunks.
   */
  clear(): void {
    this.db.transaction(() => {
      this.db.exec('DELETE FROM vectors');
      this.db.exec('DELETE FROM chunks');
    })();
  }

  /**
   * Close the database connection.
   */
  close(): void {
    this.db.close();
  }

  /**
   * View exactly the bytes of `embedding` as a Buffer. The offset and length
   * are passed explicitly because a Float32Array may be a window onto a
   * larger ArrayBuffer; using `.buffer` alone would persist unrelated bytes.
   */
  private static encodeEmbedding(embedding: Float32Array): Buffer {
    return Buffer.from(
      embedding.buffer,
      embedding.byteOffset,
      embedding.byteLength,
    );
  }

  /**
   * Reinterpret a stored blob as float32 values. A zero-copy view is used
   * when the blob is 4-byte aligned; otherwise the bytes are copied first,
   * since constructing a Float32Array at an unaligned offset throws.
   */
  private static decodeEmbedding(blob: Uint8Array): Float32Array {
    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
    const length = Math.floor(blob.byteLength / bytesPerFloat);
    if (blob.byteOffset % bytesPerFloat === 0) {
      return new Float32Array(blob.buffer, blob.byteOffset, length);
    }
    // `new Uint8Array(typedArray)` always copies into a fresh, aligned buffer.
    const aligned = new Uint8Array(blob.subarray(0, length * bytesPerFloat));
    return new Float32Array(aligned.buffer, 0, length);
  }
}
280
+
281
+ // ---------------------------------------------------------------------------
282
+ // Helpers
283
+ // ---------------------------------------------------------------------------
284
+
285
/**
 * Convert a row of the `chunks` table (snake_case columns) into a ChunkMeta.
 *
 * Extra columns on the row, such as a joined `embedding`, are ignored.
 * A NULL `file_path` becomes an undefined `filePath`.
 */
function rowToChunkMeta(row: {
  id: string;
  entity_id: string;
  content: string;
  chunk_type: string;
  file_path?: string | null;
  updated_at: string;
}): ChunkMeta {
  return {
    id: row.id,
    entityId: row.entity_id,
    content: row.content,
    // The column is free-form TEXT; it is trusted to hold a valid ChunkType.
    chunkType: row.chunk_type as ChunkType,
    filePath: row.file_path ?? undefined,
    updatedAt: row.updated_at,
  };
}
295
+
296
/**
 * Compute cosine similarity between two vectors.
 *
 * Both norms are computed here, so the inputs do not have to be
 * pre-normalized (for unit vectors the result equals the dot product).
 * The quotient is clamped to [-1, 1]: floating-point rounding in
 * `sqrt(a·a) * sqrt(b·b)` can otherwise yield values such as
 * 1.0000000000000002 for identical vectors.
 *
 * @returns The similarity in [-1, 1], or 0 when the vectors differ in length
 *          or either has zero magnitude.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  if (a.length !== b.length) return 0;

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) return 0;

  return Math.max(-1, Math.min(1, dot / denom));
}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Embedding Types
3
+ *
4
+ * Types for the semantic embedding and vector search system.
5
+ *
6
+ * @see TRL-18
7
+ */
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Chunk types — what gets embedded
11
+ // ---------------------------------------------------------------------------
12
+
13
/**
 * Classification tag carried by every chunk. It is persisted in the
 * `chunk_type` column and is the value `SearchOptions.types` filters on.
 */
+ export type ChunkType =
14
+ | 'issue_title'
15
+ | 'issue_desc'
16
+ | 'milestone_msg'
17
+ | 'decision_rationale'
18
+ | 'summary_md'
19
+ | 'code_entity'
20
+ | 'doc_comment'
21
+ | 'markdown';
22
+
23
+ export interface ChunkMeta {
24
+ /**
+ * Unique chunk ID, e.g. "issue:TRL-5:title", "file:src/engine.ts:chunk:0".
+ * Primary key in the store: upserting an existing ID replaces that chunk.
+ */
25
+ id: string;
26
+ /** EAV entity ID this chunk belongs to; the store can delete all chunks of one entity by this value */
27
+ entityId: string;
28
+ /** Original text content; this is the text handed to the embedder */
29
+ content: string;
30
+ /** Chunk classification */
31
+ chunkType: ChunkType;
32
+ /** Source file path; omitted for non-file entities (persisted as NULL) */
33
+ filePath?: string;
34
+ /** When this chunk was last updated. Stored as text; presumably an ISO-8601 timestamp — confirm against the chunker */
35
+ updatedAt: string;
36
+ }
37
+
38
+ // ---------------------------------------------------------------------------
39
+ // Embedding record — chunk + vector
40
+ // ---------------------------------------------------------------------------
41
+
42
+ export interface EmbeddingRecord extends ChunkMeta {
43
+ /**
+ * Embedding vector for `content`. 384-dimensional with the default model
+ * (see DEFAULT_MODEL_CONFIG.dimension); the type itself does not fix the length.
+ */
44
+ embedding: Float32Array;
45
+ }
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // Search
49
+ // ---------------------------------------------------------------------------
50
+
51
+ export interface SearchResult {
51
+ /** Chunk metadata */
52
+ chunk: ChunkMeta;
53
+ /**
+ * Cosine similarity score. Mathematically in -1..1; results scoring below
+ * `SearchOptions.minScore` (default 0.0) are dropped, so the value is
+ * non-negative unless a negative threshold was requested.
+ */
54
+ score: number;
55
+ }
57
+
58
+ export interface SearchOptions {
59
+ /** Max results to return (default: 10) */
60
+ limit?: number;
61
+ /** Filter by chunk type(s); omitted or empty means no type filter */
62
+ types?: ChunkType[];
63
+ /** Filter by file path prefix; chunks that have no file path never match */
64
+ filePrefix?: string;
65
+ /** Minimum similarity threshold, inclusive (default: 0.0) */
66
+ minScore?: number;
67
+ }
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Embedding model config
71
+ // ---------------------------------------------------------------------------
72
+
73
+ export interface EmbeddingModelConfig {
74
+ /** Model name for @xenova/transformers (default: "Xenova/all-MiniLM-L6-v2") */
75
+ modelName: string;
76
+ /** Embedding dimension, i.e. the expected vector length for `modelName` (default: 384) */
77
+ dimension: number;
78
+ /** Cache directory for model files; not set in DEFAULT_MODEL_CONFIG */
79
+ cacheDir?: string;
80
+ }
81
+
82
/**
 * Default model settings: the all-MiniLM-L6-v2 model with 384-dimensional
 * output. No `cacheDir` is specified here.
 */
+ export const DEFAULT_MODEL_CONFIG: EmbeddingModelConfig = {
83
+ modelName: 'Xenova/all-MiniLM-L6-v2',
84
+ dimension: 384,
85
+ };