trellis 1.0.7 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +533 -82
- package/bin/trellis.mjs +2 -0
- package/dist/cli/index.js +4718 -0
- package/dist/core/index.js +12 -0
- package/dist/decisions/index.js +19 -0
- package/dist/embeddings/index.js +43 -0
- package/dist/index-1j1anhmr.js +4038 -0
- package/dist/index-3s0eak0p.js +1556 -0
- package/dist/index-8pce39mh.js +272 -0
- package/dist/index-a76rekgs.js +67 -0
- package/dist/index-cy9k1g6v.js +684 -0
- package/dist/index-fd4e26s4.js +69 -0
- package/dist/{store/eav-store.js → index-gkvhzm9f.js} +4 -6
- package/dist/index-gnw8d7d6.js +51 -0
- package/dist/index-vkpkfwhq.js +817 -0
- package/dist/index.js +118 -2876
- package/dist/links/index.js +55 -0
- package/dist/transformers-m9je15kg.js +32491 -0
- package/dist/vcs/index.js +110 -0
- package/logo.png +0 -0
- package/logo.svg +9 -0
- package/package.json +79 -76
- package/src/cli/index.ts +2340 -0
- package/src/core/index.ts +35 -0
- package/src/core/kernel/middleware.ts +44 -0
- package/src/core/persist/backend.ts +64 -0
- package/src/core/store/eav-store.ts +467 -0
- package/src/decisions/auto-capture.ts +136 -0
- package/src/decisions/hooks.ts +163 -0
- package/src/decisions/index.ts +261 -0
- package/src/decisions/types.ts +103 -0
- package/src/embeddings/chunker.ts +327 -0
- package/src/embeddings/index.ts +41 -0
- package/src/embeddings/model.ts +95 -0
- package/src/embeddings/search.ts +305 -0
- package/src/embeddings/store.ts +313 -0
- package/src/embeddings/types.ts +85 -0
- package/src/engine.ts +1083 -0
- package/src/garden/cluster.ts +330 -0
- package/src/garden/garden.ts +306 -0
- package/src/garden/index.ts +29 -0
- package/src/git/git-exporter.ts +286 -0
- package/src/git/git-importer.ts +329 -0
- package/src/git/git-reader.ts +189 -0
- package/src/git/index.ts +22 -0
- package/src/identity/governance.ts +211 -0
- package/src/identity/identity.ts +224 -0
- package/src/identity/index.ts +30 -0
- package/src/identity/signing-middleware.ts +97 -0
- package/src/index.ts +20 -0
- package/src/links/index.ts +49 -0
- package/src/links/lifecycle.ts +400 -0
- package/src/links/parser.ts +484 -0
- package/src/links/ref-index.ts +186 -0
- package/src/links/resolver.ts +314 -0
- package/src/links/types.ts +108 -0
- package/src/mcp/index.ts +22 -0
- package/src/mcp/server.ts +1278 -0
- package/src/semantic/csharp-parser.ts +493 -0
- package/src/semantic/go-parser.ts +585 -0
- package/src/semantic/index.ts +34 -0
- package/src/semantic/java-parser.ts +456 -0
- package/src/semantic/python-parser.ts +659 -0
- package/src/semantic/ruby-parser.ts +446 -0
- package/src/semantic/rust-parser.ts +784 -0
- package/src/semantic/semantic-merge.ts +210 -0
- package/src/semantic/ts-parser.ts +681 -0
- package/src/semantic/types.ts +175 -0
- package/src/sync/index.ts +32 -0
- package/src/sync/memory-transport.ts +66 -0
- package/src/sync/reconciler.ts +237 -0
- package/src/sync/sync-engine.ts +258 -0
- package/src/sync/types.ts +104 -0
- package/src/vcs/blob-store.ts +124 -0
- package/src/vcs/branch.ts +150 -0
- package/src/vcs/checkpoint.ts +64 -0
- package/src/vcs/decompose.ts +469 -0
- package/src/vcs/diff.ts +409 -0
- package/src/vcs/engine-context.ts +26 -0
- package/src/vcs/index.ts +23 -0
- package/src/vcs/issue.ts +800 -0
- package/src/vcs/merge.ts +425 -0
- package/src/vcs/milestone.ts +124 -0
- package/src/vcs/ops.ts +59 -0
- package/src/vcs/types.ts +213 -0
- package/src/vcs/vcs-middleware.ts +81 -0
- package/src/watcher/fs-watcher.ts +217 -0
- package/src/watcher/index.ts +9 -0
- package/src/watcher/ingestion.ts +116 -0
- package/dist/ai/index.js +0 -688
- package/dist/cli/server.js +0 -3321
- package/dist/cli/tql.js +0 -5282
- package/dist/client/tql-client.js +0 -108
- package/dist/graph/index.js +0 -2248
- package/dist/kernel/logic-middleware.js +0 -179
- package/dist/kernel/middleware.js +0 -0
- package/dist/kernel/operations.js +0 -32
- package/dist/kernel/schema-middleware.js +0 -34
- package/dist/kernel/security-middleware.js +0 -53
- package/dist/kernel/trellis-kernel.js +0 -2239
- package/dist/kernel/workspace.js +0 -91
- package/dist/persist/backend.js +0 -0
- package/dist/persist/sqlite-backend.js +0 -123
- package/dist/query/index.js +0 -1643
- package/dist/server/index.js +0 -3309
- package/dist/workflows/index.js +0 -3160
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunker
|
|
3
|
+
*
|
|
4
|
+
* Strategies for splitting different content types into embeddable chunks.
|
|
5
|
+
* Short text is embedded as-is; markdown is split by headings; code entities
|
|
6
|
+
* are chunked by declaration; large text uses sliding window.
|
|
7
|
+
*
|
|
8
|
+
* @see TRL-19
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { ChunkMeta, ChunkType } from './types.js';
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Chunk configuration
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/** Max characters per chunk before splitting (roughly ~128 tokens at ~4 chars/token) */
const MAX_CHUNK_CHARS = 512;
/** Characters shared between consecutive sliding-window chunks */
const OVERLAP_CHARS = 64;
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Public API
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Chunk an issue into embeddable pieces.
|
|
28
|
+
*/
|
|
29
|
+
export function chunkIssue(issue: {
|
|
30
|
+
id: string;
|
|
31
|
+
title?: string;
|
|
32
|
+
description?: string;
|
|
33
|
+
}): ChunkMeta[] {
|
|
34
|
+
const now = new Date().toISOString();
|
|
35
|
+
const chunks: ChunkMeta[] = [];
|
|
36
|
+
|
|
37
|
+
if (issue.title) {
|
|
38
|
+
chunks.push({
|
|
39
|
+
id: `issue:${issue.id}:title`,
|
|
40
|
+
entityId: `issue:${issue.id}`,
|
|
41
|
+
content: issue.title,
|
|
42
|
+
chunkType: 'issue_title',
|
|
43
|
+
updatedAt: now,
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (issue.description) {
|
|
48
|
+
chunks.push({
|
|
49
|
+
id: `issue:${issue.id}:desc`,
|
|
50
|
+
entityId: `issue:${issue.id}`,
|
|
51
|
+
content: issue.description,
|
|
52
|
+
chunkType: 'issue_desc',
|
|
53
|
+
updatedAt: now,
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
return chunks;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Chunk a decision trace into embeddable pieces.
|
|
62
|
+
* Combines tool name, rationale, context, and output summary.
|
|
63
|
+
*/
|
|
64
|
+
export function chunkDecision(decision: {
|
|
65
|
+
id: string;
|
|
66
|
+
toolName: string;
|
|
67
|
+
rationale?: string;
|
|
68
|
+
context?: string;
|
|
69
|
+
outputSummary?: string;
|
|
70
|
+
}): ChunkMeta[] {
|
|
71
|
+
const parts: string[] = [];
|
|
72
|
+
parts.push(`Decision ${decision.id}: ${decision.toolName}`);
|
|
73
|
+
if (decision.rationale) parts.push(`Rationale: ${decision.rationale}`);
|
|
74
|
+
if (decision.context) parts.push(`Context: ${decision.context}`);
|
|
75
|
+
if (decision.outputSummary) parts.push(`Output: ${decision.outputSummary}`);
|
|
76
|
+
|
|
77
|
+
const content = parts.join('\n');
|
|
78
|
+
if (!content.trim()) return [];
|
|
79
|
+
|
|
80
|
+
return [
|
|
81
|
+
{
|
|
82
|
+
id: `decision:${decision.id}:rationale`,
|
|
83
|
+
entityId: `decision:${decision.id}`,
|
|
84
|
+
content,
|
|
85
|
+
chunkType: 'decision_rationale',
|
|
86
|
+
updatedAt: new Date().toISOString(),
|
|
87
|
+
},
|
|
88
|
+
];
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Chunk a milestone message.
|
|
93
|
+
*/
|
|
94
|
+
export function chunkMilestone(milestone: {
|
|
95
|
+
id: string;
|
|
96
|
+
message?: string;
|
|
97
|
+
}): ChunkMeta[] {
|
|
98
|
+
if (!milestone.message) return [];
|
|
99
|
+
|
|
100
|
+
return [
|
|
101
|
+
{
|
|
102
|
+
id: `milestone:${milestone.id}:msg`,
|
|
103
|
+
entityId: `milestone:${milestone.id}`,
|
|
104
|
+
content: milestone.message,
|
|
105
|
+
chunkType: 'milestone_msg',
|
|
106
|
+
updatedAt: new Date().toISOString(),
|
|
107
|
+
},
|
|
108
|
+
];
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Chunk a markdown file by heading sections.
|
|
113
|
+
* Each H1/H2/H3 section becomes a separate chunk.
|
|
114
|
+
* Sections that exceed MAX_CHUNK_CHARS get sliding-window split.
|
|
115
|
+
*/
|
|
116
|
+
export function chunkMarkdown(filePath: string, content: string): ChunkMeta[] {
|
|
117
|
+
if (!content.trim()) return [];
|
|
118
|
+
|
|
119
|
+
const entityId = `file:${filePath}`;
|
|
120
|
+
const now = new Date().toISOString();
|
|
121
|
+
const sections = splitByHeadings(content);
|
|
122
|
+
const chunks: ChunkMeta[] = [];
|
|
123
|
+
|
|
124
|
+
for (let i = 0; i < sections.length; i++) {
|
|
125
|
+
const section = sections[i];
|
|
126
|
+
if (!section.text.trim()) continue;
|
|
127
|
+
|
|
128
|
+
if (section.text.length <= MAX_CHUNK_CHARS) {
|
|
129
|
+
chunks.push({
|
|
130
|
+
id: `${entityId}:section:${i}`,
|
|
131
|
+
entityId,
|
|
132
|
+
content: section.text,
|
|
133
|
+
chunkType: 'markdown',
|
|
134
|
+
filePath,
|
|
135
|
+
updatedAt: now,
|
|
136
|
+
});
|
|
137
|
+
} else {
|
|
138
|
+
// Split long sections with sliding window
|
|
139
|
+
const windows = slidingWindow(section.text);
|
|
140
|
+
for (let w = 0; w < windows.length; w++) {
|
|
141
|
+
chunks.push({
|
|
142
|
+
id: `${entityId}:section:${i}:w${w}`,
|
|
143
|
+
entityId,
|
|
144
|
+
content: windows[w],
|
|
145
|
+
chunkType: 'markdown',
|
|
146
|
+
filePath,
|
|
147
|
+
updatedAt: now,
|
|
148
|
+
});
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
return chunks;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Chunk code entities (functions, classes, interfaces) from a parsed file.
|
|
158
|
+
* Each declaration's signature + doc-comment becomes a chunk.
|
|
159
|
+
*/
|
|
160
|
+
export function chunkCodeEntities(
|
|
161
|
+
filePath: string,
|
|
162
|
+
declarations: Array<{
|
|
163
|
+
id: string;
|
|
164
|
+
name: string;
|
|
165
|
+
kind: string;
|
|
166
|
+
signature: string;
|
|
167
|
+
docComment?: string;
|
|
168
|
+
}>,
|
|
169
|
+
): ChunkMeta[] {
|
|
170
|
+
const now = new Date().toISOString();
|
|
171
|
+
const chunks: ChunkMeta[] = [];
|
|
172
|
+
|
|
173
|
+
for (const decl of declarations) {
|
|
174
|
+
const parts: string[] = [];
|
|
175
|
+
if (decl.docComment) parts.push(decl.docComment);
|
|
176
|
+
parts.push(`${decl.kind} ${decl.name}`);
|
|
177
|
+
parts.push(decl.signature);
|
|
178
|
+
|
|
179
|
+
const content = parts.join('\n').slice(0, MAX_CHUNK_CHARS);
|
|
180
|
+
|
|
181
|
+
chunks.push({
|
|
182
|
+
id: `symbol:${filePath}#${decl.name}`,
|
|
183
|
+
entityId: `symbol:${filePath}#${decl.name}`,
|
|
184
|
+
content,
|
|
185
|
+
chunkType: 'code_entity',
|
|
186
|
+
filePath,
|
|
187
|
+
updatedAt: now,
|
|
188
|
+
});
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
return chunks;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Chunk doc-comments extracted from source files.
|
|
196
|
+
*/
|
|
197
|
+
export function chunkDocComments(
|
|
198
|
+
filePath: string,
|
|
199
|
+
comments: Array<{ line: number; text: string }>,
|
|
200
|
+
): ChunkMeta[] {
|
|
201
|
+
if (comments.length === 0) return [];
|
|
202
|
+
|
|
203
|
+
const entityId = `file:${filePath}`;
|
|
204
|
+
const now = new Date().toISOString();
|
|
205
|
+
const chunks: ChunkMeta[] = [];
|
|
206
|
+
|
|
207
|
+
for (let i = 0; i < comments.length; i++) {
|
|
208
|
+
const comment = comments[i];
|
|
209
|
+
if (!comment.text.trim()) continue;
|
|
210
|
+
|
|
211
|
+
chunks.push({
|
|
212
|
+
id: `${entityId}:doc:${i}`,
|
|
213
|
+
entityId,
|
|
214
|
+
content: comment.text.slice(0, MAX_CHUNK_CHARS),
|
|
215
|
+
chunkType: 'doc_comment',
|
|
216
|
+
filePath,
|
|
217
|
+
updatedAt: now,
|
|
218
|
+
});
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
return chunks;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Chunk a summary.md or similar short-to-medium text file.
|
|
226
|
+
*/
|
|
227
|
+
export function chunkSummary(filePath: string, content: string): ChunkMeta[] {
|
|
228
|
+
if (!content.trim()) return [];
|
|
229
|
+
|
|
230
|
+
const entityId = `file:${filePath}`;
|
|
231
|
+
const now = new Date().toISOString();
|
|
232
|
+
|
|
233
|
+
if (content.length <= MAX_CHUNK_CHARS) {
|
|
234
|
+
return [
|
|
235
|
+
{
|
|
236
|
+
id: `${entityId}:summary`,
|
|
237
|
+
entityId,
|
|
238
|
+
content,
|
|
239
|
+
chunkType: 'summary_md',
|
|
240
|
+
filePath,
|
|
241
|
+
updatedAt: now,
|
|
242
|
+
},
|
|
243
|
+
];
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// Split by headings for longer summaries
|
|
247
|
+
return chunkMarkdown(filePath, content).map((c) => ({
|
|
248
|
+
...c,
|
|
249
|
+
chunkType: 'summary_md' as ChunkType,
|
|
250
|
+
}));
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
/**
|
|
254
|
+
* Auto-detect file type and chunk accordingly.
|
|
255
|
+
*/
|
|
256
|
+
export function chunkFile(filePath: string, content: string): ChunkMeta[] {
|
|
257
|
+
if (!content.trim()) return [];
|
|
258
|
+
|
|
259
|
+
const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
|
|
260
|
+
|
|
261
|
+
// summary.md files
|
|
262
|
+
if (filePath.endsWith('summary.md')) {
|
|
263
|
+
return chunkSummary(filePath, content);
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
// Markdown files
|
|
267
|
+
if (ext === 'md') {
|
|
268
|
+
return chunkMarkdown(filePath, content);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// Source files — we only chunk doc-comments (code entities are handled separately)
|
|
272
|
+
// Return empty — code entities are handled via chunkCodeEntities from parsed AST
|
|
273
|
+
return [];
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
// Internal helpers
|
|
278
|
+
// ---------------------------------------------------------------------------
|
|
279
|
+
|
|
280
|
+
/** A markdown section: its heading line (absent for leading preamble text) plus the full section text including the heading. */
interface Section {
  heading?: string;
  text: string;
}
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* Split markdown content by heading boundaries (# H1, ## H2, ### H3).
|
|
287
|
+
*/
|
|
288
|
+
function splitByHeadings(content: string): Section[] {
|
|
289
|
+
const lines = content.split('\n');
|
|
290
|
+
const sections: Section[] = [];
|
|
291
|
+
let currentSection: Section = { text: '' };
|
|
292
|
+
|
|
293
|
+
for (const line of lines) {
|
|
294
|
+
if (/^#{1,3}\s/.test(line)) {
|
|
295
|
+
// New heading — start a new section
|
|
296
|
+
if (currentSection.text.trim()) {
|
|
297
|
+
sections.push(currentSection);
|
|
298
|
+
}
|
|
299
|
+
currentSection = { heading: line, text: line + '\n' };
|
|
300
|
+
} else {
|
|
301
|
+
currentSection.text += line + '\n';
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
if (currentSection.text.trim()) {
|
|
306
|
+
sections.push(currentSection);
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
return sections;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/**
|
|
313
|
+
* Split long text into overlapping windows.
|
|
314
|
+
*/
|
|
315
|
+
export function slidingWindow(text: string): string[] {
|
|
316
|
+
const windows: string[] = [];
|
|
317
|
+
let start = 0;
|
|
318
|
+
|
|
319
|
+
while (start < text.length) {
|
|
320
|
+
const end = Math.min(start + MAX_CHUNK_CHARS, text.length);
|
|
321
|
+
windows.push(text.slice(start, end));
|
|
322
|
+
if (end >= text.length) break;
|
|
323
|
+
start += MAX_CHUNK_CHARS - OVERLAP_CHARS;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
return windows;
|
|
327
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Module — Public API
|
|
3
|
+
*
|
|
4
|
+
* Provides semantic embedding and vector search for TrellisVCS entities.
|
|
5
|
+
*
|
|
6
|
+
* @see TRL-18
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Types
|
|
10
|
+
export type {
|
|
11
|
+
ChunkType,
|
|
12
|
+
ChunkMeta,
|
|
13
|
+
EmbeddingRecord,
|
|
14
|
+
SearchResult,
|
|
15
|
+
SearchOptions,
|
|
16
|
+
EmbeddingModelConfig,
|
|
17
|
+
} from './types.js';
|
|
18
|
+
export { DEFAULT_MODEL_CONFIG } from './types.js';
|
|
19
|
+
|
|
20
|
+
// Model
|
|
21
|
+
export { embed, embedBatch, loadModel, resetModel } from './model.js';
|
|
22
|
+
|
|
23
|
+
// Store
|
|
24
|
+
export { VectorStore, cosineSimilarity } from './store.js';
|
|
25
|
+
|
|
26
|
+
// Search
|
|
27
|
+
export { EmbeddingManager } from './search.js';
|
|
28
|
+
export type { SearchableEngine, Embedder } from './search.js';
|
|
29
|
+
|
|
30
|
+
// Chunker
|
|
31
|
+
export {
|
|
32
|
+
chunkIssue,
|
|
33
|
+
chunkMilestone,
|
|
34
|
+
chunkDecision,
|
|
35
|
+
chunkMarkdown,
|
|
36
|
+
chunkCodeEntities,
|
|
37
|
+
chunkDocComments,
|
|
38
|
+
chunkSummary,
|
|
39
|
+
chunkFile,
|
|
40
|
+
slidingWindow,
|
|
41
|
+
} from './chunker.js';
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Model
|
|
3
|
+
*
|
|
4
|
+
* Lazy-loads @xenova/transformers with all-MiniLM-L6-v2 (384-dim).
|
|
5
|
+
* Model is loaded once on first use and cached for subsequent calls.
|
|
6
|
+
*
|
|
7
|
+
* @see TRL-18
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { EmbeddingModelConfig, DEFAULT_MODEL_CONFIG } from './types.js';
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Model singleton
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
// Cached feature-extraction pipeline (set after the first successful load).
let pipeline: any = null;
// In-flight load shared by concurrent loadModel() callers; null when idle.
let loadPromise: Promise<any> | null = null;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Load the embedding model lazily. Returns the feature-extraction pipeline.
|
|
21
|
+
* Subsequent calls return the cached pipeline.
|
|
22
|
+
*/
|
|
23
|
+
export async function loadModel(
|
|
24
|
+
config: EmbeddingModelConfig = DEFAULT_MODEL_CONFIG,
|
|
25
|
+
): Promise<any> {
|
|
26
|
+
if (pipeline) return pipeline;
|
|
27
|
+
|
|
28
|
+
if (!loadPromise) {
|
|
29
|
+
loadPromise = (async () => {
|
|
30
|
+
const { pipeline: createPipeline } = await import(
|
|
31
|
+
'@xenova/transformers'
|
|
32
|
+
);
|
|
33
|
+
const opts: Record<string, unknown> = {};
|
|
34
|
+
if (config.cacheDir) {
|
|
35
|
+
opts.cache_dir = config.cacheDir;
|
|
36
|
+
}
|
|
37
|
+
pipeline = await createPipeline(
|
|
38
|
+
'feature-extraction',
|
|
39
|
+
config.modelName,
|
|
40
|
+
opts,
|
|
41
|
+
);
|
|
42
|
+
return pipeline;
|
|
43
|
+
})();
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
return loadPromise;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Generate an embedding vector for the given text.
|
|
51
|
+
* Returns a Float32Array of length `config.dimension` (default: 384).
|
|
52
|
+
*/
|
|
53
|
+
export async function embed(
|
|
54
|
+
text: string,
|
|
55
|
+
config: EmbeddingModelConfig = DEFAULT_MODEL_CONFIG,
|
|
56
|
+
): Promise<Float32Array> {
|
|
57
|
+
const pipe = await loadModel(config);
|
|
58
|
+
const output = await pipe(text, { pooling: 'mean', normalize: true });
|
|
59
|
+
// output.data is a Float32Array of shape [1, dimension]
|
|
60
|
+
return new Float32Array(output.data);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Generate embeddings for multiple texts in a batch.
|
|
65
|
+
* More efficient than calling embed() individually.
|
|
66
|
+
*/
|
|
67
|
+
export async function embedBatch(
|
|
68
|
+
texts: string[],
|
|
69
|
+
config: EmbeddingModelConfig = DEFAULT_MODEL_CONFIG,
|
|
70
|
+
): Promise<Float32Array[]> {
|
|
71
|
+
if (texts.length === 0) return [];
|
|
72
|
+
|
|
73
|
+
const pipe = await loadModel(config);
|
|
74
|
+
const results: Float32Array[] = [];
|
|
75
|
+
|
|
76
|
+
// Process in batches of 32 to manage memory
|
|
77
|
+
const batchSize = 32;
|
|
78
|
+
for (let i = 0; i < texts.length; i += batchSize) {
|
|
79
|
+
const batch = texts.slice(i, i + batchSize);
|
|
80
|
+
for (const text of batch) {
|
|
81
|
+
const output = await pipe(text, { pooling: 'mean', normalize: true });
|
|
82
|
+
results.push(new Float32Array(output.data));
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return results;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/**
 * Reset the model singleton. Useful for testing.
 */
export function resetModel(): void {
  // Drop both the cached pipeline and any in-flight load so the next
  // loadModel() call starts from scratch.
  pipeline = null;
  loadPromise = null;
}
|