@defai.digital/semantic-context 13.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +38 -0
- package/dist/embedding-service.d.ts +66 -0
- package/dist/embedding-service.d.ts.map +1 -0
- package/dist/embedding-service.js +265 -0
- package/dist/embedding-service.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/semantic-manager.d.ts +30 -0
- package/dist/semantic-manager.d.ts.map +1 -0
- package/dist/semantic-manager.js +186 -0
- package/dist/semantic-manager.js.map +1 -0
- package/dist/similarity.d.ts +89 -0
- package/dist/similarity.d.ts.map +1 -0
- package/dist/similarity.js +216 -0
- package/dist/similarity.js.map +1 -0
- package/dist/types.d.ts +236 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +258 -0
- package/dist/types.js.map +1 -0
- package/package.json +48 -0
- package/src/embedding-service.ts +323 -0
- package/src/index.ts +56 -0
- package/src/semantic-manager.ts +246 -0
- package/src/similarity.ts +265 -0
- package/src/types.ts +561 -0
package/dist/types.js
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Context Types
|
|
3
|
+
*
|
|
4
|
+
* Port interfaces and type definitions for semantic context storage.
|
|
5
|
+
*/
|
|
6
|
+
// ============================================================================
|
|
7
|
+
// Stub Implementations (for testing)
|
|
8
|
+
// ============================================================================
|
|
9
|
+
/**
 * Deterministic fake embedding port for tests.
 *
 * Vectors are derived purely from the character codes of the input text, so
 * the same text always produces the same vector. No external service is used.
 */
export class StubEmbeddingPort {
    dimension;
    model;
    /**
     * @param dimension Length of every generated vector (defaults to 384).
     * @param model Model name reported in results and config (defaults to 'stub').
     */
    constructor(dimension = 384, model = 'stub') {
        this.model = model;
        this.dimension = dimension;
    }
    /**
     * Embed a single request.
     * @param request Object with `text` and an optional `model` override.
     * @returns The embedding plus the reported model, dimension and a fixed duration.
     */
    async embed(request) {
        const reportedModel = request.model ?? this.model;
        return {
            // Deterministic: derived from the text's character codes.
            embedding: this.generateEmbedding(request.text),
            model: reportedModel,
            dimension: this.dimension,
            durationMs: 10,
        };
    }
    /**
     * Embed several texts by delegating each one to `embed`.
     * @param texts Array of input strings.
     * @returns Results in the same order as `texts`.
     */
    async embedBatch(texts) {
        const pending = texts.map((entry) => this.embed({ text: entry }));
        return Promise.all(pending);
    }
    /** @returns A static config object describing this stub. */
    getConfig() {
        const { model, dimension } = this;
        return {
            provider: 'local',
            model,
            dimension,
            batchSize: 32,
            cacheEnabled: true,
        };
    }
    /** @returns Always `true`. */
    async isAvailable() {
        return true;
    }
    /**
     * Build a vector of `this.dimension` components from the text.
     * Component i is sin(charCode * (i + 1) * 0.1), where the character is
     * picked cyclically from the text; the vector is then scaled by its
     * Euclidean norm (left untouched when the norm is 0, e.g. for empty text).
     */
    generateEmbedding(text) {
        const raw = [];
        let slot = 0;
        while (slot < this.dimension) {
            // Empty text yields NaN from charCodeAt, which falls back to 0.
            const code = text.charCodeAt(slot % text.length) || 0;
            raw.push(Math.sin(code * (slot + 1) * 0.1));
            slot++;
        }
        let squared = 0;
        for (const component of raw) {
            squared = squared + component * component;
        }
        const norm = Math.sqrt(squared);
        const divisor = norm || 1;
        return raw.map((component) => component / divisor);
    }
}
|
|
56
|
+
/**
|
|
57
|
+
* In-memory semantic store for testing
|
|
58
|
+
*/
|
|
59
|
+
export class InMemorySemanticStore {
|
|
60
|
+
items = new Map();
|
|
61
|
+
embeddingPort;
|
|
62
|
+
constructor(embeddingPort) {
|
|
63
|
+
this.embeddingPort = embeddingPort ?? new StubEmbeddingPort();
|
|
64
|
+
}
|
|
65
|
+
makeKey(key, namespace) {
|
|
66
|
+
return `${namespace}:${key}`;
|
|
67
|
+
}
|
|
68
|
+
async store(request) {
|
|
69
|
+
const namespace = request.namespace ?? 'default';
|
|
70
|
+
const storageKey = this.makeKey(request.key, namespace);
|
|
71
|
+
const existing = this.items.get(storageKey);
|
|
72
|
+
// Compute content hash
|
|
73
|
+
const contentHash = await this.hashContent(request.content);
|
|
74
|
+
const needsEmbedding = !existing ||
|
|
75
|
+
existing.contentHash !== contentHash ||
|
|
76
|
+
request.forceRecompute;
|
|
77
|
+
let embedding = request.embedding;
|
|
78
|
+
let embeddingComputed = false;
|
|
79
|
+
if (needsEmbedding && !embedding) {
|
|
80
|
+
const result = await this.embeddingPort.embed({ text: request.content });
|
|
81
|
+
embedding = result.embedding;
|
|
82
|
+
embeddingComputed = true;
|
|
83
|
+
}
|
|
84
|
+
const item = {
|
|
85
|
+
key: request.key,
|
|
86
|
+
namespace,
|
|
87
|
+
content: request.content,
|
|
88
|
+
embedding,
|
|
89
|
+
embeddingDimension: embedding?.length,
|
|
90
|
+
embeddingModel: this.embeddingPort.getConfig().model,
|
|
91
|
+
metadata: request.metadata,
|
|
92
|
+
tags: request.tags,
|
|
93
|
+
contentHash,
|
|
94
|
+
createdAt: existing?.createdAt ?? new Date().toISOString(),
|
|
95
|
+
updatedAt: new Date().toISOString(),
|
|
96
|
+
};
|
|
97
|
+
this.items.set(storageKey, item);
|
|
98
|
+
// Omit embedding from the response item
|
|
99
|
+
const { embedding: _, ...itemWithoutEmbedding } = item;
|
|
100
|
+
return {
|
|
101
|
+
success: true,
|
|
102
|
+
item: itemWithoutEmbedding,
|
|
103
|
+
created: !existing,
|
|
104
|
+
embeddingComputed,
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
async search(request) {
|
|
108
|
+
const startTime = Date.now();
|
|
109
|
+
const namespace = request.namespace;
|
|
110
|
+
// Get query embedding
|
|
111
|
+
const queryResult = await this.embeddingPort.embed({ text: request.query });
|
|
112
|
+
const queryEmbedding = queryResult.embedding;
|
|
113
|
+
// Filter items
|
|
114
|
+
const candidates = Array.from(this.items.values()).filter((item) => {
|
|
115
|
+
if (namespace && item.namespace !== namespace)
|
|
116
|
+
return false;
|
|
117
|
+
if (!item.embedding)
|
|
118
|
+
return false;
|
|
119
|
+
if (request.filterTags) {
|
|
120
|
+
const itemTags = new Set(item.tags ?? []);
|
|
121
|
+
if (!request.filterTags.every((t) => itemTags.has(t)))
|
|
122
|
+
return false;
|
|
123
|
+
}
|
|
124
|
+
return true;
|
|
125
|
+
});
|
|
126
|
+
// Compute similarities
|
|
127
|
+
const scored = candidates.map((item) => ({
|
|
128
|
+
item,
|
|
129
|
+
similarity: this.cosineSimilarity(queryEmbedding, item.embedding),
|
|
130
|
+
}));
|
|
131
|
+
// Filter by minSimilarity and sort (INV-SEM-002, INV-SEM-003)
|
|
132
|
+
const filtered = scored
|
|
133
|
+
.filter((s) => s.similarity >= (request.minSimilarity ?? 0.7))
|
|
134
|
+
.sort((a, b) => b.similarity - a.similarity)
|
|
135
|
+
.slice(0, request.topK ?? 10);
|
|
136
|
+
const results = filtered.map((s, index) => ({
|
|
137
|
+
item: request.includeEmbeddings
|
|
138
|
+
? s.item
|
|
139
|
+
: { ...s.item, embedding: undefined },
|
|
140
|
+
similarity: s.similarity,
|
|
141
|
+
rank: index + 1,
|
|
142
|
+
snippet: s.item.content.slice(0, 200),
|
|
143
|
+
}));
|
|
144
|
+
return {
|
|
145
|
+
results,
|
|
146
|
+
totalMatches: filtered.length,
|
|
147
|
+
query: request.query,
|
|
148
|
+
namespace,
|
|
149
|
+
durationMs: Date.now() - startTime,
|
|
150
|
+
queryEmbedding: request.includeEmbeddings ? queryEmbedding : undefined,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
153
|
+
async get(key, namespace = 'default') {
|
|
154
|
+
return this.items.get(this.makeKey(key, namespace)) ?? null;
|
|
155
|
+
}
|
|
156
|
+
async list(request) {
|
|
157
|
+
let items = Array.from(this.items.values());
|
|
158
|
+
// Filter by namespace
|
|
159
|
+
if (request.namespace) {
|
|
160
|
+
items = items.filter((i) => i.namespace === request.namespace);
|
|
161
|
+
}
|
|
162
|
+
// Filter by tags
|
|
163
|
+
if (request.filterTags) {
|
|
164
|
+
items = items.filter((item) => {
|
|
165
|
+
const itemTags = new Set(item.tags ?? []);
|
|
166
|
+
return request.filterTags.every((t) => itemTags.has(t));
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
// Filter by key prefix
|
|
170
|
+
if (request.keyPrefix) {
|
|
171
|
+
items = items.filter((i) => i.key.startsWith(request.keyPrefix));
|
|
172
|
+
}
|
|
173
|
+
// Sort
|
|
174
|
+
const sortDir = request.orderDir === 'asc' ? 1 : -1;
|
|
175
|
+
items.sort((a, b) => {
|
|
176
|
+
const aVal = a[request.orderBy ?? 'createdAt'] ?? '';
|
|
177
|
+
const bVal = b[request.orderBy ?? 'createdAt'] ?? '';
|
|
178
|
+
return aVal < bVal ? -sortDir : sortDir;
|
|
179
|
+
});
|
|
180
|
+
// Paginate
|
|
181
|
+
const offset = request.offset ?? 0;
|
|
182
|
+
const limit = request.limit ?? 10;
|
|
183
|
+
const paginated = items.slice(offset, offset + limit);
|
|
184
|
+
return {
|
|
185
|
+
items: paginated.map((i) => ({ ...i, embedding: undefined })),
|
|
186
|
+
total: items.length,
|
|
187
|
+
hasMore: offset + limit < items.length,
|
|
188
|
+
namespace: request.namespace,
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
async delete(key, namespace = 'default') {
|
|
192
|
+
const storageKey = this.makeKey(key, namespace);
|
|
193
|
+
const deleted = this.items.delete(storageKey);
|
|
194
|
+
return { deleted, key, namespace };
|
|
195
|
+
}
|
|
196
|
+
async exists(key, namespace = 'default') {
|
|
197
|
+
return this.items.has(this.makeKey(key, namespace));
|
|
198
|
+
}
|
|
199
|
+
async getStats(namespace) {
|
|
200
|
+
let items = Array.from(this.items.values());
|
|
201
|
+
if (namespace) {
|
|
202
|
+
items = items.filter((i) => i.namespace === namespace);
|
|
203
|
+
}
|
|
204
|
+
const withEmbeddings = items.filter((i) => i.embedding);
|
|
205
|
+
const allNamespaces = [...new Set(Array.from(this.items.values()).map((i) => i.namespace))];
|
|
206
|
+
const result = {
|
|
207
|
+
totalItems: items.length,
|
|
208
|
+
itemsWithEmbeddings: withEmbeddings.length,
|
|
209
|
+
embeddingDimension: withEmbeddings[0]?.embeddingDimension ?? null,
|
|
210
|
+
embeddingModel: withEmbeddings[0]?.embeddingModel ?? null,
|
|
211
|
+
namespace: namespace ?? null,
|
|
212
|
+
};
|
|
213
|
+
// Only include namespaces if no specific namespace was queried
|
|
214
|
+
if (!namespace) {
|
|
215
|
+
result.namespaces = allNamespaces;
|
|
216
|
+
}
|
|
217
|
+
return result;
|
|
218
|
+
}
|
|
219
|
+
async clear(namespace) {
|
|
220
|
+
if (namespace) {
|
|
221
|
+
const keysToDelete = Array.from(this.items.entries())
|
|
222
|
+
.filter(([_, item]) => item.namespace === namespace)
|
|
223
|
+
.map(([key]) => key);
|
|
224
|
+
keysToDelete.forEach((k) => this.items.delete(k));
|
|
225
|
+
return keysToDelete.length;
|
|
226
|
+
}
|
|
227
|
+
else {
|
|
228
|
+
const count = this.items.size;
|
|
229
|
+
this.items.clear();
|
|
230
|
+
return count;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
cosineSimilarity(a, b) {
|
|
234
|
+
if (a.length !== b.length)
|
|
235
|
+
return 0;
|
|
236
|
+
let dot = 0;
|
|
237
|
+
let normA = 0;
|
|
238
|
+
let normB = 0;
|
|
239
|
+
for (let i = 0; i < a.length; i++) {
|
|
240
|
+
dot += a[i] * b[i];
|
|
241
|
+
normA += a[i] * a[i];
|
|
242
|
+
normB += b[i] * b[i];
|
|
243
|
+
}
|
|
244
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
245
|
+
if (denom === 0)
|
|
246
|
+
return 0;
|
|
247
|
+
// Cosine similarity is in [-1, 1], normalize to [0, 1]
|
|
248
|
+
return (dot / denom + 1) / 2;
|
|
249
|
+
}
|
|
250
|
+
async hashContent(content) {
|
|
251
|
+
const encoder = new TextEncoder();
|
|
252
|
+
const data = encoder.encode(content);
|
|
253
|
+
const hashBuffer = await crypto.subtle.digest('SHA-256', data);
|
|
254
|
+
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
|
255
|
+
return hashArray.map((b) => b.toString(16).padStart(2, '0')).join('');
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AA6QH,+EAA+E;AAC/E,qCAAqC;AACrC,+EAA+E;AAE/E;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,SAAS,CAAS;IAClB,KAAK,CAAS;IAEtB,YAAY,SAAS,GAAG,GAAG,EAAE,KAAK,GAAG,MAAM;QACzC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,OAAyB;QACnC,sDAAsD;QACtD,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACvD,OAAO;YACL,SAAS;YACT,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK;YAClC,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,UAAU,EAAE,EAAE;SACf,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAChE,CAAC;IAED,SAAS;QACP,OAAO;YACL,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,SAAS,EAAE,EAAE;YACb,YAAY,EAAE,IAAI;SACnB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,WAAW;QACf,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,iBAAiB,CAAC,IAAY;QACpC,0CAA0C;QAC1C,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;QACrD,CAAC;QACD,YAAY;QACZ,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,qBAAqB;IACxB,KAAK,GAA8B,IAAI,GAAG,EAAE,CAAC;IAC7C,aAAa,CAAgB;IAErC,YAAY,aAA6B;QACvC,IAAI,CAAC,aAAa,GAAG,aAAa,IAAI,IAAI,iBAAiB,EAAE,CAAC;IAChE,CAAC;IAEO,OAAO,CAAC,GAAW,EAAE,SAAiB;QAC5C,OAAO,GAAG,SAAS,IAAI,GAAG,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,OAA6B;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,SAAS,CAAC;QACjD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,C
AAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAE5C,uBAAuB;QACvB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5D,MAAM,cAAc,GAAG,CAAC,QAAQ;YAC9B,QAAQ,CAAC,WAAW,KAAK,WAAW;YACpC,OAAO,CAAC,cAAc,CAAC;QAEzB,IAAI,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QAClC,IAAI,iBAAiB,GAAG,KAAK,CAAC;QAE9B,IAAI,cAAc,IAAI,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;YACzE,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;YAC7B,iBAAiB,GAAG,IAAI,CAAC;QAC3B,CAAC;QAED,MAAM,IAAI,GAAiB;YACzB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,SAAS;YACT,kBAAkB,EAAE,SAAS,EAAE,MAAM;YACrC,cAAc,EAAE,IAAI,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,KAAK;YACpD,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,WAAW;YACX,SAAS,EAAE,QAAQ,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC1D,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAEjC,wCAAwC;QACxC,MAAM,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,oBAAoB,EAAE,GAAG,IAAI,CAAC;QACvD,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,oBAAoB;YAC1B,OAAO,EAAE,CAAC,QAAQ;YAClB,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAA8B;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QAEpC,sBAAsB;QACtB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QAC5E,MAAM,cAAc,GAAG,WAAW,CAAC,SAAS,CAAC;QAE7C,eAAe;QACf,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YACjE,IAAI,SAAS,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS;gBAAE,OAAO,KAAK,CAAC;YAC5D,IAAI,CAAC,IAAI,CAAC,SAAS;gBAAE,OAAO,KAAK,CAAC;YAClC,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;gBAC1C,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;oBAAE,OAAO,KAAK,CAAC;YACtE,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;Q
AEH,uBAAuB;QACvB,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACvC,IAAI;YACJ,UAAU,EAAE,IAAI,CAAC,gBAAgB,CAAC,cAAc,EAAE,IAAI,CAAC,SAAU,CAAC;SACnE,CAAC,CAAC,CAAC;QAEJ,8DAA8D;QAC9D,MAAM,QAAQ,GAAG,MAAM;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,OAAO,CAAC,aAAa,IAAI,GAAG,CAAC,CAAC;aAC7D,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;aAC3C,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QAEhC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YAC1C,IAAI,EAAE,OAAO,CAAC,iBAAiB;gBAC7B,CAAC,CAAC,CAAC,CAAC,IAAI;gBACR,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE;YACvC,UAAU,EAAE,CAAC,CAAC,UAAU;YACxB,IAAI,EAAE,KAAK,GAAG,CAAC;YACf,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;SACtC,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,OAAO;YACP,YAAY,EAAE,QAAQ,CAAC,MAAM;YAC7B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,cAAc,EAAE,OAAO,CAAC,iBAAiB,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS;SACvE,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,SAAS,GAAG,SAAS;QAC1C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC,IAAI,IAAI,CAAC;IAC9D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,OAA4B;QACrC,IAAI,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAE5C,sBAAsB;QACtB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,OAAO,CAAC,SAAS,CAAC,CAAC;QACjE,CAAC;QAED,iBAAiB;QACjB,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;gBAC5B,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;gBAC1C,OAAO,OAAO,CAAC,UAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3D,CAAC,CAAC,CAAC;QACL,CAAC;QAED,uBAAuB;QACvB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,SAAU,CAAC,CAAC,CAAC;QACp
E,CAAC;QAED,OAAO;QACP,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;YACrD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;YACrD,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,WAAW;QACX,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAC;QAEtD,OAAO;YACL,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;YAC7D,KAAK,EAAE,KAAK,CAAC,MAAM;YACnB,OAAO,EAAE,MAAM,GAAG,KAAK,GAAG,KAAK,CAAC,MAAM;YACtC,SAAS,EAAE,OAAO,CAAC,SAAS;SAC7B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,SAAS,GAAG,SAAS;QAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC9C,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,SAAS,GAAG,SAAS;QAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,SAAkB;QAC/B,IAAI,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAC5C,IAAI,SAAS,EAAE,CAAC;YACd,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACxD,MAAM,aAAa,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAE5F,MAAM,MAAM,GAAuB;YACjC,UAAU,EAAE,KAAK,CAAC,MAAM;YACxB,mBAAmB,EAAE,cAAc,CAAC,MAAM;YAC1C,kBAAkB,EAAE,cAAc,CAAC,CAAC,CAAC,EAAE,kBAAkB,IAAI,IAAI;YACjE,cAAc,EAAE,cAAc,CAAC,CAAC,CAAC,EAAE,cAAc,IAAI,IAAI;YACzD,SAAS,EAAE,SAAS,IAAI,IA
AI;SAC7B,CAAC;QAEF,+DAA+D;QAC/D,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,CAAC,UAAU,GAAG,aAAa,CAAC;QACpC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,SAAkB;QAC5B,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;iBAClD,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,KAAK,SAAS,CAAC;iBACnD,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC;YACvB,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,YAAY,CAAC,MAAM,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;YAC9B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,CAAW,EAAE,CAAW;QAC/C,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;YAAE,OAAO,CAAC,CAAC;QACpC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;YACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;YACvB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClD,IAAI,KAAK,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAC1B,uDAAuD;QACvD,OAAO,CAAC,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,OAAe;QACvC,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAC/D,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;QACzD,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxE,CAAC;CACF"}
|
package/package.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@defai.digital/semantic-context",
|
|
3
|
+
"version": "13.4.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Vector-indexed semantic search for AutomatosX",
|
|
6
|
+
"license": "BUSL-1.1",
|
|
7
|
+
"author": "DEFAI Private Limited",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/defai-digital/automatosx.git",
|
|
11
|
+
"directory": "packages/core/semantic-context"
|
|
12
|
+
},
|
|
13
|
+
"homepage": "https://github.com/defai-digital/automatosx#readme",
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/defai-digital/automatosx/issues"
|
|
16
|
+
},
|
|
17
|
+
"main": "dist/index.js",
|
|
18
|
+
"types": "dist/index.d.ts",
|
|
19
|
+
"exports": {
|
|
20
|
+
".": {
|
|
21
|
+
"types": "./dist/index.d.ts",
|
|
22
|
+
"import": "./dist/index.js"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"dist",
|
|
27
|
+
"src"
|
|
28
|
+
],
|
|
29
|
+
"engines": {
|
|
30
|
+
"node": ">=20.0.0"
|
|
31
|
+
},
|
|
32
|
+
"publishConfig": {
|
|
33
|
+
"access": "public"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@defai.digital/contracts": "13.4.0"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"typescript": "^5.7.2"
|
|
40
|
+
},
|
|
41
|
+
"peerDependencies": {
|
|
42
|
+
"zod": "^3.23.0"
|
|
43
|
+
},
|
|
44
|
+
"scripts": {
|
|
45
|
+
"build": "tsc --build",
|
|
46
|
+
"clean": "rm -rf dist"
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Service
|
|
3
|
+
*
|
|
4
|
+
* Provides text embedding computation with a local TF-IDF fallback.
|
|
5
|
+
* Can be extended with external providers (OpenAI, Cohere, etc.)
|
|
6
|
+
*
|
|
7
|
+
* Invariants:
|
|
8
|
+
* - INV-SEM-001: Embeddings computed and cached
|
|
9
|
+
* - INV-SEM-200: Consistent dimension within namespace
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type {
|
|
13
|
+
EmbeddingPort,
|
|
14
|
+
EmbeddingRequest,
|
|
15
|
+
EmbeddingResult,
|
|
16
|
+
} from './types.js';
|
|
17
|
+
import type { EmbeddingConfig } from '@defai.digital/contracts';
|
|
18
|
+
import { normalizeVector } from './similarity.js';
|
|
19
|
+
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// Local TF-IDF Embedding
|
|
22
|
+
// ============================================================================
|
|
23
|
+
|
|
24
|
+
/**
 * Split text into lowercase word tokens.
 *
 * Every character that is not a Unicode letter, combining mark, digit,
 * underscore or whitespace is replaced by a space, the text is split on
 * whitespace, and tokens shorter than two characters are dropped.
 *
 * Uses Unicode property escapes rather than the ASCII-only `\w`, so accented
 * and non-Latin words stay intact. For pure ASCII input the result matches
 * the `\w`-based behaviour.
 *
 * @param text Raw input text.
 * @returns Tokens in order of appearance (may be empty).
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter((token) => token.length > 1);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Relative term frequency of each distinct token.
 *
 * @param tokens Token list, duplicates allowed.
 * @returns Map from token to (occurrences / total number of tokens), in
 *   first-occurrence order. Empty input yields an empty map.
 */
function computeTF(tokens: string[]): Map<string, number> {
  const frequencies = new Map<string, number>();

  // First pass: raw occurrence counts.
  tokens.forEach((token) => {
    const seen = frequencies.get(token) ?? 0;
    frequencies.set(token, seen + 1);
  });

  // Second pass: scale every count by the total token count.
  const tokenCount = tokens.length;
  frequencies.forEach((occurrences, term) => {
    frequencies.set(term, occurrences / tokenCount);
  });

  return frequencies;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Map a string onto a bucket index in [0, maxDim).
 *
 * Folds the UTF-16 code units into a 32-bit signed accumulator using
 * `acc * 31 + codeUnit`, then reduces the absolute value modulo `maxDim`.
 *
 * @param str String to hash.
 * @param maxDim Number of buckets.
 * @returns Bucket index for `str`.
 */
function hashString(str: string, maxDim: number): number {
  let acc = 0;
  let pos = 0;
  while (pos < str.length) {
    // Math.imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return Math.abs(acc) % maxDim;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Build a fixed-size embedding for a single text via feature hashing.
 *
 * Steps:
 * 1. Tokenize the text and compute relative term frequencies.
 * 2. Hash each term to a bucket in [0, dimension).
 * 3. Add or subtract the term's frequency in that bucket; the sign comes
 *    from a second hash of `term + '_sign'`.
 * 4. Pass the bucket vector through `normalizeVector`.
 *
 * No document-frequency weighting is applied here because only one text is
 * available.
 *
 * @param text Input text.
 * @param dimension Length of the resulting vector.
 * @returns The vector returned by `normalizeVector`.
 */
export function createTFIDFEmbedding(text: string, dimension: number): number[] {
  const termFrequencies = computeTF(tokenize(text));

  // Dense accumulator for the hashed sparse features.
  const buckets = new Array(dimension).fill(0);

  termFrequencies.forEach((weight, term) => {
    const slot = hashString(term, dimension);
    const isNegative = hashString(term + '_sign', 2) !== 0;
    buckets[slot] += isNegative ? -weight : weight;
  });

  return normalizeVector(buckets);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Build TF-IDF embeddings for a batch of texts, treating the batch as the
 * corpus for document-frequency statistics.
 *
 * The inverse document frequency is smoothed:
 * `idf = ln((1 + N) / (1 + df)) + 1`. The plain `ln(N / df)` form is 0 for
 * any term present in every document, and in particular for every term of a
 * single-document batch, which produced all-zero embeddings. With smoothing,
 * a one-document batch has idf 1 for every term, so its weights equal the
 * plain term frequencies.
 *
 * Each weighted term is hashed to a bucket in [0, dimension), with the sign
 * taken from a second hash of `term + '_sign'`, and the bucket vector is
 * passed through `normalizeVector`.
 *
 * @param texts Documents to embed; also the corpus for the IDF statistics.
 * @param dimension Length of each resulting vector.
 * @returns One vector per input text, in input order.
 */
export function createTFIDFEmbeddingBatch(texts: string[], dimension: number): number[][] {
  // Tokenize once per document and reuse the tokens for both passes.
  const tokenized = texts.map((text) => tokenize(text));

  // Document frequency: number of documents containing each term.
  const docFreq = new Map<string, number>();
  for (const tokens of tokenized) {
    for (const token of new Set(tokens)) {
      docFreq.set(token, (docFreq.get(token) ?? 0) + 1);
    }
  }

  const numDocs = texts.length;

  return tokenized.map((tokens) => {
    const embedding = new Array<number>(dimension).fill(0);

    for (const [term, freq] of computeTF(tokens)) {
      const df = docFreq.get(term) ?? 1;
      // Smoothed IDF: strictly positive, so shared terms still contribute.
      const idf = Math.log((1 + numDocs) / (1 + df)) + 1;
      const tfidf = freq * idf;

      // Hash to dimension
      const index = hashString(term, dimension);
      const sign = hashString(term + '_sign', 2) === 0 ? 1 : -1;
      embedding[index] += tfidf * sign;
    }

    return normalizeVector(embedding);
  });
}
|
|
135
|
+
|
|
136
|
+
// ============================================================================
|
|
137
|
+
// Local Embedding Provider
|
|
138
|
+
// ============================================================================
|
|
139
|
+
|
|
140
|
+
/**
 * Local embedding provider backed by the hashed TF-IDF functions in this file.
 *
 * Defaults: provider 'local', model 'tfidf', dimension 384, batchSize 32,
 * cacheEnabled true. Any field passed in `config` overrides its default.
 */
export class LocalEmbeddingProvider implements EmbeddingPort {
  private config: EmbeddingConfig;

  /**
   * @param config Optional partial configuration merged over the defaults.
   */
  constructor(config?: Partial<EmbeddingConfig>) {
    this.config = {
      provider: 'local',
      model: 'tfidf',
      dimension: 384,
      batchSize: 32,
      cacheEnabled: true,
      ...config,
    };
  }

  /**
   * Embed a single text with `createTFIDFEmbedding`.
   *
   * @param request Text to embed; `request.model` only changes the model name
   *   reported in the result.
   * @returns The embedding with model, dimension and elapsed milliseconds.
   */
  async embed(request: EmbeddingRequest): Promise<EmbeddingResult> {
    const startTime = Date.now();

    const embedding = createTFIDFEmbedding(
      request.text,
      this.config.dimension
    );

    return {
      embedding,
      model: request.model ?? this.config.model,
      dimension: this.config.dimension,
      durationMs: Date.now() - startTime,
    };
  }

  /**
   * Embed many texts in chunks of `batchSize` using
   * `createTFIDFEmbeddingBatch`. Each chunk is embedded independently of the
   * others.
   *
   * @param texts Texts to embed.
   * @returns One result per text, in input order. `durationMs` is the time
   *   elapsed since the start of the whole call when the result was produced.
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const startTime = Date.now();

    // A zero, negative or NaN batch size would never advance the loop below,
    // so clamp the step to at least one text per chunk.
    const step = Math.max(1, Math.floor(this.config.batchSize) || 1);

    // Process in batches
    const results: EmbeddingResult[] = [];
    for (let i = 0; i < texts.length; i += step) {
      const batch = texts.slice(i, i + step);
      const embeddings = createTFIDFEmbeddingBatch(batch, this.config.dimension);

      for (const embedding of embeddings) {
        results.push({
          embedding,
          model: this.config.model,
          dimension: this.config.dimension,
          durationMs: Date.now() - startTime,
        });
      }
    }

    return results;
  }

  /** @returns A shallow copy of the effective configuration. */
  getConfig(): EmbeddingConfig {
    return { ...this.config };
  }

  /** @returns Always `true`; the local provider has no external dependency. */
  async isAvailable(): Promise<boolean> {
    return true; // Local provider is always available
  }
}
|
|
203
|
+
|
|
204
|
+
// ============================================================================
|
|
205
|
+
// Embedding Service Factory
|
|
206
|
+
// ============================================================================
|
|
207
|
+
|
|
208
|
+
/**
 * Factory for embedding providers.
 *
 * Only the local TF-IDF provider exists today, so every `provider` value,
 * including unrecognised ones, resolves to a `LocalEmbeddingProvider` built
 * from the given config.
 *
 * @param config Optional partial embedding configuration.
 * @returns A new provider instance.
 */
export function createEmbeddingProvider(config?: Partial<EmbeddingConfig>): EmbeddingPort {
  const requested = config?.provider ?? 'local';

  if (requested === 'local') {
    return new LocalEmbeddingProvider(config);
  }

  // Future: Add OpenAI, Cohere, Voyage providers here
  // if (requested === 'openai') {
  //   return new OpenAIEmbeddingProvider(config);
  // }

  // Anything unrecognised falls back to the local provider.
  return new LocalEmbeddingProvider(config);
}
|
|
227
|
+
|
|
228
|
+
/**
 * Caching wrapper around another embedding port.
 *
 * Results are cached per `model:text` key. `embed` and `embedBatch` share
 * the same key scheme, so a text embedded through one is served from cache
 * by the other. When the cache is full the oldest inserted entry is evicted
 * (FIFO, relying on Map insertion order).
 */
export class CachedEmbeddingProvider implements EmbeddingPort {
  private cache: Map<string, EmbeddingResult> = new Map();
  private delegate: EmbeddingPort;
  private maxCacheSize: number;

  /**
   * @param delegate Port that performs the actual embedding.
   * @param maxCacheSize Maximum number of cached results (default 10000);
   *   a value of 0 or less disables caching.
   */
  constructor(delegate: EmbeddingPort, maxCacheSize = 10000) {
    this.delegate = delegate;
    this.maxCacheSize = maxCacheSize;
  }

  /**
   * Embed one text, serving from cache when possible.
   *
   * @param request Text and optional model override.
   * @returns The cached result with `durationMs: 0`, or the delegate's
   *   freshly computed result.
   */
  async embed(request: EmbeddingRequest): Promise<EmbeddingResult> {
    const cacheKey = this.cacheKeyFor(request.model, request.text);

    // Check cache
    const cached = this.cache.get(cacheKey);
    if (cached) {
      return { ...cached, durationMs: 0 };
    }

    // Compute and cache
    const result = await this.delegate.embed(request);
    this.remember(cacheKey, result);
    return result;
  }

  /**
   * Embed many texts, sending only the cache misses to the delegate's
   * `embedBatch` in a single call.
   *
   * @param texts Texts to embed.
   * @returns One result per text, in input order; cache hits carry
   *   `durationMs: 0`.
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const model = this.getConfig().model;
    const toCompute: { index: number; text: string }[] = [];
    const results: (EmbeddingResult | null)[] = new Array(texts.length).fill(null);

    // Check which texts need computation
    texts.forEach((text, index) => {
      const cached = this.cache.get(this.cacheKeyFor(model, text));
      if (cached) {
        results[index] = { ...cached, durationMs: 0 };
      } else {
        toCompute.push({ index, text });
      }
    });

    // Compute missing embeddings
    if (toCompute.length > 0) {
      const computed = await this.delegate.embedBatch(toCompute.map((t) => t.text));

      toCompute.forEach(({ index, text }, position) => {
        const result = computed[position]!;
        results[index] = result;
        this.remember(this.cacheKeyFor(model, text), result);
      });
    }

    return results as EmbeddingResult[];
  }

  /** @returns The delegate's configuration. */
  getConfig(): EmbeddingConfig {
    return this.delegate.getConfig();
  }

  /** @returns Whether the delegate reports itself available. */
  async isAvailable(): Promise<boolean> {
    return this.delegate.isAvailable();
  }

  /**
   * Clear the cache
   */
  clearCache(): void {
    this.cache.clear();
  }

  /**
   * Get cache statistics
   */
  getCacheStats(): { size: number; maxSize: number } {
    return {
      size: this.cache.size,
      maxSize: this.maxCacheSize,
    };
  }

  /**
   * Build the cache key; falls back to the delegate's configured model when
   * the request does not name one.
   */
  private cacheKeyFor(model: string | undefined, text: string): string {
    return `${model ?? this.delegate.getConfig().model}:${text}`;
  }

  /** Insert a result, evicting the oldest entry first when the cache is full. */
  private remember(cacheKey: string, result: EmbeddingResult): void {
    if (this.maxCacheSize <= 0) return;

    if (!this.cache.has(cacheKey) && this.cache.size >= this.maxCacheSize) {
      const oldestKey = this.cache.keys().next().value;
      if (oldestKey !== undefined) this.cache.delete(oldestKey);
    }

    this.cache.set(cacheKey, result);
  }
}
|