@defai.digital/semantic-context 13.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +38 -0
- package/dist/embedding-service.d.ts +66 -0
- package/dist/embedding-service.d.ts.map +1 -0
- package/dist/embedding-service.js +265 -0
- package/dist/embedding-service.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/semantic-manager.d.ts +30 -0
- package/dist/semantic-manager.d.ts.map +1 -0
- package/dist/semantic-manager.js +186 -0
- package/dist/semantic-manager.js.map +1 -0
- package/dist/similarity.d.ts +89 -0
- package/dist/similarity.d.ts.map +1 -0
- package/dist/similarity.js +216 -0
- package/dist/similarity.js.map +1 -0
- package/dist/types.d.ts +236 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +258 -0
- package/dist/types.js.map +1 -0
- package/package.json +48 -0
- package/src/embedding-service.ts +323 -0
- package/src/index.ts +56 -0
- package/src/semantic-manager.ts +246 -0
- package/src/similarity.ts +265 -0
- package/src/types.ts +561 -0
package/src/types.ts
ADDED
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Context Types
|
|
3
|
+
*
|
|
4
|
+
* Port interfaces and type definitions for semantic context storage.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type {
|
|
8
|
+
SemanticItem,
|
|
9
|
+
SemanticSearchRequest,
|
|
10
|
+
SemanticSearchResponse,
|
|
11
|
+
SemanticStoreRequest,
|
|
12
|
+
SemanticStoreResponse,
|
|
13
|
+
SemanticListRequest,
|
|
14
|
+
SemanticListResponse,
|
|
15
|
+
SemanticDeleteResponse,
|
|
16
|
+
EmbeddingConfig,
|
|
17
|
+
} from '@defai.digital/contracts';
|
|
18
|
+
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// Embedding Port
|
|
21
|
+
// ============================================================================
|
|
22
|
+
|
|
23
|
+
/**
 * Input for a single embedding computation.
 */
export interface EmbeddingRequest {
  /** The text whose embedding is requested. */
  text: string;

  /** Model to use. Optional; the configured default applies when omitted. */
  model?: string;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Outcome of an embedding computation.
 */
export interface EmbeddingResult {
  /** The computed embedding vector. */
  embedding: number[];

  /** Name of the model that was used. */
  model: string;

  /** Dimension of the embedding. */
  dimension: number;

  /** How long the computation took, in milliseconds. */
  durationMs: number;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Port through which embeddings are computed.
 *
 * The concrete embedding provider is supplied by an implementation at runtime.
 */
export interface EmbeddingPort {
  /** Computes the embedding for one piece of text. */
  embed(request: EmbeddingRequest): Promise<EmbeddingResult>;

  /** Computes embeddings for several texts in one batch call. */
  embedBatch(texts: string[]): Promise<EmbeddingResult[]>;

  /** Returns the embedding configuration. */
  getConfig(): EmbeddingConfig;

  /** Reports whether the provider is available. */
  isAvailable(): Promise<boolean>;
}
|
|
88
|
+
|
|
89
|
+
// ============================================================================
|
|
90
|
+
// Semantic Store Port
|
|
91
|
+
// ============================================================================
|
|
92
|
+
|
|
93
|
+
/**
 * Port for semantic storage.
 *
 * Implementations supply the actual persistence (SQLite, etc.).
 */
export interface SemanticStorePort {
  /**
   * Stores an item together with its embedding.
   * INV-SEM-001: Embedding cached until content changes
   */
  store(request: SemanticStoreRequest): Promise<SemanticStoreResponse>;

  /**
   * Searches items by semantic similarity.
   * INV-SEM-002: Results sorted by similarity descending
   * INV-SEM-003: Scores normalized to [0, 1]
   * INV-SEM-004: Namespace isolation
   */
  search(request: SemanticSearchRequest): Promise<SemanticSearchResponse>;

  /** Looks up one item by key; resolves to `null` when there is none. */
  get(key: string, namespace?: string): Promise<SemanticItem | null>;

  /** Lists items. */
  list(request: SemanticListRequest): Promise<SemanticListResponse>;

  /** Deletes one item. */
  delete(key: string, namespace?: string): Promise<SemanticDeleteResponse>;

  /** Checks whether an item exists. */
  exists(key: string, namespace?: string): Promise<boolean>;

  /** Returns statistics for a namespace. */
  getStats(namespace?: string): Promise<SemanticStoreStats>;

  /** Clears a namespace. */
  clear(namespace?: string): Promise<number>;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Statistics describing the contents of a semantic store.
 */
export interface SemanticStoreStats {
  /** Number of items in the namespace. */
  totalItems: number;

  /** Number of those items that carry an embedding. */
  itemsWithEmbeddings: number;

  /** Embedding dimension in use, or `null`. */
  embeddingDimension: number | null;

  /** Embedding model in use, or `null`. */
  embeddingModel: string | null;

  /** The namespace that was queried, or `null`. */
  namespace: string | null;

  /** All namespaces (if namespace not specified). */
  namespaces?: string[];
}
|
|
177
|
+
|
|
178
|
+
// ============================================================================
|
|
179
|
+
// Semantic Manager Interface
|
|
180
|
+
// ============================================================================
|
|
181
|
+
|
|
182
|
+
/**
 * High-level manager for semantic context.
 *
 * Brings embedding and storage together behind one interface.
 */
export interface SemanticManager {
  /** Stores content, computing its embedding automatically. */
  store(request: SemanticStoreRequest): Promise<SemanticStoreResponse>;

  /** Searches items by semantic similarity. */
  search(request: SemanticSearchRequest): Promise<SemanticSearchResponse>;

  /** Looks up one item by key; resolves to `null` when there is none. */
  get(key: string, namespace?: string): Promise<SemanticItem | null>;

  /** Lists items. */
  list(request: SemanticListRequest): Promise<SemanticListResponse>;

  /** Deletes one item. */
  delete(key: string, namespace?: string): Promise<SemanticDeleteResponse>;

  /** Returns store statistics. */
  getStats(namespace?: string): Promise<SemanticStoreStats>;

  /** Clears a namespace. */
  clear(namespace?: string): Promise<number>;

  /** Returns the embedding configuration. */
  getEmbeddingConfig(): EmbeddingConfig;
}
|
|
227
|
+
|
|
228
|
+
// ============================================================================
|
|
229
|
+
// Manager Options
|
|
230
|
+
// ============================================================================
|
|
231
|
+
|
|
232
|
+
/**
 * Options accepted when creating a semantic manager.
 */
export interface SemanticManagerOptions {
  /** Port of the embedding provider. */
  embeddingPort: EmbeddingPort;

  /** Port of the storage backend. */
  storePort: SemanticStorePort;

  /** Default namespace. */
  defaultNamespace?: string;

  /** Whether embeddings are computed automatically on store. */
  autoEmbed?: boolean;
}
|
|
256
|
+
|
|
257
|
+
// ============================================================================
|
|
258
|
+
// Similarity Types
|
|
259
|
+
// ============================================================================
|
|
260
|
+
|
|
261
|
+
/**
 * Names of the similarity computation methods.
 */
export type SimilarityMethod = 'cosine' | 'dot' | 'euclidean';
|
|
265
|
+
|
|
266
|
+
/**
 * Options controlling a similarity computation.
 */
export interface SimilarityOptions {
  method: SimilarityMethod;
  normalize: boolean;
}
|
|
273
|
+
|
|
274
|
+
// ============================================================================
|
|
275
|
+
// Stub Implementations (for testing)
|
|
276
|
+
// ============================================================================
|
|
277
|
+
|
|
278
|
+
/**
 * Embedding port stub intended for tests.
 *
 * Produces deterministic vectors derived from the character codes of the
 * input text; no external provider is contacted.
 */
export class StubEmbeddingPort implements EmbeddingPort {
  private dimension: number;
  private model: string;

  /**
   * @param dimension Length of the vectors to generate.
   * @param model Model name reported in results and configuration.
   */
  constructor(dimension = 384, model = 'stub') {
    this.model = model;
    this.dimension = dimension;
  }

  /**
   * Builds the embedding for `request.text`.
   *
   * The reported model is `request.model` when given, otherwise the stub's
   * own model name. `durationMs` is a fixed placeholder value.
   */
  async embed(request: EmbeddingRequest): Promise<EmbeddingResult> {
    const { text, model } = request;
    // Deterministic vector derived from the text's character codes.
    const vector = this.generateEmbedding(text);
    return {
      embedding: vector,
      model: model ?? this.model,
      dimension: this.dimension,
      durationMs: 10,
    };
  }

  /** Embeds every text through {@link embed}, using the stub's own model. */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const pending = texts.map((text) => this.embed({ text }));
    return Promise.all(pending);
  }

  /** Returns a fixed configuration carrying this stub's model and dimension. */
  getConfig(): EmbeddingConfig {
    const { model, dimension } = this;
    return {
      provider: 'local',
      model,
      dimension,
      batchSize: 32,
      cacheEnabled: true,
    };
  }

  /** The stub is always available. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Simple character-code based embedding for testing.
   *
   * Component `i` is `sin(charCode * (i + 1) * 0.1)`, where the character is
   * taken cyclically from `text`. The vector is then scaled to unit length;
   * an all-zero vector (e.g. for empty text) is returned unscaled.
   */
  private generateEmbedding(text: string): number[] {
    const raw: number[] = [];
    let sumOfSquares = 0;
    for (let i = 0; i < this.dimension; i++) {
      // For empty text charCodeAt yields NaN, which falls back to 0.
      const code = text.charCodeAt(i % text.length) || 0;
      const component = Math.sin(code * (i + 1) * 0.1);
      raw.push(component);
      sumOfSquares += component * component;
    }
    // Normalize; a zero norm is replaced by 1 to avoid dividing by zero.
    const norm = Math.sqrt(sumOfSquares);
    return raw.map((component) => component / (norm || 1));
  }
}
|
|
331
|
+
|
|
332
|
+
/**
 * In-memory semantic store for testing.
 *
 * Every item lives in a `Map` owned by the instance; nothing is persisted.
 * Embeddings come from the injected {@link EmbeddingPort}; a
 * {@link StubEmbeddingPort} is created when none is supplied.
 */
export class InMemorySemanticStore implements SemanticStorePort {
  private items: Map<string, SemanticItem> = new Map();
  private embeddingPort: EmbeddingPort;

  /**
   * @param embeddingPort Provider used to embed stored content and search
   *   queries. Defaults to a new {@link StubEmbeddingPort}.
   */
  constructor(embeddingPort?: EmbeddingPort) {
    this.embeddingPort = embeddingPort ?? new StubEmbeddingPort();
  }

  /**
   * Builds the internal map key for a (namespace, key) pair.
   *
   * The pair is JSON-encoded so that distinct pairs can never produce the
   * same storage key. A plain `namespace:key` join would make
   * ("a:b", "c") and ("a", "b:c") collide (INV-SEM-004).
   */
  private makeKey(key: string, namespace: string): string {
    return JSON.stringify([namespace, key]);
  }

  /**
   * Stores (or replaces) an item, using namespace `'default'` when the
   * request names none.
   *
   * Embedding selection, in priority order:
   * 1. `request.embedding`, when the caller supplies one;
   * 2. the embedding already stored for this key, when the content hash is
   *    unchanged and `forceRecompute` is not set (INV-SEM-001);
   * 3. a freshly computed embedding from the embedding port.
   *
   * @returns The stored item without its embedding, whether the item was
   *   newly created, and whether an embedding was computed by this call.
   */
  async store(request: SemanticStoreRequest): Promise<SemanticStoreResponse> {
    const namespace = request.namespace ?? 'default';
    const storageKey = this.makeKey(request.key, namespace);
    const existing = this.items.get(storageKey);

    // Compute content hash
    const contentHash = await this.hashContent(request.content);

    // The cached embedding is only reusable while the content is unchanged
    // and the caller did not ask for a recompute.
    const cachedEmbedding =
      existing !== undefined &&
      existing.contentHash === contentHash &&
      !request.forceRecompute
        ? existing.embedding
        : undefined;

    let embedding = request.embedding;
    let embeddingModel = this.embeddingPort.getConfig().model;
    let embeddingComputed = false;

    if (!embedding) {
      if (cachedEmbedding) {
        // INV-SEM-001: keep the cached embedding (and the model recorded
        // with it) instead of overwriting the item with `undefined`.
        embedding = cachedEmbedding;
        embeddingModel = existing?.embeddingModel ?? embeddingModel;
      } else {
        const result = await this.embeddingPort.embed({ text: request.content });
        embedding = result.embedding;
        embeddingComputed = true;
      }
    }

    const now = new Date().toISOString();
    const item: SemanticItem = {
      key: request.key,
      namespace,
      content: request.content,
      embedding,
      embeddingDimension: embedding?.length,
      embeddingModel,
      metadata: request.metadata,
      tags: request.tags,
      contentHash,
      createdAt: existing?.createdAt ?? now,
      updatedAt: now,
    };

    this.items.set(storageKey, item);

    // Omit embedding from the response item
    const { embedding: _, ...itemWithoutEmbedding } = item;
    return {
      success: true,
      item: itemWithoutEmbedding,
      created: !existing,
      embeddingComputed,
    };
  }

  /**
   * Embeds `request.query` and ranks stored items by cosine similarity.
   *
   * Items without an embedding are skipped. An item must match the
   * requested namespace (when given) and carry every tag in `filterTags`
   * (when given). Defaults: `minSimilarity` 0.7, `topK` 10.
   */
  async search(request: SemanticSearchRequest): Promise<SemanticSearchResponse> {
    const startTime = Date.now();
    const namespace = request.namespace;

    // Get query embedding
    const queryResult = await this.embeddingPort.embed({ text: request.query });
    const queryEmbedding = queryResult.embedding;
    const minSimilarity = request.minSimilarity ?? 0.7;

    // Filter items and compute similarities in one pass.
    const scored: { item: SemanticItem; similarity: number }[] = [];
    for (const item of this.items.values()) {
      if (namespace && item.namespace !== namespace) continue;
      if (!item.embedding) continue;
      if (request.filterTags) {
        const itemTags = new Set(item.tags ?? []);
        if (!request.filterTags.every((t) => itemTags.has(t))) continue;
      }
      const similarity = this.cosineSimilarity(queryEmbedding, item.embedding);
      if (similarity >= minSimilarity) {
        scored.push({ item, similarity });
      }
    }

    // Sort descending and keep the best topK (INV-SEM-002, INV-SEM-003)
    const filtered = scored
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, request.topK ?? 10);

    const results = filtered.map((s, index) => ({
      item: request.includeEmbeddings
        ? s.item
        : { ...s.item, embedding: undefined },
      similarity: s.similarity,
      rank: index + 1,
      snippet: s.item.content.slice(0, 200),
    }));

    return {
      results,
      // NOTE(review): counted after the topK cut, so it never exceeds topK.
      // Confirm against the contract whether the pre-cut count is expected.
      totalMatches: filtered.length,
      query: request.query,
      namespace,
      durationMs: Date.now() - startTime,
      queryEmbedding: request.includeEmbeddings ? queryEmbedding : undefined,
    };
  }

  /** Returns the stored item (embedding included) or `null` when absent. */
  async get(key: string, namespace = 'default'): Promise<SemanticItem | null> {
    return this.items.get(this.makeKey(key, namespace)) ?? null;
  }

  /**
   * Lists items filtered by namespace, tags and key prefix, sorted by
   * `orderBy` (default `'createdAt'`) and paginated (default offset 0,
   * limit 10). Sorting is descending unless `orderDir` is `'asc'`.
   * Embeddings are stripped from the returned items.
   */
  async list(request: SemanticListRequest): Promise<SemanticListResponse> {
    let items = Array.from(this.items.values());

    // Filter by namespace
    if (request.namespace) {
      items = items.filter((i) => i.namespace === request.namespace);
    }

    // Filter by tags
    const filterTags = request.filterTags;
    if (filterTags) {
      items = items.filter((item) => {
        const itemTags = new Set(item.tags ?? []);
        return filterTags.every((t) => itemTags.has(t));
      });
    }

    // Filter by key prefix
    const keyPrefix = request.keyPrefix;
    if (keyPrefix) {
      items = items.filter((i) => i.key.startsWith(keyPrefix));
    }

    // Sort. Equal values must compare as 0 so the comparator is consistent
    // and the stable sort keeps ties in insertion order.
    const orderBy = request.orderBy ?? 'createdAt';
    const sortDir = request.orderDir === 'asc' ? 1 : -1;
    items.sort((a, b) => {
      const aVal = a[orderBy] ?? '';
      const bVal = b[orderBy] ?? '';
      if (aVal < bVal) return -sortDir;
      if (aVal > bVal) return sortDir;
      return 0;
    });

    // Paginate
    const offset = request.offset ?? 0;
    const limit = request.limit ?? 10;
    const paginated = items.slice(offset, offset + limit);

    return {
      items: paginated.map((i) => ({ ...i, embedding: undefined })),
      total: items.length,
      hasMore: offset + limit < items.length,
      namespace: request.namespace,
    };
  }

  /** Removes one item and reports whether anything was deleted. */
  async delete(key: string, namespace = 'default'): Promise<SemanticDeleteResponse> {
    const storageKey = this.makeKey(key, namespace);
    const deleted = this.items.delete(storageKey);
    return { deleted, key, namespace };
  }

  /** Reports whether an item is stored under the given key and namespace. */
  async exists(key: string, namespace = 'default'): Promise<boolean> {
    return this.items.has(this.makeKey(key, namespace));
  }

  /**
   * Computes statistics for one namespace, or for the whole store when no
   * namespace is given. Dimension and model are taken from the first item
   * that has an embedding.
   */
  async getStats(namespace?: string): Promise<SemanticStoreStats> {
    const allItems = Array.from(this.items.values());
    const items = namespace
      ? allItems.filter((i) => i.namespace === namespace)
      : allItems;

    const withEmbeddings = items.filter((i) => i.embedding);

    const result: SemanticStoreStats = {
      totalItems: items.length,
      itemsWithEmbeddings: withEmbeddings.length,
      embeddingDimension: withEmbeddings[0]?.embeddingDimension ?? null,
      embeddingModel: withEmbeddings[0]?.embeddingModel ?? null,
      namespace: namespace ?? null,
    };

    // Only include namespaces if no specific namespace was queried
    if (!namespace) {
      result.namespaces = [...new Set(allItems.map((i) => i.namespace))];
    }

    return result;
  }

  /**
   * Removes every item of `namespace`, or every item in the store when no
   * namespace is given.
   *
   * @returns The number of items removed.
   */
  async clear(namespace?: string): Promise<number> {
    if (!namespace) {
      const count = this.items.size;
      this.items.clear();
      return count;
    }

    const keysToDelete = Array.from(this.items.entries())
      .filter(([, item]) => item.namespace === namespace)
      .map(([key]) => key);
    for (const key of keysToDelete) {
      this.items.delete(key);
    }
    return keysToDelete.length;
  }

  /**
   * Cosine similarity of two vectors, mapped from [-1, 1] to [0, 1].
   *
   * Returns 0 when the lengths differ or either vector has zero norm.
   * The result is clamped because floating-point rounding can push the raw
   * value marginally outside the range (INV-SEM-003).
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      const x = a[i]!;
      const y = b[i]!;
      dot += x * y;
      normA += x * x;
      normB += y * y;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    if (denom === 0) return 0;
    // Cosine similarity is in [-1, 1], normalize to [0, 1]
    const normalized = (dot / denom + 1) / 2;
    return Math.min(1, Math.max(0, normalized));
  }

  /** Returns the SHA-256 digest of `content` (UTF-8) as lowercase hex. */
  private async hashContent(content: string): Promise<string> {
    const encoder = new TextEncoder();
    const data = encoder.encode(content);
    const hashBuffer = await crypto.subtle.digest('SHA-256', data);
    const hashArray = Array.from(new Uint8Array(hashBuffer));
    return hashArray.map((b) => b.toString(16).padStart(2, '0')).join('');
  }
}
|