@defai.digital/semantic-context 13.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/types.ts ADDED
@@ -0,0 +1,561 @@
1
+ /**
2
+ * Semantic Context Types
3
+ *
4
+ * Port interfaces and type definitions for semantic context storage.
5
+ */
6
+
7
+ import type {
8
+ SemanticItem,
9
+ SemanticSearchRequest,
10
+ SemanticSearchResponse,
11
+ SemanticStoreRequest,
12
+ SemanticStoreResponse,
13
+ SemanticListRequest,
14
+ SemanticListResponse,
15
+ SemanticDeleteResponse,
16
+ EmbeddingConfig,
17
+ } from '@defai.digital/contracts';
18
+
19
+ // ============================================================================
20
+ // Embedding Port
21
+ // ============================================================================
22
+
/**
 * Input handed to an {@link EmbeddingPort} when a single text is embedded.
 */
export interface EmbeddingRequest {
  /** The text whose embedding should be computed. */
  text: string;

  /** Optional model override; when omitted the configured default model is used. */
  model?: string;
}
37
+
/**
 * Outcome of a single embedding computation.
 */
export interface EmbeddingResult {
  /** The embedding vector produced for the text. */
  embedding: number[];

  /** Name of the model that was used. */
  model: string;

  /** Dimension of the embedding. */
  dimension: number;

  /** How long the computation took, in milliseconds. */
  durationMs: number;
}
62
+
/**
 * Port (abstraction) over an embedding provider.
 *
 * The concrete provider is injected at runtime by whoever wires the
 * implementation together.
 */
export interface EmbeddingPort {
  /** Computes the embedding for one text. */
  embed(request: EmbeddingRequest): Promise<EmbeddingResult>;

  /** Computes embeddings for several texts in one batch call. */
  embedBatch(texts: string[]): Promise<EmbeddingResult[]>;

  /** Returns the embedding configuration of this provider. */
  getConfig(): EmbeddingConfig;

  /** Resolves to whether the provider is currently available. */
  isAvailable(): Promise<boolean>;
}
88
+
89
+ // ============================================================================
90
+ // Semantic Store Port
91
+ // ============================================================================
92
+
/**
 * Port (abstraction) over semantic storage.
 *
 * Implementations supply the actual persistence layer (SQLite, etc.).
 */
export interface SemanticStorePort {
  /**
   * Stores an item together with its embedding.
   *
   * INV-SEM-001: the embedding is cached until the content changes.
   */
  store(request: SemanticStoreRequest): Promise<SemanticStoreResponse>;

  /**
   * Searches items by semantic similarity.
   *
   * - INV-SEM-002: results are sorted by similarity, descending.
   * - INV-SEM-003: scores are normalized to [0, 1].
   * - INV-SEM-004: namespaces are isolated from each other.
   */
  search(request: SemanticSearchRequest): Promise<SemanticSearchResponse>;

  /** Looks up a single item by key; resolves to `null` when there is none. */
  get(key: string, namespace?: string): Promise<SemanticItem | null>;

  /** Lists items. */
  list(request: SemanticListRequest): Promise<SemanticListResponse>;

  /** Deletes a single item. */
  delete(key: string, namespace?: string): Promise<SemanticDeleteResponse>;

  /** Resolves to whether an item exists. */
  exists(key: string, namespace?: string): Promise<boolean>;

  /** Returns statistics for a namespace. */
  getStats(namespace?: string): Promise<SemanticStoreStats>;

  /** Clears a namespace; resolves to a count (number). */
  clear(namespace?: string): Promise<number>;
}
142
+
/**
 * Statistics reported by a semantic store.
 */
export interface SemanticStoreStats {
  /** Total number of items in the namespace. */
  totalItems: number;

  /** Number of items that carry an embedding. */
  itemsWithEmbeddings: number;

  /** Embedding dimension in use, or `null`. */
  embeddingDimension: number | null;

  /** Embedding model in use, or `null`. */
  embeddingModel: string | null;

  /** The namespace that was queried, or `null`. */
  namespace: string | null;

  /** All namespaces (present when no specific namespace was requested). */
  namespaces?: string[];
}
177
+
178
+ // ============================================================================
179
+ // Semantic Manager Interface
180
+ // ============================================================================
181
+
/**
 * High-level semantic context manager that combines embedding and storage.
 */
export interface SemanticManager {
  /** Stores content, embedding it automatically. */
  store(request: SemanticStoreRequest): Promise<SemanticStoreResponse>;

  /** Searches by semantic similarity. */
  search(request: SemanticSearchRequest): Promise<SemanticSearchResponse>;

  /** Looks up a single item by key; resolves to `null` when there is none. */
  get(key: string, namespace?: string): Promise<SemanticItem | null>;

  /** Lists items. */
  list(request: SemanticListRequest): Promise<SemanticListResponse>;

  /** Deletes a single item. */
  delete(key: string, namespace?: string): Promise<SemanticDeleteResponse>;

  /** Returns statistics. */
  getStats(namespace?: string): Promise<SemanticStoreStats>;

  /** Clears a namespace; resolves to a count (number). */
  clear(namespace?: string): Promise<number>;

  /** Returns the embedding configuration. */
  getEmbeddingConfig(): EmbeddingConfig;
}
227
+
228
+ // ============================================================================
229
+ // Manager Options
230
+ // ============================================================================
231
+
/**
 * Options accepted when a semantic manager is created.
 */
export interface SemanticManagerOptions {
  /** Port used to reach the embedding provider. */
  embeddingPort: EmbeddingPort;

  /** Port used to reach storage. */
  storePort: SemanticStorePort;

  /** Default namespace. */
  defaultNamespace?: string;

  /** Whether embeddings are computed automatically on store. */
  autoEmbed?: boolean;
}
256
+
257
+ // ============================================================================
258
+ // Similarity Types
259
+ // ============================================================================
260
+
/**
 * The supported ways of computing similarity between two vectors.
 */
export type SimilarityMethod =
  | 'cosine'
  | 'dot'
  | 'euclidean';
265
+
/**
 * Options that control a similarity computation.
 */
export interface SimilarityOptions {
  /** Which similarity method to apply. */
  method: SimilarityMethod;

  /** Normalization flag for the computation. */
  normalize: boolean;
}
273
+
274
+ // ============================================================================
275
+ // Stub Implementations (for testing)
276
+ // ============================================================================
277
+
/**
 * Embedding port stub intended for tests.
 *
 * Produces deterministic, unit-length vectors derived from the character
 * codes of the input text; no external provider is contacted.
 */
export class StubEmbeddingPort implements EmbeddingPort {
  /**
   * @param dimension Length of the vectors to generate (defaults to 384).
   * @param model Model name reported in results and config (defaults to 'stub').
   */
  constructor(
    private readonly dimension = 384,
    private readonly model = 'stub',
  ) {}

  /**
   * Embeds one text. The reported model is the request's model when given,
   * otherwise the stub's own; the duration is a fixed 10 ms.
   */
  async embed(request: EmbeddingRequest): Promise<EmbeddingResult> {
    const vector = this.generateEmbedding(request.text);
    const reportedModel = request.model ?? this.model;
    return {
      embedding: vector,
      model: reportedModel,
      dimension: this.dimension,
      durationMs: 10,
    };
  }

  /** Embeds every text by delegating to {@link embed}, preserving input order. */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const pending = texts.map((text) => this.embed({ text }));
    return Promise.all(pending);
  }

  /** Reports a fixed 'local' provider configuration using this stub's model and dimension. */
  getConfig(): EmbeddingConfig {
    const { model, dimension } = this;
    return {
      provider: 'local',
      model,
      dimension,
      batchSize: 32,
      cacheEnabled: true,
    };
  }

  /** The stub is always available. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Builds the deterministic vector for `text`.
   *
   * Component `i` is `sin(code * (i + 1) * 0.1)` where `code` is the UTF-16
   * code unit at position `i` modulo the text length (0 when there is none,
   * e.g. for an empty text). The vector is then scaled to unit length; an
   * all-zero vector is returned unchanged.
   */
  private generateEmbedding(text: string): number[] {
    const raw: number[] = [];
    let sumOfSquares = 0;

    for (let index = 0; index < this.dimension; index += 1) {
      const code = text.charCodeAt(index % text.length) || 0;
      const component = Math.sin(code * (index + 1) * 0.1);
      raw.push(component);
      sumOfSquares += component * component;
    }

    // Fall back to 1 so a zero vector is not divided by zero.
    const scale = Math.sqrt(sumOfSquares) || 1;
    return raw.map((component) => component / scale);
  }
}
331
+
/**
 * In-memory semantic store for testing.
 *
 * Items live in a `Map` keyed by a composite of namespace and key. Embeddings
 * are obtained from the injected {@link EmbeddingPort} (a
 * {@link StubEmbeddingPort} when none is given). Nothing is persisted.
 */
export class InMemorySemanticStore implements SemanticStorePort {
  private readonly items: Map<string, SemanticItem> = new Map();
  private readonly embeddingPort: EmbeddingPort;

  /**
   * @param embeddingPort Provider used to embed content and queries;
   *   defaults to a new {@link StubEmbeddingPort}.
   */
  constructor(embeddingPort?: EmbeddingPort) {
    this.embeddingPort = embeddingPort ?? new StubEmbeddingPort();
  }

  /**
   * Builds the internal map key for an item.
   *
   * The namespace is length-prefixed so that a ':' inside the namespace or
   * the key can never make two different (namespace, key) pairs collide
   * (e.g. ('a:b', 'c') versus ('a', 'b:c')).
   */
  private makeKey(key: string, namespace: string): string {
    return `${namespace.length}:${namespace}:${key}`;
  }

  /**
   * Stores (creates or replaces) an item.
   *
   * The embedding is chosen in this order:
   * 1. `request.embedding` when supplied;
   * 2. the previously stored embedding when the content hash is unchanged and
   *    `forceRecompute` is not set (INV-SEM-001);
   * 3. a freshly computed embedding from the embedding port.
   *
   * @returns The stored item without its embedding, whether it was newly
   *   created, and whether an embedding was computed during this call.
   */
  async store(request: SemanticStoreRequest): Promise<SemanticStoreResponse> {
    const namespace = request.namespace ?? 'default';
    const storageKey = this.makeKey(request.key, namespace);
    const existing = this.items.get(storageKey);

    const contentHash = await this.hashContent(request.content);
    const needsEmbedding =
      !existing ||
      existing.contentHash !== contentHash ||
      Boolean(request.forceRecompute);

    let embedding = request.embedding;
    let embeddingModel = this.embeddingPort.getConfig().model;
    let embeddingComputed = false;

    if (!embedding) {
      if (!needsEmbedding && existing?.embedding) {
        // INV-SEM-001: content unchanged, so keep the cached embedding
        // instead of dropping it from the stored item.
        embedding = existing.embedding;
        embeddingModel = existing.embeddingModel ?? embeddingModel;
      } else {
        const result = await this.embeddingPort.embed({ text: request.content });
        embedding = result.embedding;
        embeddingComputed = true;
      }
    }

    const now = new Date().toISOString();
    const item: SemanticItem = {
      key: request.key,
      namespace,
      content: request.content,
      embedding,
      embeddingDimension: embedding?.length,
      embeddingModel,
      metadata: request.metadata,
      tags: request.tags,
      contentHash,
      createdAt: existing?.createdAt ?? now,
      updatedAt: now,
    };

    this.items.set(storageKey, item);

    // Omit embedding from the response item
    const { embedding: _, ...itemWithoutEmbedding } = item;
    return {
      success: true,
      item: itemWithoutEmbedding,
      created: !existing,
      embeddingComputed,
    };
  }

  /**
   * Searches stored items by cosine similarity to the embedded query.
   *
   * Only items that have an embedding, match `request.namespace` (when
   * given) and carry every tag in `request.filterTags` (when given) are
   * scored. Results below `minSimilarity` (default 0.7) are dropped, the rest
   * are sorted by similarity descending (INV-SEM-002) and cut to `topK`
   * (default 10). Scores are in [0, 1] (INV-SEM-003).
   */
  async search(request: SemanticSearchRequest): Promise<SemanticSearchResponse> {
    const startTime = Date.now();
    const namespace = request.namespace;
    const minSimilarity = request.minSimilarity ?? 0.7;

    // Get query embedding
    const queryResult = await this.embeddingPort.embed({ text: request.query });
    const queryEmbedding = queryResult.embedding;

    // Filter candidates and score them in one pass
    const scored: Array<{ item: SemanticItem; similarity: number }> = [];
    for (const item of this.items.values()) {
      if (namespace && item.namespace !== namespace) continue;
      if (!item.embedding) continue;
      if (request.filterTags) {
        const itemTags = new Set(item.tags ?? []);
        if (!request.filterTags.every((t) => itemTags.has(t))) continue;
      }
      scored.push({
        item,
        similarity: this.cosineSimilarity(queryEmbedding, item.embedding),
      });
    }

    const filtered = scored
      .filter((s) => s.similarity >= minSimilarity)
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, request.topK ?? 10);

    const results = filtered.map((s, index) => ({
      item: request.includeEmbeddings
        ? s.item
        : { ...s.item, embedding: undefined },
      similarity: s.similarity,
      rank: index + 1,
      // Snippet is the first 200 characters of the content.
      snippet: s.item.content.slice(0, 200),
    }));

    return {
      results,
      // NOTE(review): this counts matches after the topK cut — confirm
      // whether the contract expects the pre-limit count instead.
      totalMatches: filtered.length,
      query: request.query,
      namespace,
      durationMs: Date.now() - startTime,
      queryEmbedding: request.includeEmbeddings ? queryEmbedding : undefined,
    };
  }

  /** Returns the stored item for `key` in `namespace` (default 'default'), or `null`. */
  async get(key: string, namespace = 'default'): Promise<SemanticItem | null> {
    return this.items.get(this.makeKey(key, namespace)) ?? null;
  }

  /**
   * Lists items, optionally filtered by namespace, required tags and key
   * prefix, sorted by `orderBy` (default 'createdAt') in `orderDir`
   * (descending unless 'asc'), then paginated with `offset` (default 0) and
   * `limit` (default 10). Embeddings are stripped from the returned items.
   */
  async list(request: SemanticListRequest): Promise<SemanticListResponse> {
    const { filterTags, keyPrefix } = request;
    let items = Array.from(this.items.values());

    // Filter by namespace
    if (request.namespace) {
      items = items.filter((i) => i.namespace === request.namespace);
    }

    // Filter by tags
    if (filterTags) {
      items = items.filter((item) => {
        const itemTags = new Set(item.tags ?? []);
        return filterTags.every((t) => itemTags.has(t));
      });
    }

    // Filter by key prefix
    if (keyPrefix) {
      items = items.filter((i) => i.key.startsWith(keyPrefix));
    }

    // Sort. Equal values compare as 0 so the comparator is consistent and
    // ties keep their insertion order.
    const orderBy = request.orderBy ?? 'createdAt';
    const sortDir = request.orderDir === 'asc' ? 1 : -1;
    items.sort((a, b) => {
      const aVal = a[orderBy] ?? '';
      const bVal = b[orderBy] ?? '';
      if (aVal < bVal) return -sortDir;
      if (aVal > bVal) return sortDir;
      return 0;
    });

    // Paginate
    const offset = request.offset ?? 0;
    const limit = request.limit ?? 10;
    const paginated = items.slice(offset, offset + limit);

    return {
      items: paginated.map((i) => ({ ...i, embedding: undefined })),
      total: items.length,
      hasMore: offset + limit < items.length,
      namespace: request.namespace,
    };
  }

  /** Deletes the item for `key` in `namespace` (default 'default') and reports whether it existed. */
  async delete(key: string, namespace = 'default'): Promise<SemanticDeleteResponse> {
    const storageKey = this.makeKey(key, namespace);
    const deleted = this.items.delete(storageKey);
    return { deleted, key, namespace };
  }

  /** Resolves to whether an item for `key` exists in `namespace` (default 'default'). */
  async exists(key: string, namespace = 'default'): Promise<boolean> {
    return this.items.has(this.makeKey(key, namespace));
  }

  /**
   * Computes statistics for one namespace, or for the whole store when
   * `namespace` is omitted. Dimension and model are taken from the first
   * item that has an embedding. `namespaces` is included only when no
   * specific namespace was queried.
   */
  async getStats(namespace?: string): Promise<SemanticStoreStats> {
    const allItems = Array.from(this.items.values());
    const items = namespace
      ? allItems.filter((i) => i.namespace === namespace)
      : allItems;

    const withEmbeddings = items.filter((i) => i.embedding);

    const result: SemanticStoreStats = {
      totalItems: items.length,
      itemsWithEmbeddings: withEmbeddings.length,
      embeddingDimension: withEmbeddings[0]?.embeddingDimension ?? null,
      embeddingModel: withEmbeddings[0]?.embeddingModel ?? null,
      namespace: namespace ?? null,
    };

    // Only include namespaces if no specific namespace was queried
    if (!namespace) {
      result.namespaces = [...new Set(allItems.map((i) => i.namespace))];
    }

    return result;
  }

  /**
   * Removes every item of `namespace`, or every item in the store when
   * `namespace` is omitted.
   *
   * @returns The number of items removed.
   */
  async clear(namespace?: string): Promise<number> {
    if (!namespace) {
      const count = this.items.size;
      this.items.clear();
      return count;
    }

    const keysToDelete = Array.from(this.items.entries())
      .filter(([, item]) => item.namespace === namespace)
      .map(([storageKey]) => storageKey);
    keysToDelete.forEach((k) => this.items.delete(k));
    return keysToDelete.length;
  }

  /**
   * Cosine similarity of `a` and `b`, mapped from [-1, 1] to [0, 1].
   *
   * Returns 0 when the vectors differ in length or either has zero norm.
   * The result is clamped so floating-point rounding cannot push it outside
   * [0, 1] (INV-SEM-003).
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i]! * b[i]!;
      normA += a[i]! * a[i]!;
      normB += b[i]! * b[i]!;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    if (denom === 0) return 0;
    // Cosine similarity is in [-1, 1], normalize to [0, 1]
    const normalized = (dot / denom + 1) / 2;
    return Math.min(1, Math.max(0, normalized));
  }

  /** Returns the lowercase hex SHA-256 digest of `content` (UTF-8 encoded). */
  private async hashContent(content: string): Promise<string> {
    const encoder = new TextEncoder();
    const data = encoder.encode(content);
    const hashBuffer = await crypto.subtle.digest('SHA-256', data);
    const hashArray = Array.from(new Uint8Array(hashBuffer));
    return hashArray.map((b) => b.toString(16).padStart(2, '0')).join('');
  }
}