@defai.digital/semantic-context 13.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Semantic Context Domain
3
+ *
4
+ * Provides semantic search and vector-indexed storage.
5
+ *
6
+ * @packageDocumentation
7
+ */
8
+
9
+ // Types and interfaces
10
+ export type {
11
+ EmbeddingPort,
12
+ EmbeddingRequest,
13
+ EmbeddingResult,
14
+ SemanticStorePort,
15
+ SemanticStoreStats,
16
+ SemanticManager,
17
+ SemanticManagerOptions,
18
+ SimilarityMethod,
19
+ SimilarityOptions,
20
+ } from './types.js';
21
+
22
+ // Stub implementations for testing
23
+ export { StubEmbeddingPort, InMemorySemanticStore } from './types.js';
24
+
25
+ // Similarity utilities
26
+ export {
27
+ cosineSimilarity,
28
+ dotProductSimilarity,
29
+ euclideanDistance,
30
+ manhattanDistance,
31
+ computeSimilarity,
32
+ normalizeVector,
33
+ vectorNorm,
34
+ addVectors,
35
+ subtractVectors,
36
+ scaleVector,
37
+ computeCentroid,
38
+ findKNearest,
39
+ filterByThreshold,
40
+ DEFAULT_SIMILARITY_OPTIONS,
41
+ } from './similarity.js';
42
+
43
+ // Embedding service
44
+ export {
45
+ LocalEmbeddingProvider,
46
+ CachedEmbeddingProvider,
47
+ createEmbeddingProvider,
48
+ createTFIDFEmbedding,
49
+ createTFIDFEmbeddingBatch,
50
+ } from './embedding-service.js';
51
+
52
+ // Semantic manager
53
+ export {
54
+ createSemanticManager,
55
+ SemanticManagerError,
56
+ } from './semantic-manager.js';
@@ -0,0 +1,246 @@
1
+ /**
2
+ * Semantic Manager
3
+ *
4
+ * High-level manager for semantic context storage and search.
5
+ * Combines embedding computation with storage.
6
+ *
7
+ * Invariants:
8
+ * - INV-SEM-001: Embeddings computed on store, cached until content changes
9
+ * - INV-SEM-002: Search results sorted by similarity descending
10
+ * - INV-SEM-003: Similarity scores normalized to [0, 1]
11
+ * - INV-SEM-004: Namespace isolation
12
+ */
13
+
14
+ import type {
15
+ SemanticItem,
16
+ SemanticSearchRequest,
17
+ SemanticSearchResponse,
18
+ SemanticStoreRequest,
19
+ SemanticStoreResponse,
20
+ SemanticListRequest,
21
+ SemanticListResponse,
22
+ SemanticDeleteResponse,
23
+ EmbeddingConfig,
24
+ } from '@defai.digital/contracts';
25
+ import { SemanticContextErrorCodes, computeContentHash } from '@defai.digital/contracts';
26
+ import type {
27
+ SemanticManager,
28
+ SemanticManagerOptions,
29
+ SemanticStoreStats,
30
+ } from './types.js';
31
+
32
/**
 * Error type raised by the semantic manager.
 *
 * Every instance carries a machine-readable `code` and, optionally, a
 * `details` record with structured context. The static factories below
 * build the variants used in this file, taking their codes from
 * `SemanticContextErrorCodes`.
 */
export class SemanticManagerError extends Error {
  /**
   * @param code - Machine-readable error code.
   * @param message - Human-readable description, passed to `Error`.
   * @param details - Optional structured context for the failure.
   */
  constructor(
    public readonly code: string,
    message: string,
    public readonly details?: Record<string, unknown>
  ) {
    super(message);
    this.name = 'SemanticManagerError';
  }

  /** Error for a key that does not exist; `details` holds `key` and `namespace`. */
  static notFound(key: string, namespace: string): SemanticManagerError {
    const text = `Item not found: ${namespace}:${key}`;
    const context = { key, namespace };
    return new SemanticManagerError(SemanticContextErrorCodes.NOT_FOUND, text, context);
  }

  /** Error wrapping a failed embedding computation; no `details` attached. */
  static embeddingFailed(message: string): SemanticManagerError {
    const text = `Embedding computation failed: ${message}`;
    return new SemanticManagerError(SemanticContextErrorCodes.EMBEDDING_FAILED, text);
  }

  /** Error wrapping a failed search; no `details` attached. */
  static searchFailed(message: string): SemanticManagerError {
    const text = `Search failed: ${message}`;
    return new SemanticManagerError(SemanticContextErrorCodes.SEARCH_FAILED, text);
  }

  /** Error for an embedding whose length differs from the expected dimension. */
  static dimensionMismatch(expected: number, actual: number): SemanticManagerError {
    const text = `Embedding dimension mismatch: expected ${expected}, got ${actual}`;
    const context = { expected, actual };
    return new SemanticManagerError(SemanticContextErrorCodes.DIMENSION_MISMATCH, text, context);
  }
}
75
+
76
/**
 * Creates a semantic manager on top of an embedding port and a store port.
 *
 * The returned object computes embeddings on `store` (when `autoEmbed` is
 * enabled and the request carries none), checks embedding dimensions per
 * namespace, and forwards every other operation to `storePort`.
 *
 * @param options - `embeddingPort` and `storePort` are used as provided;
 *   `defaultNamespace` falls back to `'default'` and `autoEmbed` to `true`.
 * @returns The manager object; each method is documented inline below.
 */
export function createSemanticManager(options: SemanticManagerOptions): SemanticManager {
  const {
    embeddingPort,
    storePort,
    defaultNamespace = 'default',
    autoEmbed = true,
  } = options;

  // Embedding dimension first seen for each namespace by THIS manager
  // instance (INV-SEM-200). The map is in-memory only, so it starts empty
  // even when the store already holds items.
  const namespaceDimensions = new Map<string, number>();

  /**
   * Check `dimension` against the dimension recorded for `namespace`,
   * recording it when the namespace has none yet.
   *
   * @throws SemanticManagerError (dimension mismatch) when a different
   *   dimension was recorded earlier.
   */
  function validateDimension(namespace: string, dimension: number): void {
    const expected = namespaceDimensions.get(namespace);
    if (expected !== undefined && expected !== dimension) {
      throw SemanticManagerError.dimensionMismatch(expected, dimension);
    }
    if (expected === undefined) {
      namespaceDimensions.set(namespace, dimension);
    }
  }

  return {
    /**
     * Store content, computing its embedding when needed.
     * INV-SEM-001: the embedding is recomputed only when the item is new,
     * its content hash changed, or `forceRecompute` is set; otherwise the
     * existing embedding is reused.
     *
     * @throws SemanticManagerError - a dimension mismatch is rethrown as-is;
     *   any other failure inside the `try` block (store port calls included)
     *   is wrapped via `SemanticManagerError.embeddingFailed`.
     */
    async store(request: SemanticStoreRequest): Promise<SemanticStoreResponse> {
      const namespace = request.namespace ?? defaultNamespace;

      try {
        // Hash the incoming content so it can be compared with the stored item
        const contentHash = await computeContentHash(request.content);
        const existing = await storePort.get(request.key, namespace);

        // A caller-supplied embedding always wins over auto-embedding
        let embedding = request.embedding;
        let embeddingComputed = false;

        if (autoEmbed && !embedding) {
          const needsEmbedding =
            !existing ||
            existing.contentHash !== contentHash ||
            request.forceRecompute;

          if (needsEmbedding) {
            const result = await embeddingPort.embed({ text: request.content });
            embedding = result.embedding;
            embeddingComputed = true;

            // Validate dimension consistency (INV-SEM-200)
            validateDimension(namespace, result.dimension);
          } else if (existing?.embedding) {
            // Content unchanged: reuse the stored embedding
            embedding = existing.embedding;
          }
        }

        // Whatever its origin, the embedding must match both the port's
        // configured dimension and the namespace's recorded dimension
        if (embedding) {
          const config = embeddingPort.getConfig();
          if (embedding.length !== config.dimension) {
            throw SemanticManagerError.dimensionMismatch(config.dimension, embedding.length);
          }
          validateDimension(namespace, embedding.length);
        }

        // Persist with the resolved namespace and embedding
        const result = await storePort.store({
          ...request,
          namespace,
          embedding,
        });

        return {
          ...result,
          embeddingComputed,
        };
      } catch (error) {
        if (error instanceof SemanticManagerError) throw error;

        const message = error instanceof Error ? error.message : 'Unknown error';
        throw SemanticManagerError.embeddingFailed(message);
      }
    },

    /**
     * Search by semantic similarity.
     * INV-SEM-002 / INV-SEM-003 / INV-SEM-004 are listed for this operation;
     * ordering and scoring are delegated to `storePort.search`.
     *
     * NOTE(review): unlike the other methods, `request.namespace` is not
     * defaulted to `defaultNamespace` here — confirm this is intentional.
     *
     * @throws SemanticManagerError - a dimension mismatch is rethrown as-is;
     *   any other failure is wrapped via `SemanticManagerError.searchFailed`.
     */
    async search(request: SemanticSearchRequest): Promise<SemanticSearchResponse> {
      const namespace = request.namespace;

      try {
        // Embed the query so its dimension can be checked before searching
        const queryResult = await embeddingPort.embed({ text: request.query });

        // Validate dimension if namespace has items
        if (namespace) {
          const stats = await storePort.getStats(namespace);
          const storedDimension = stats.embeddingDimension;
          if (storedDimension !== null) {
            // Compare with what the store actually holds. The in-memory map
            // is empty on a fresh manager, so checking only the map would
            // accept a mismatching query and then record the wrong
            // dimension for the namespace.
            if (
              typeof storedDimension === 'number' &&
              storedDimension !== queryResult.dimension
            ) {
              throw SemanticManagerError.dimensionMismatch(
                storedDimension,
                queryResult.dimension
              );
            }
            validateDimension(namespace, queryResult.dimension);
          }
        }

        // Delegate search to store
        return await storePort.search(request);
      } catch (error) {
        if (error instanceof SemanticManagerError) throw error;

        const message = error instanceof Error ? error.message : 'Unknown error';
        throw SemanticManagerError.searchFailed(message);
      }
    },

    /**
     * Get an item by key from `namespace` (or the default namespace).
     */
    async get(key: string, namespace?: string): Promise<SemanticItem | null> {
      return storePort.get(key, namespace ?? defaultNamespace);
    },

    /**
     * List items, defaulting the request's namespace when absent.
     */
    async list(request: SemanticListRequest): Promise<SemanticListResponse> {
      return storePort.list({
        ...request,
        namespace: request.namespace ?? defaultNamespace,
      });
    },

    /**
     * Delete an item by key from `namespace` (or the default namespace).
     */
    async delete(key: string, namespace?: string): Promise<SemanticDeleteResponse> {
      return storePort.delete(key, namespace ?? defaultNamespace);
    },

    /**
     * Get store statistics; `namespace` is forwarded unchanged (no default).
     */
    async getStats(namespace?: string): Promise<SemanticStoreStats> {
      return storePort.getStats(namespace);
    },

    /**
     * Clear a namespace (or the default namespace) and return the store's
     * result for the operation.
     */
    async clear(namespace?: string): Promise<number> {
      const ns = namespace ?? defaultNamespace;
      // Forget the recorded dimension so the namespace can accept a new one
      namespaceDimensions.delete(ns);
      return storePort.clear(ns);
    },

    /**
     * Get the embedding port's configuration.
     */
    getEmbeddingConfig(): EmbeddingConfig {
      return embeddingPort.getConfig();
    },
  };
}
@@ -0,0 +1,265 @@
1
+ /**
2
+ * Similarity Computation Utilities
3
+ *
4
+ * Provides various methods for computing vector similarity.
5
+ *
6
+ * Invariants:
7
+ * - INV-SEM-003: All scores normalized to [0, 1] range
8
+ */
9
+
10
+ import type { SimilarityMethod, SimilarityOptions } from './types.js';
11
+
12
/**
 * Options applied by `computeSimilarity` for any field the caller leaves
 * out: cosine similarity with scores normalized.
 */
export const DEFAULT_SIMILARITY_OPTIONS: SimilarityOptions = {
  method: 'cosine',
  normalize: true,
};
19
+
20
/**
 * Compute the cosine similarity between two equal-length vectors.
 *
 * INV-SEM-003: with `normalize = true` (the default) the raw cosine in
 * [-1, 1] is mapped linearly onto [0, 1]; with `normalize = false` the raw
 * cosine is returned.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @param normalize - When true, map the result onto [0, 1].
 * @returns The similarity score. Returns 0 for empty vectors and when
 *   either vector has zero magnitude.
 * @throws Error when the vectors have different lengths.
 */
export function cosineSimilarity(a: number[], b: number[], normalize = true): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  if (a.length === 0) return 0;

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i]! * b[i]!;
    normA += a[i]! * a[i]!;
    normB += b[i]! * b[i]!;
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);

  // A zero-magnitude vector has no direction; report "no similarity"
  if (denominator === 0) return 0;

  // Floating-point rounding can leave the quotient marginally outside
  // [-1, 1] (e.g. 1.0000000000000002 for parallel vectors); clamp so the
  // documented range always holds. NaN passes through unchanged.
  const similarity = Math.min(1, Math.max(-1, dotProduct / denominator));

  // Normalize from [-1, 1] to [0, 1]
  return normalize ? (similarity + 1) / 2 : similarity;
}
52
+
53
/**
 * Compute dot-product similarity between two equal-length vectors.
 *
 * With `normalize = false` the raw dot product is returned (unbounded).
 * INV-SEM-003: with `normalize = true` (the default) the dot product of the
 * unit-length vectors is taken — i.e. the cosine — and mapped from [-1, 1]
 * onto [0, 1].
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @param normalize - When true, return the bounded [0, 1] score.
 * @returns The score. Returns 0 for empty vectors and, in normalized mode,
 *   when either vector has zero magnitude.
 * @throws Error when the vectors have different lengths.
 */
export function dotProductSimilarity(a: number[], b: number[], normalize = true): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  if (a.length === 0) return 0;

  if (!normalize) {
    // Raw dot product
    let dotProduct = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i]! * b[i]!;
    }
    return dotProduct;
  }

  // Normalized: dot product of the unit vectors, accumulated in one pass
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i]! * b[i]!;
    normA += a[i]! * a[i]!;
    normB += b[i]! * b[i]!;
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);

  if (denominator === 0) return 0;

  // Rounding can leave the quotient marginally outside [-1, 1]; clamp so
  // the mapped score cannot leave [0, 1]. NaN passes through unchanged.
  const similarity = Math.min(1, Math.max(-1, dotProduct / denominator));

  // Map from [-1, 1] to [0, 1]
  return (similarity + 1) / 2;
}
96
+
97
/**
 * Euclidean (L2) distance between two equal-length vectors.
 *
 * INV-SEM-003: with `normalize = true` (the default) the distance `d` is
 * turned into a similarity `1 / (1 + d)`, so a smaller distance gives a
 * higher score; with `normalize = false` the raw distance is returned.
 *
 * @returns The score or distance; 0 for empty vectors in either mode.
 * @throws Error when the vectors have different lengths.
 */
export function euclideanDistance(a: number[], b: number[], normalize = true): number {
  const lengthA = a.length;
  const lengthB = b.length;
  if (lengthA !== lengthB) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  if (lengthA === 0) {
    return 0;
  }

  // Accumulate squared component differences in index order
  let squaredTotal = 0;
  for (const [index, left] of a.entries()) {
    const delta = left - b[index]!;
    squaredTotal += delta * delta;
  }

  const distance = Math.sqrt(squaredTotal);
  if (!normalize) {
    return distance;
  }
  return 1 / (1 + distance);
}
121
+
122
/**
 * Manhattan (L1) distance between two equal-length vectors.
 *
 * With `normalize = true` (the default) the distance `d` is converted to a
 * similarity `1 / (1 + d)`; with `normalize = false` the raw distance is
 * returned.
 *
 * @returns The score or distance; 0 for empty vectors in either mode.
 * @throws Error when the vectors have different lengths.
 */
export function manhattanDistance(a: number[], b: number[], normalize = true): number {
  const lengthA = a.length;
  const lengthB = b.length;
  if (lengthA !== lengthB) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  if (lengthA === 0) {
    return 0;
  }

  // Sum absolute component differences in index order
  let total = 0;
  for (const [index, left] of a.entries()) {
    total += Math.abs(left - b[index]!);
  }

  if (!normalize) {
    return total;
  }
  return 1 / (1 + total);
}
140
+
141
/**
 * Compute the similarity of two vectors with the requested method.
 *
 * Fields missing from `options` fall back to `DEFAULT_SIMILARITY_OPTIONS`.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @param options - Optional `method` and `normalize` overrides.
 * @returns The value produced by the selected similarity function.
 * @throws Error when `method` is not one of the handled methods, or when
 *   the selected function rejects the vectors.
 */
export function computeSimilarity(
  a: number[],
  b: number[],
  options: Partial<SimilarityOptions> = {}
): number {
  // Resolve each field separately: spreading `options` over the defaults
  // would let an explicit `method: undefined` replace the default and fall
  // through to the "unknown method" error.
  const method = options.method ?? DEFAULT_SIMILARITY_OPTIONS.method;
  const normalize = options.normalize ?? DEFAULT_SIMILARITY_OPTIONS.normalize;

  switch (method) {
    case 'cosine':
      return cosineSimilarity(a, b, normalize);
    case 'dot':
      return dotProductSimilarity(a, b, normalize);
    case 'euclidean':
      return euclideanDistance(a, b, normalize);
    default:
      throw new Error(`Unknown similarity method: ${method}`);
  }
}
162
+
163
/**
 * Normalize a vector to unit length.
 *
 * @param v - Input vector; it is never modified.
 * @returns A new array holding `v` scaled to magnitude 1. A zero-magnitude
 *   (or empty) vector cannot be scaled and is returned as an unchanged copy.
 */
export function normalizeVector(v: number[]): number[] {
  const norm = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
  // Always hand back a fresh array so callers can mutate the result without
  // touching their input, whichever branch is taken.
  if (norm === 0) return v.slice();
  return v.map((x) => x / norm);
}
171
+
172
/**
 * Magnitude (Euclidean length) of a vector: the square root of the sum of
 * its squared components. An empty vector has norm 0.
 */
export function vectorNorm(v: number[]): number {
  let sumOfSquares = 0;
  v.forEach((component) => {
    sumOfSquares += component * component;
  });
  return Math.sqrt(sumOfSquares);
}
178
+
179
/**
 * Component-wise sum of two equal-length vectors.
 *
 * @returns A new array where element `i` is `a[i] + b[i]`.
 * @throws Error when the vectors have different lengths.
 */
export function addVectors(a: number[], b: number[]): number[] {
  const sameLength = a.length === b.length;
  if (!sameLength) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  const sumAt = (value: number, index: number): number => value + b[index]!;
  return a.map(sumAt);
}
188
+
189
/**
 * Component-wise difference of two equal-length vectors (`a - b`).
 *
 * @returns A new array where element `i` is `a[i] - b[i]`.
 * @throws Error when the vectors have different lengths.
 */
export function subtractVectors(a: number[], b: number[]): number[] {
  const sameLength = a.length === b.length;
  if (!sameLength) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  const differenceAt = (value: number, index: number): number => value - b[index]!;
  return a.map(differenceAt);
}
198
+
199
/**
 * Multiply every component of a vector by `scalar`.
 *
 * @returns A new array; the input vector is left untouched.
 */
export function scaleVector(v: number[], scalar: number): number[] {
  const scaled = v.map((component) => {
    return component * scalar;
  });
  return scaled;
}
205
+
206
/**
 * Centroid (component-wise mean) of a list of vectors.
 *
 * The dimension is taken from the first vector; every other vector must
 * have the same length.
 *
 * @returns The mean vector, or an empty array when no vectors are given.
 * @throws Error when the vectors do not all share one dimension.
 */
export function computeCentroid(vectors: number[][]): number[] {
  const count = vectors.length;
  if (count === 0) {
    return [];
  }

  const dim = vectors[0]!.length;
  const sums: number[] = new Array<number>(dim).fill(0);

  for (const vector of vectors) {
    if (vector.length !== dim) {
      throw new Error(`Inconsistent vector dimensions`);
    }
    // Add this vector into the running per-component totals
    for (let i = 0; i < dim; i++) {
      sums[i] = sums[i]! + vector[i]!;
    }
  }

  return sums.map((total) => total / count);
}
226
+
227
/**
 * Find the `k` candidates most similar to `query`.
 * INV-SEM-002: results are sorted by similarity descending.
 *
 * Every candidate is scored with `computeSimilarity` using normalized
 * scores, so a vector-length mismatch surfaces as a thrown error whatever
 * `k` is.
 *
 * @param query - Query vector.
 * @param candidates - Items to rank; each needs an `id` and an `embedding`.
 * @param k - Maximum number of results; zero or a negative value yields an
 *   empty result.
 * @param method - Similarity method, `'cosine'` by default.
 * @returns At most `k` `{ id, similarity }` pairs, best match first.
 */
export function findKNearest(
  query: number[],
  candidates: Array<{ id: string; embedding: number[] }>,
  k: number,
  method: SimilarityMethod = 'cosine'
): Array<{ id: string; similarity: number }> {
  const scored = candidates.map((c) => ({
    id: c.id,
    similarity: computeSimilarity(query, c.embedding, { method, normalize: true }),
  }));

  // Sort by similarity descending
  scored.sort((a, b) => b.similarity - a.similarity);

  // `slice(0, k)` treats a negative end as an offset from the end of the
  // array, which would return all but the last |k| items; clamp at 0 so a
  // negative `k` means "no results".
  return scored.slice(0, Math.max(0, k));
}
247
+
248
/**
 * Keep the candidates whose similarity to `query` is at least
 * `minSimilarity`, ordered best match first.
 * INV-SEM-003: the threshold is compared against normalized scores.
 *
 * @param query - Query vector.
 * @param candidates - Items to score; each needs an `id` and an `embedding`.
 * @param minSimilarity - Inclusive lower bound on the score.
 * @param method - Similarity method, `'cosine'` by default.
 * @returns `{ id, similarity }` pairs sorted by similarity descending.
 */
export function filterByThreshold(
  query: number[],
  candidates: Array<{ id: string; embedding: number[] }>,
  minSimilarity: number,
  method: SimilarityMethod = 'cosine'
): Array<{ id: string; similarity: number }> {
  const kept: Array<{ id: string; similarity: number }> = [];

  candidates.forEach((candidate) => {
    const similarity = computeSimilarity(query, candidate.embedding, {
      method,
      normalize: true,
    });
    if (similarity >= minSimilarity) {
      kept.push({ id: candidate.id, similarity });
    }
  });

  kept.sort((left, right) => right.similarity - left.similarity);
  return kept;
}