@defai.digital/semantic-context 13.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +38 -0
- package/dist/embedding-service.d.ts +66 -0
- package/dist/embedding-service.d.ts.map +1 -0
- package/dist/embedding-service.js +265 -0
- package/dist/embedding-service.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/semantic-manager.d.ts +30 -0
- package/dist/semantic-manager.d.ts.map +1 -0
- package/dist/semantic-manager.js +186 -0
- package/dist/semantic-manager.js.map +1 -0
- package/dist/similarity.d.ts +89 -0
- package/dist/similarity.d.ts.map +1 -0
- package/dist/similarity.js +216 -0
- package/dist/similarity.js.map +1 -0
- package/dist/types.d.ts +236 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +258 -0
- package/dist/types.js.map +1 -0
- package/package.json +48 -0
- package/src/embedding-service.ts +323 -0
- package/src/index.ts +56 -0
- package/src/semantic-manager.ts +246 -0
- package/src/similarity.ts +265 -0
- package/src/types.ts +561 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Context Domain
|
|
3
|
+
*
|
|
4
|
+
* Provides semantic search and vector-indexed storage.
|
|
5
|
+
*
|
|
6
|
+
* @packageDocumentation
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Types and interfaces
|
|
10
|
+
export type {
|
|
11
|
+
EmbeddingPort,
|
|
12
|
+
EmbeddingRequest,
|
|
13
|
+
EmbeddingResult,
|
|
14
|
+
SemanticStorePort,
|
|
15
|
+
SemanticStoreStats,
|
|
16
|
+
SemanticManager,
|
|
17
|
+
SemanticManagerOptions,
|
|
18
|
+
SimilarityMethod,
|
|
19
|
+
SimilarityOptions,
|
|
20
|
+
} from './types.js';
|
|
21
|
+
|
|
22
|
+
// Stub implementations for testing
|
|
23
|
+
export { StubEmbeddingPort, InMemorySemanticStore } from './types.js';
|
|
24
|
+
|
|
25
|
+
// Similarity utilities
|
|
26
|
+
export {
|
|
27
|
+
cosineSimilarity,
|
|
28
|
+
dotProductSimilarity,
|
|
29
|
+
euclideanDistance,
|
|
30
|
+
manhattanDistance,
|
|
31
|
+
computeSimilarity,
|
|
32
|
+
normalizeVector,
|
|
33
|
+
vectorNorm,
|
|
34
|
+
addVectors,
|
|
35
|
+
subtractVectors,
|
|
36
|
+
scaleVector,
|
|
37
|
+
computeCentroid,
|
|
38
|
+
findKNearest,
|
|
39
|
+
filterByThreshold,
|
|
40
|
+
DEFAULT_SIMILARITY_OPTIONS,
|
|
41
|
+
} from './similarity.js';
|
|
42
|
+
|
|
43
|
+
// Embedding service
|
|
44
|
+
export {
|
|
45
|
+
LocalEmbeddingProvider,
|
|
46
|
+
CachedEmbeddingProvider,
|
|
47
|
+
createEmbeddingProvider,
|
|
48
|
+
createTFIDFEmbedding,
|
|
49
|
+
createTFIDFEmbeddingBatch,
|
|
50
|
+
} from './embedding-service.js';
|
|
51
|
+
|
|
52
|
+
// Semantic manager
|
|
53
|
+
export {
|
|
54
|
+
createSemanticManager,
|
|
55
|
+
SemanticManagerError,
|
|
56
|
+
} from './semantic-manager.js';
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Manager
|
|
3
|
+
*
|
|
4
|
+
* High-level manager for semantic context storage and search.
|
|
5
|
+
* Combines embedding computation with storage.
|
|
6
|
+
*
|
|
7
|
+
* Invariants:
|
|
8
|
+
* - INV-SEM-001: Embeddings computed on store, cached until content changes
|
|
9
|
+
* - INV-SEM-002: Search results sorted by similarity descending
|
|
10
|
+
* - INV-SEM-003: Similarity scores normalized to [0, 1]
|
|
11
|
+
* - INV-SEM-004: Namespace isolation
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type {
|
|
15
|
+
SemanticItem,
|
|
16
|
+
SemanticSearchRequest,
|
|
17
|
+
SemanticSearchResponse,
|
|
18
|
+
SemanticStoreRequest,
|
|
19
|
+
SemanticStoreResponse,
|
|
20
|
+
SemanticListRequest,
|
|
21
|
+
SemanticListResponse,
|
|
22
|
+
SemanticDeleteResponse,
|
|
23
|
+
EmbeddingConfig,
|
|
24
|
+
} from '@defai.digital/contracts';
|
|
25
|
+
import { SemanticContextErrorCodes, computeContentHash } from '@defai.digital/contracts';
|
|
26
|
+
import type {
|
|
27
|
+
SemanticManager,
|
|
28
|
+
SemanticManagerOptions,
|
|
29
|
+
SemanticStoreStats,
|
|
30
|
+
} from './types.js';
|
|
31
|
+
|
|
32
|
+
/**
 * Error type raised by the semantic manager.
 *
 * Every instance carries a machine-readable `code`, a human-readable message
 * and, optionally, a bag of structured `details`. The static factories below
 * build instances for the specific failure conditions the manager reports.
 */
export class SemanticManagerError extends Error {
  /**
   * @param code - Machine-readable error code.
   * @param message - Human-readable description of the failure.
   * @param details - Optional structured context for the failure.
   */
  constructor(
    public readonly code: string,
    message: string,
    public readonly details?: Record<string, unknown>
  ) {
    super(message);
    this.name = 'SemanticManagerError';
  }

  /** Builds the error for an item that does not exist under `namespace:key`. */
  static notFound(key: string, namespace: string): SemanticManagerError {
    const text = `Item not found: ${namespace}:${key}`;
    const context = { key, namespace };
    return new SemanticManagerError(SemanticContextErrorCodes.NOT_FOUND, text, context);
  }

  /** Builds the error for a failed embedding computation; `message` is appended to a fixed prefix. */
  static embeddingFailed(message: string): SemanticManagerError {
    const text = `Embedding computation failed: ${message}`;
    return new SemanticManagerError(SemanticContextErrorCodes.EMBEDDING_FAILED, text);
  }

  /** Builds the error for a failed search; `message` is appended to a fixed prefix. */
  static searchFailed(message: string): SemanticManagerError {
    const text = `Search failed: ${message}`;
    return new SemanticManagerError(SemanticContextErrorCodes.SEARCH_FAILED, text);
  }

  /** Builds the error for an embedding whose dimension differs from the expected one. */
  static dimensionMismatch(expected: number, actual: number): SemanticManagerError {
    const text = `Embedding dimension mismatch: expected ${expected}, got ${actual}`;
    const context = { expected, actual };
    return new SemanticManagerError(SemanticContextErrorCodes.DIMENSION_MISMATCH, text, context);
  }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Creates a semantic manager that combines an embedding port with a store port.
 *
 * The manager keeps an in-memory map of the embedding dimension first seen for
 * each namespace and rejects later embeddings of a different dimension with a
 * DIMENSION_MISMATCH error.
 *
 * @param options - Ports plus optional settings. `defaultNamespace` defaults to
 *   `'default'` and `autoEmbed` defaults to `true`.
 * @returns An object implementing the `SemanticManager` operations.
 */
export function createSemanticManager(options: SemanticManagerOptions): SemanticManager {
  const {
    embeddingPort,
    storePort,
    defaultNamespace = 'default',
    autoEmbed = true,
  } = options;

  // Embedding dimension recorded per namespace by this manager instance.
  const namespaceDimensions = new Map<string, number>();

  /**
   * Records the dimension for a namespace on first use; afterwards throws a
   * dimension-mismatch error when a different dimension is presented.
   */
  function validateDimension(namespace: string, dimension: number): void {
    const expected = namespaceDimensions.get(namespace);
    if (expected !== undefined && expected !== dimension) {
      throw SemanticManagerError.dimensionMismatch(expected, dimension);
    }
    if (expected === undefined) {
      namespaceDimensions.set(namespace, dimension);
    }
  }

  return {
    /**
     * Stores content, computing an embedding when needed.
     *
     * INV-SEM-001: with `autoEmbed` on and no embedding supplied, an embedding is
     * computed when the item is new, its content hash changed, or
     * `forceRecompute` is set; otherwise the existing item's embedding is reused.
     *
     * Any failure that is not already a SemanticManagerError (including one
     * raised by the store port) is rethrown as an EMBEDDING_FAILED error.
     */
    async store(request: SemanticStoreRequest): Promise<SemanticStoreResponse> {
      const namespace = request.namespace ?? defaultNamespace;

      try {
        // Hash the content so an unchanged item can keep its embedding.
        const contentHash = await computeContentHash(request.content);
        const existing = await storePort.get(request.key, namespace);

        let embedding = request.embedding;
        let embeddingComputed = false;

        if (autoEmbed && !embedding) {
          const needsEmbedding =
            !existing ||
            existing.contentHash !== contentHash ||
            request.forceRecompute;

          if (needsEmbedding) {
            const result = await embeddingPort.embed({ text: request.content });
            embedding = result.embedding;
            embeddingComputed = true;

            // Check the reported dimension against the namespace's recorded one.
            validateDimension(namespace, result.dimension);
          } else if (existing?.embedding) {
            // Content unchanged: keep the embedding already stored.
            embedding = existing.embedding;
          }
        }

        // Whatever its origin, the embedding must match the port's configured
        // dimension and the namespace's recorded dimension.
        if (embedding) {
          const config = embeddingPort.getConfig();
          if (embedding.length !== config.dimension) {
            throw SemanticManagerError.dimensionMismatch(config.dimension, embedding.length);
          }
          validateDimension(namespace, embedding.length);
        }

        const result = await storePort.store({
          ...request,
          namespace,
          embedding,
        });

        return {
          ...result,
          embeddingComputed,
        };
      } catch (error) {
        if (error instanceof SemanticManagerError) throw error;

        const message = error instanceof Error ? error.message : 'Unknown error';
        throw SemanticManagerError.embeddingFailed(message);
      }
    },

    /**
     * Searches by semantic similarity.
     *
     * The query embedding is computed here only to validate its dimension; the
     * request is then passed unchanged to `storePort.search`. No default
     * namespace is applied: dimension validation runs only when the request
     * names a namespace.
     *
     * Failures that are not already a SemanticManagerError are rethrown as
     * SEARCH_FAILED errors.
     */
    async search(request: SemanticSearchRequest): Promise<SemanticSearchResponse> {
      const namespace = request.namespace;

      try {
        const queryResult = await embeddingPort.embed({ text: request.query });

        if (namespace) {
          const stats = await storePort.getStats(namespace);
          const storedDimension = stats.embeddingDimension;
          if (storedDimension !== null) {
            // Compare against the dimension the store reports, not just the
            // in-memory map: a fresh manager over a populated store has no
            // recorded dimension yet and would otherwise accept any query.
            // The typeof guard keeps the previous behavior if a store omits the field.
            if (typeof storedDimension === 'number' && storedDimension !== queryResult.dimension) {
              throw SemanticManagerError.dimensionMismatch(storedDimension, queryResult.dimension);
            }
            validateDimension(namespace, queryResult.dimension);
          }
        }

        return await storePort.search(request);
      } catch (error) {
        if (error instanceof SemanticManagerError) throw error;

        const message = error instanceof Error ? error.message : 'Unknown error';
        throw SemanticManagerError.searchFailed(message);
      }
    },

    /**
     * Fetches an item by key, using the default namespace when none is given.
     */
    async get(key: string, namespace?: string): Promise<SemanticItem | null> {
      return storePort.get(key, namespace ?? defaultNamespace);
    },

    /**
     * Lists items, using the default namespace when the request names none.
     */
    async list(request: SemanticListRequest): Promise<SemanticListResponse> {
      return storePort.list({
        ...request,
        namespace: request.namespace ?? defaultNamespace,
      });
    },

    /**
     * Deletes an item by key, using the default namespace when none is given.
     */
    async delete(key: string, namespace?: string): Promise<SemanticDeleteResponse> {
      return storePort.delete(key, namespace ?? defaultNamespace);
    },

    /**
     * Returns store statistics. The namespace is forwarded as given; no default
     * namespace is substituted.
     */
    async getStats(namespace?: string): Promise<SemanticStoreStats> {
      return storePort.getStats(namespace);
    },

    /**
     * Clears a namespace (the default one when none is given) and returns the
     * value reported by the store port.
     */
    async clear(namespace?: string): Promise<number> {
      const ns = namespace ?? defaultNamespace;
      const removed = await storePort.clear(ns);
      // Forget the recorded dimension only after the store clear succeeded, so
      // a failed clear leaves dimension tracking intact.
      namespaceDimensions.delete(ns);
      return removed;
    },

    /**
     * Returns the embedding port's configuration.
     */
    getEmbeddingConfig(): EmbeddingConfig {
      return embeddingPort.getConfig();
    },
  };
}
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Similarity Computation Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides various methods for computing vector similarity.
|
|
5
|
+
*
|
|
6
|
+
* Invariants:
|
|
7
|
+
* - INV-SEM-003: All scores normalized to [0, 1] range
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { SimilarityMethod, SimilarityOptions } from './types.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Options applied by `computeSimilarity` when the caller leaves them out:
 * cosine similarity with normalization enabled.
 */
export const DEFAULT_SIMILARITY_OPTIONS: SimilarityOptions = { method: 'cosine', normalize: true };
|
|
19
|
+
|
|
20
|
+
/**
 * Computes the cosine similarity of two equal-length vectors.
 *
 * INV-SEM-003: with `normalize` (the default) the cosine is mapped from
 * [-1, 1] to [0, 1] via `(cos + 1) / 2`; with `normalize = false` the raw
 * cosine is returned.
 *
 * The cosine is clamped to [-1, 1] before use because floating-point rounding
 * can push the quotient marginally outside that range for (anti-)parallel
 * vectors, which would otherwise break the documented bounds.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @param normalize - Whether to map the result to [0, 1]. Defaults to `true`.
 * @returns The similarity score; 0 for empty vectors or when either vector has
 *   zero magnitude.
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarity(a: number[], b: number[], normalize = true): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  if (a.length === 0) return 0;

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i]! * b[i]!;
    normA += a[i]! * a[i]!;
    normB += b[i]! * b[i]!;
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);

  // A zero-magnitude vector has no direction; report 0 rather than dividing by zero.
  if (denominator === 0) return 0;

  // Clamp away rounding error such as 1.0000000000000002.
  const similarity = Math.min(1, Math.max(-1, dotProduct / denominator));

  // Map from [-1, 1] to [0, 1] when requested.
  return normalize ? (similarity + 1) / 2 : similarity;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Computes dot-product similarity of two equal-length vectors.
 *
 * With `normalize = false` the raw dot product is returned. With `normalize`
 * (the default) the dot product of the unit vectors — i.e. the cosine — is
 * computed and mapped from [-1, 1] to [0, 1] (INV-SEM-003).
 *
 * In the normalized path the cosine is clamped to [-1, 1] before mapping,
 * because floating-point rounding can push the quotient marginally outside
 * that range and break the documented bounds.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @param normalize - Whether to return the [0, 1] score instead of the raw dot
 *   product. Defaults to `true`.
 * @returns The score; 0 for empty vectors, and 0 in the normalized path when
 *   either vector has zero magnitude.
 * @throws Error when the vectors differ in length.
 */
export function dotProductSimilarity(a: number[], b: number[], normalize = true): number {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  if (a.length === 0) return 0;

  if (!normalize) {
    // Raw dot product, unbounded.
    let dotProduct = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i]! * b[i]!;
    }
    return dotProduct;
  }

  // Normalized: dot product of unit vectors, computed as dot / (|a| * |b|).
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i]! * b[i]!;
    normA += a[i]! * a[i]!;
    normB += b[i]! * b[i]!;
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);

  // A zero-magnitude vector has no direction; report 0 rather than dividing by zero.
  if (denominator === 0) return 0;

  // Clamp away rounding error such as 1.0000000000000002.
  const similarity = Math.min(1, Math.max(-1, dotProduct / denominator));

  // Map from [-1, 1] to [0, 1].
  return (similarity + 1) / 2;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Computes the Euclidean distance between two equal-length vectors.
 *
 * INV-SEM-003: with `normalize` (the default) the distance is converted to a
 * similarity via `1 / (1 + distance)`, so a smaller distance gives a higher
 * score; with `normalize = false` the raw distance is returned.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @param normalize - Whether to return a similarity instead of the distance.
 * @returns The similarity or distance; 0 for empty vectors in either mode.
 * @throws Error when the vectors differ in length.
 */
export function euclideanDistance(a: number[], b: number[], normalize = true): number {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }
  if (size === 0) {
    return 0;
  }

  // Sum of squared component differences, accumulated left to right.
  const squaredTotal = a.reduce((acc, value, idx) => {
    const delta = value - b[idx]!;
    return acc + delta * delta;
  }, 0);
  const dist = Math.sqrt(squaredTotal);

  if (normalize) {
    return 1 / (1 + dist);
  }
  return dist;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Computes the Manhattan (sum of absolute differences) distance between two
 * equal-length vectors.
 *
 * With `normalize` (the default) the distance is converted to a similarity via
 * `1 / (1 + distance)`; with `normalize = false` the raw distance is returned.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @param normalize - Whether to return a similarity instead of the distance.
 * @returns The similarity or distance; 0 for empty vectors in either mode.
 * @throws Error when the vectors differ in length.
 */
export function manhattanDistance(a: number[], b: number[], normalize = true): number {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }
  if (size === 0) {
    return 0;
  }

  // Absolute component differences, accumulated left to right.
  const total = a.reduce((acc, value, idx) => acc + Math.abs(value - b[idx]!), 0);

  if (normalize) {
    return 1 / (1 + total);
  }
  return total;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Computes the similarity of two vectors with the selected method.
 *
 * Caller-supplied options are layered over `DEFAULT_SIMILARITY_OPTIONS`, then
 * the call is dispatched to the cosine, dot-product or Euclidean routine.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @param options - Partial options; missing fields fall back to the defaults.
 * @returns The score produced by the selected routine.
 * @throws Error when the method is not `'cosine'`, `'dot'` or `'euclidean'`.
 */
export function computeSimilarity(
  a: number[],
  b: number[],
  options: Partial<SimilarityOptions> = {}
): number {
  const resolved = { ...DEFAULT_SIMILARITY_OPTIONS, ...options };
  const shouldNormalize = resolved.normalize;

  if (resolved.method === 'cosine') {
    return cosineSimilarity(a, b, shouldNormalize);
  }
  if (resolved.method === 'dot') {
    return dotProductSimilarity(a, b, shouldNormalize);
  }
  if (resolved.method === 'euclidean') {
    return euclideanDistance(a, b, shouldNormalize);
  }

  throw new Error(`Unknown similarity method: ${resolved.method}`);
}
|
|
162
|
+
|
|
163
|
+
/**
 * Scales a vector to unit length.
 *
 * A zero-magnitude vector cannot be scaled, so its components are returned
 * unchanged. The result is always a new array — including in the zero case —
 * so callers can mutate it without affecting the input.
 *
 * @param v - Vector to normalize; not modified.
 * @returns A new array holding `v` divided by its Euclidean norm, or a copy of
 *   `v` when that norm is 0.
 */
export function normalizeVector(v: number[]): number[] {
  const norm = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0));
  // Return a copy rather than `v` itself so the result never aliases the input.
  if (norm === 0) return v.slice();
  return v.map((x) => x / norm);
}
|
|
171
|
+
|
|
172
|
+
/**
 * Returns the Euclidean norm (magnitude) of a vector.
 *
 * @param v - Input vector.
 * @returns The square root of the sum of squared components; 0 for an empty vector.
 */
export function vectorNorm(v: number[]): number {
  let squaredTotal = 0;
  for (const component of v) {
    squaredTotal += component * component;
  }
  return Math.sqrt(squaredTotal);
}
|
|
178
|
+
|
|
179
|
+
/**
 * Adds two equal-length vectors component by component.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns A new array whose i-th entry is `a[i] + b[i]`.
 * @throws Error when the vectors differ in length.
 */
export function addVectors(a: number[], b: number[]): number[] {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  const sum: number[] = [];
  for (let idx = 0; idx < size; idx += 1) {
    sum.push(a[idx]! + b[idx]!);
  }
  return sum;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Subtracts `b` from `a` component by component.
 *
 * @param a - Minuend vector.
 * @param b - Subtrahend vector; must have the same length as `a`.
 * @returns A new array whose i-th entry is `a[i] - b[i]`.
 * @throws Error when the vectors differ in length.
 */
export function subtractVectors(a: number[], b: number[]): number[] {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }

  const difference: number[] = [];
  for (let idx = 0; idx < size; idx += 1) {
    difference.push(a[idx]! - b[idx]!);
  }
  return difference;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Multiplies every component of a vector by a scalar.
 *
 * @param v - Input vector; not modified.
 * @param scalar - Factor applied to each component.
 * @returns A new array whose i-th entry is `v[i] * scalar`.
 */
export function scaleVector(v: number[], scalar: number): number[] {
  const scaled: number[] = [];
  for (const component of v) {
    scaled.push(component * scalar);
  }
  return scaled;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Computes the centroid (component-wise mean) of a set of vectors.
 *
 * The dimension is taken from the first vector; every other vector must match it.
 *
 * @param vectors - Vectors to average.
 * @returns The mean vector, or an empty array when `vectors` is empty.
 * @throws Error when the vectors do not all share one dimension.
 */
export function computeCentroid(vectors: number[][]): number[] {
  const count = vectors.length;
  if (count === 0) {
    return [];
  }

  const width = vectors[0]!.length;
  const totals: number[] = Array.from({ length: width }, () => 0);

  for (const vec of vectors) {
    if (vec.length !== width) {
      throw new Error(`Inconsistent vector dimensions`);
    }
    for (let idx = 0; idx < width; idx += 1) {
      totals[idx] = totals[idx]! + vec[idx]!;
    }
  }

  return totals.map((total) => total / count);
}
|
|
226
|
+
|
|
227
|
+
/**
 * Finds the `k` candidates most similar to a query vector.
 *
 * Each candidate is scored with `computeSimilarity` (normalization on), and
 * the scores are sorted highest first (INV-SEM-002).
 *
 * @param query - Query embedding.
 * @param candidates - Items to rank, each with an `id` and an `embedding`.
 * @param k - Maximum number of results. Values that are not greater than 0
 *   (including negatives and NaN) yield an empty result.
 * @param method - Similarity method; defaults to `'cosine'`.
 * @returns Up to `k` `{ id, similarity }` pairs in descending similarity order.
 */
export function findKNearest(
  query: number[],
  candidates: Array<{ id: string; embedding: number[] }>,
  k: number,
  method: SimilarityMethod = 'cosine'
): Array<{ id: string; similarity: number }> {
  const scored = candidates.map((c) => ({
    id: c.id,
    similarity: computeSimilarity(query, c.embedding, { method, normalize: true }),
  }));

  // Sort by similarity descending
  scored.sort((a, b) => b.similarity - a.similarity);

  // A negative end index makes slice() drop items from the end instead of
  // returning none, so floor the limit at 0.
  const limit = k > 0 ? k : 0;
  return scored.slice(0, limit);
}
|
|
247
|
+
|
|
248
|
+
/**
 * Keeps the candidates whose similarity to the query reaches a minimum.
 *
 * INV-SEM-003: each candidate is scored with normalization on, and the
 * threshold is compared against that normalized score.
 *
 * @param query - Query embedding.
 * @param candidates - Items to score, each with an `id` and an `embedding`.
 * @param minSimilarity - Inclusive lower bound on the similarity score.
 * @param method - Similarity method; defaults to `'cosine'`.
 * @returns The `{ id, similarity }` pairs that pass, in descending similarity order.
 */
export function filterByThreshold(
  query: number[],
  candidates: Array<{ id: string; embedding: number[] }>,
  minSimilarity: number,
  method: SimilarityMethod = 'cosine'
): Array<{ id: string; similarity: number }> {
  const scored = candidates.map((candidate) => {
    const similarity = computeSimilarity(query, candidate.embedding, { method, normalize: true });
    return { id: candidate.id, similarity };
  });

  const passing = scored.filter((entry) => entry.similarity >= minSimilarity);
  passing.sort((left, right) => right.similarity - left.similarity);
  return passing;
}
|