@defai.digital/semantic-context 13.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/types.js ADDED
@@ -0,0 +1,258 @@
1
+ /**
2
+ * Semantic Context Types
3
+ *
4
+ * Port interfaces and type definitions for semantic context storage.
5
+ */
6
+ // ============================================================================
7
+ // Stub Implementations (for testing)
8
+ // ============================================================================
9
/**
 * Stub embedding port for testing.
 *
 * Produces deterministic pseudo-embeddings derived from the character codes
 * of the input text; no external service is contacted.
 */
export class StubEmbeddingPort {
    dimension;
    model;
    /**
     * @param dimension Length of every generated vector (384 when omitted).
     * @param model Model name reported in results and config ('stub' when omitted).
     */
    constructor(dimension = 384, model = 'stub') {
        this.dimension = dimension;
        this.model = model;
    }
    /**
     * Embed one request. The reported duration is a fixed 10 ms, and the
     * request's own model name wins over the port's when it is set.
     */
    async embed(request) {
        const vector = this.generateEmbedding(request.text);
        const reportedModel = request.model ?? this.model;
        return {
            embedding: vector,
            model: reportedModel,
            dimension: this.dimension,
            durationMs: 10,
        };
    }
    /** Embed every text by delegating to `embed`, preserving input order. */
    async embedBatch(texts) {
        const pending = texts.map((text) => this.embed({ text }));
        return Promise.all(pending);
    }
    /** Static configuration describing this stub. */
    getConfig() {
        const { model, dimension } = this;
        return {
            provider: 'local',
            model,
            dimension,
            batchSize: 32,
            cacheEnabled: true,
        };
    }
    /** The stub is always available. */
    async isAvailable() {
        return true;
    }
    /**
     * Build a vector of `this.dimension` components from the text.
     *
     * Component i is sin(charCode * (i + 1) * 0.1), where charCode cycles
     * through the text (0 when the text is empty). The vector is then divided
     * by its Euclidean norm, or left untouched when that norm is 0.
     */
    generateEmbedding(text) {
        const raw = [];
        let sumOfSquares = 0;
        for (let slot = 0; slot < this.dimension; slot += 1) {
            // charCodeAt yields NaN for an empty string; fall back to 0.
            const code = text.charCodeAt(slot % text.length) || 0;
            const component = Math.sin(code * (slot + 1) * 0.1);
            raw.push(component);
            sumOfSquares = sumOfSquares + component * component;
        }
        const length = Math.sqrt(sumOfSquares);
        const divisor = length || 1;
        return raw.map((component) => component / divisor);
    }
}
56
/**
 * In-memory semantic store for testing.
 *
 * Items are kept in a Map keyed by `${namespace}:${key}`. Embeddings come
 * from the injected embedding port (a new StubEmbeddingPort when none is
 * supplied). Nothing is persisted.
 */
export class InMemorySemanticStore {
    items = new Map();
    embeddingPort;
    /**
     * @param embeddingPort Optional embedding provider used by `store` and `search`.
     */
    constructor(embeddingPort) {
        this.embeddingPort = embeddingPort ?? new StubEmbeddingPort();
    }
    /** Compose the internal Map key for a (key, namespace) pair. */
    makeKey(key, namespace) {
        return `${namespace}:${key}`;
    }
    /**
     * Insert or update an item.
     *
     * An embedding is computed only when the item is new, its content hash
     * changed, or `forceRecompute` is set, and only if the request does not
     * carry its own embedding. When none of those apply, the embedding already
     * stored for the item is kept, so re-storing identical content does not
     * remove the item from search results.
     *
     * @returns `{ success, item, created, embeddingComputed }`; the returned
     *          item omits the embedding vector.
     */
    async store(request) {
        const namespace = request.namespace ?? 'default';
        const storageKey = this.makeKey(request.key, namespace);
        const existing = this.items.get(storageKey);
        // Compute content hash
        const contentHash = await this.hashContent(request.content);
        const needsEmbedding = !existing ||
            existing.contentHash !== contentHash ||
            request.forceRecompute;
        let embedding = request.embedding;
        let embeddingComputed = false;
        if (!embedding) {
            if (needsEmbedding) {
                const result = await this.embeddingPort.embed({ text: request.content });
                embedding = result.embedding;
                embeddingComputed = true;
            }
            else {
                // Content is unchanged: carry the stored embedding forward
                // instead of overwriting it with undefined.
                embedding = existing?.embedding;
            }
        }
        const item = {
            key: request.key,
            namespace,
            content: request.content,
            embedding,
            embeddingDimension: embedding?.length,
            embeddingModel: this.embeddingPort.getConfig().model,
            metadata: request.metadata,
            tags: request.tags,
            contentHash,
            createdAt: existing?.createdAt ?? new Date().toISOString(),
            updatedAt: new Date().toISOString(),
        };
        this.items.set(storageKey, item);
        // Omit embedding from the response item
        const { embedding: _, ...itemWithoutEmbedding } = item;
        return {
            success: true,
            item: itemWithoutEmbedding,
            created: !existing,
            embeddingComputed,
        };
    }
    /**
     * Rank stored items against a text query.
     *
     * The query is embedded through the embedding port, then compared with
     * every candidate that has an embedding and passes the namespace and tag
     * filters. Results below `minSimilarity` (default 0.7) are dropped, the
     * rest are sorted by descending similarity and cut to `topK` (default 10).
     * `totalMatches` is counted after that cut.
     */
    async search(request) {
        const startTime = Date.now();
        const namespace = request.namespace;
        // Get query embedding
        const queryResult = await this.embeddingPort.embed({ text: request.query });
        const queryEmbedding = queryResult.embedding;
        // Filter items
        const candidates = Array.from(this.items.values()).filter((item) => {
            if (namespace && item.namespace !== namespace)
                return false;
            if (!item.embedding)
                return false;
            if (request.filterTags) {
                // Every requested tag must be present on the item.
                const itemTags = new Set(item.tags ?? []);
                if (!request.filterTags.every((t) => itemTags.has(t)))
                    return false;
            }
            return true;
        });
        // Compute similarities
        const scored = candidates.map((item) => ({
            item,
            similarity: this.cosineSimilarity(queryEmbedding, item.embedding),
        }));
        // Filter by minSimilarity and sort (INV-SEM-002, INV-SEM-003)
        const filtered = scored
            .filter((s) => s.similarity >= (request.minSimilarity ?? 0.7))
            .sort((a, b) => b.similarity - a.similarity)
            .slice(0, request.topK ?? 10);
        const results = filtered.map((s, index) => ({
            item: request.includeEmbeddings
                ? s.item
                : { ...s.item, embedding: undefined },
            similarity: s.similarity,
            rank: index + 1,
            snippet: s.item.content.slice(0, 200),
        }));
        return {
            results,
            totalMatches: filtered.length,
            query: request.query,
            namespace,
            durationMs: Date.now() - startTime,
            queryEmbedding: request.includeEmbeddings ? queryEmbedding : undefined,
        };
    }
    /** Fetch the stored item (including its embedding), or null when absent. */
    async get(key, namespace = 'default') {
        return this.items.get(this.makeKey(key, namespace)) ?? null;
    }
    /**
     * List items with optional namespace, tag and key-prefix filters, sorted
     * by `orderBy` (default 'createdAt') in `orderDir` order (descending
     * unless 'asc'), then paginated with `offset` (default 0) and `limit`
     * (default 10). Returned items have their embedding blanked out.
     */
    async list(request) {
        let items = Array.from(this.items.values());
        // Filter by namespace
        if (request.namespace) {
            items = items.filter((i) => i.namespace === request.namespace);
        }
        // Filter by tags
        if (request.filterTags) {
            items = items.filter((item) => {
                const itemTags = new Set(item.tags ?? []);
                return request.filterTags.every((t) => itemTags.has(t));
            });
        }
        // Filter by key prefix
        if (request.keyPrefix) {
            items = items.filter((i) => i.key.startsWith(request.keyPrefix));
        }
        // Sort
        const sortDir = request.orderDir === 'asc' ? 1 : -1;
        const orderBy = request.orderBy ?? 'createdAt';
        items.sort((a, b) => {
            const aVal = a[orderBy] ?? '';
            const bVal = b[orderBy] ?? '';
            if (aVal < bVal)
                return -sortDir;
            if (aVal > bVal)
                return sortDir;
            // Equal keys must compare as 0 so the comparator is consistent
            // and ties keep their insertion order.
            return 0;
        });
        // Paginate
        const offset = request.offset ?? 0;
        const limit = request.limit ?? 10;
        const paginated = items.slice(offset, offset + limit);
        return {
            items: paginated.map((i) => ({ ...i, embedding: undefined })),
            total: items.length,
            hasMore: offset + limit < items.length,
            namespace: request.namespace,
        };
    }
    /** Remove one item; `deleted` tells whether it existed. */
    async delete(key, namespace = 'default') {
        const storageKey = this.makeKey(key, namespace);
        const deleted = this.items.delete(storageKey);
        return { deleted, key, namespace };
    }
    /** Whether an item is stored under (key, namespace). */
    async exists(key, namespace = 'default') {
        return this.items.has(this.makeKey(key, namespace));
    }
    /**
     * Summarise the store, optionally restricted to one namespace.
     * Dimension and model are taken from the first item that has an
     * embedding. The list of all namespaces is included only when no
     * namespace was requested.
     */
    async getStats(namespace) {
        let items = Array.from(this.items.values());
        if (namespace) {
            items = items.filter((i) => i.namespace === namespace);
        }
        const withEmbeddings = items.filter((i) => i.embedding);
        const allNamespaces = [...new Set(Array.from(this.items.values()).map((i) => i.namespace))];
        const result = {
            totalItems: items.length,
            itemsWithEmbeddings: withEmbeddings.length,
            embeddingDimension: withEmbeddings[0]?.embeddingDimension ?? null,
            embeddingModel: withEmbeddings[0]?.embeddingModel ?? null,
            namespace: namespace ?? null,
        };
        // Only include namespaces if no specific namespace was queried
        if (!namespace) {
            result.namespaces = allNamespaces;
        }
        return result;
    }
    /**
     * Delete every item, or only those of one namespace.
     * @returns the number of items removed.
     */
    async clear(namespace) {
        if (namespace) {
            const keysToDelete = Array.from(this.items.entries())
                .filter(([_, item]) => item.namespace === namespace)
                .map(([key]) => key);
            keysToDelete.forEach((k) => this.items.delete(k));
            return keysToDelete.length;
        }
        else {
            const count = this.items.size;
            this.items.clear();
            return count;
        }
    }
    /**
     * Cosine similarity rescaled from [-1, 1] to [0, 1].
     * Returns 0 when the vectors differ in length or either has zero norm.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length)
            return 0;
        let dot = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        if (denom === 0)
            return 0;
        // Cosine similarity is in [-1, 1], normalize to [0, 1]
        return (dot / denom + 1) / 2;
    }
    /** SHA-256 of the UTF-8 encoded content as a lowercase hex string. */
    async hashContent(content) {
        const encoder = new TextEncoder();
        const data = encoder.encode(content);
        const hashBuffer = await crypto.subtle.digest('SHA-256', data);
        const hashArray = Array.from(new Uint8Array(hashBuffer));
        return hashArray.map((b) => b.toString(16).padStart(2, '0')).join('');
    }
}
258
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AA6QH,+EAA+E;AAC/E,qCAAqC;AACrC,+EAA+E;AAE/E;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,SAAS,CAAS;IAClB,KAAK,CAAS;IAEtB,YAAY,SAAS,GAAG,GAAG,EAAE,KAAK,GAAG,MAAM;QACzC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,OAAyB;QACnC,sDAAsD;QACtD,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACvD,OAAO;YACL,SAAS;YACT,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK;YAClC,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,UAAU,EAAE,EAAE;SACf,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAChE,CAAC;IAED,SAAS;QACP,OAAO;YACL,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,SAAS,EAAE,EAAE;YACb,YAAY,EAAE,IAAI;SACnB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,WAAW;QACf,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,iBAAiB,CAAC,IAAY;QACpC,0CAA0C;QAC1C,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;QACrD,CAAC;QACD,YAAY;QACZ,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,qBAAqB;IACxB,KAAK,GAA8B,IAAI,GAAG,EAAE,CAAC;IAC7C,aAAa,CAAgB;IAErC,YAAY,aAA6B;QACvC,IAAI,CAAC,aAAa,GAAG,aAAa,IAAI,IAAI,iBAAiB,EAAE,CAAC;IAChE,CAAC;IAEO,OAAO,CAAC,GAAW,EAAE,SAAiB;QAC5C,OAAO,GAAG,SAAS,IAAI,GAAG,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,OAA6B;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,SAAS,CAAC;QACjD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO
,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAE5C,uBAAuB;QACvB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5D,MAAM,cAAc,GAAG,CAAC,QAAQ;YAC9B,QAAQ,CAAC,WAAW,KAAK,WAAW;YACpC,OAAO,CAAC,cAAc,CAAC;QAEzB,IAAI,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QAClC,IAAI,iBAAiB,GAAG,KAAK,CAAC;QAE9B,IAAI,cAAc,IAAI,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;YACzE,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;YAC7B,iBAAiB,GAAG,IAAI,CAAC;QAC3B,CAAC;QAED,MAAM,IAAI,GAAiB;YACzB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS;YACT,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,SAAS;YACT,kBAAkB,EAAE,SAAS,EAAE,MAAM;YACrC,cAAc,EAAE,IAAI,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,KAAK;YACpD,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,WAAW;YACX,SAAS,EAAE,QAAQ,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC1D,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAEjC,wCAAwC;QACxC,MAAM,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,oBAAoB,EAAE,GAAG,IAAI,CAAC;QACvD,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,oBAAoB;YAC1B,OAAO,EAAE,CAAC,QAAQ;YAClB,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAA8B;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QAEpC,sBAAsB;QACtB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QAC5E,MAAM,cAAc,GAAG,WAAW,CAAC,SAAS,CAAC;QAE7C,eAAe;QACf,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YACjE,IAAI,SAAS,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS;gBAAE,OAAO,KAAK,CAAC;YAC5D,IAAI,CAAC,IAAI,CAAC,SAAS;gBAAE,OAAO,KAAK,CAAC;YAClC,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;gBAC1C,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;oBAAE,OAAO,KAAK,CAAC;YACtE,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC
;QAEH,uBAAuB;QACvB,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACvC,IAAI;YACJ,UAAU,EAAE,IAAI,CAAC,gBAAgB,CAAC,cAAc,EAAE,IAAI,CAAC,SAAU,CAAC;SACnE,CAAC,CAAC,CAAC;QAEJ,8DAA8D;QAC9D,MAAM,QAAQ,GAAG,MAAM;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,OAAO,CAAC,aAAa,IAAI,GAAG,CAAC,CAAC;aAC7D,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;aAC3C,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QAEhC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YAC1C,IAAI,EAAE,OAAO,CAAC,iBAAiB;gBAC7B,CAAC,CAAC,CAAC,CAAC,IAAI;gBACR,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE;YACvC,UAAU,EAAE,CAAC,CAAC,UAAU;YACxB,IAAI,EAAE,KAAK,GAAG,CAAC;YACf,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;SACtC,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,OAAO;YACP,YAAY,EAAE,QAAQ,CAAC,MAAM;YAC7B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,cAAc,EAAE,OAAO,CAAC,iBAAiB,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS;SACvE,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,SAAS,GAAG,SAAS;QAC1C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC,IAAI,IAAI,CAAC;IAC9D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,OAA4B;QACrC,IAAI,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAE5C,sBAAsB;QACtB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,OAAO,CAAC,SAAS,CAAC,CAAC;QACjE,CAAC;QAED,iBAAiB;QACjB,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;gBAC5B,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;gBAC1C,OAAO,OAAO,CAAC,UAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3D,CAAC,CAAC,CAAC;QACL,CAAC;QAED,uBAAuB;QACvB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,SAAU,CAAC,CAAC,CAAC;QA
CpE,CAAC;QAED,OAAO;QACP,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;YACrD,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;YACrD,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,WAAW;QACX,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAC;QAEtD,OAAO;YACL,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;YAC7D,KAAK,EAAE,KAAK,CAAC,MAAM;YACnB,OAAO,EAAE,MAAM,GAAG,KAAK,GAAG,KAAK,CAAC,MAAM;YACtC,SAAS,EAAE,OAAO,CAAC,SAAS;SAC7B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,SAAS,GAAG,SAAS;QAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC9C,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,SAAS,GAAG,SAAS;QAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,SAAkB;QAC/B,IAAI,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAC5C,IAAI,SAAS,EAAE,CAAC;YACd,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACxD,MAAM,aAAa,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QAE5F,MAAM,MAAM,GAAuB;YACjC,UAAU,EAAE,KAAK,CAAC,MAAM;YACxB,mBAAmB,EAAE,cAAc,CAAC,MAAM;YAC1C,kBAAkB,EAAE,cAAc,CAAC,CAAC,CAAC,EAAE,kBAAkB,IAAI,IAAI;YACjE,cAAc,EAAE,cAAc,CAAC,CAAC,CAAC,EAAE,cAAc,IAAI,IAAI;YACzD,SAAS,EAAE,SAAS,IAAI,
IAAI;SAC7B,CAAC;QAEF,+DAA+D;QAC/D,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,CAAC,UAAU,GAAG,aAAa,CAAC;QACpC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,SAAkB;QAC5B,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;iBAClD,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,KAAK,SAAS,CAAC;iBACnD,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC;YACvB,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,OAAO,YAAY,CAAC,MAAM,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;YAC9B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,CAAW,EAAE,CAAW;QAC/C,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;YAAE,OAAO,CAAC,CAAC;QACpC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;YACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;YACvB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;QACzB,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClD,IAAI,KAAK,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAC1B,uDAAuD;QACvD,OAAO,CAAC,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,OAAe;QACvC,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAC/D,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;QACzD,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxE,CAAC;CACF"}
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "@defai.digital/semantic-context",
3
+ "version": "13.4.0",
4
+ "type": "module",
5
+ "description": "Vector-indexed semantic search for AutomatosX",
6
+ "license": "BUSL-1.1",
7
+ "author": "DEFAI Private Limited",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/defai-digital/automatosx.git",
11
+ "directory": "packages/core/semantic-context"
12
+ },
13
+ "homepage": "https://github.com/defai-digital/automatosx#readme",
14
+ "bugs": {
15
+ "url": "https://github.com/defai-digital/automatosx/issues"
16
+ },
17
+ "main": "dist/index.js",
18
+ "types": "dist/index.d.ts",
19
+ "exports": {
20
+ ".": {
21
+ "types": "./dist/index.d.ts",
22
+ "import": "./dist/index.js"
23
+ }
24
+ },
25
+ "files": [
26
+ "dist",
27
+ "src"
28
+ ],
29
+ "engines": {
30
+ "node": ">=20.0.0"
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "dependencies": {
36
+ "@defai.digital/contracts": "13.4.0"
37
+ },
38
+ "devDependencies": {
39
+ "typescript": "^5.7.2"
40
+ },
41
+ "peerDependencies": {
42
+ "zod": "^3.23.0"
43
+ },
44
+ "scripts": {
45
+ "build": "tsc --build",
46
+ "clean": "rm -rf dist"
47
+ }
48
+ }
@@ -0,0 +1,323 @@
1
+ /**
2
+ * Embedding Service
3
+ *
4
+ * Provides text embedding computation with a local TF-IDF fallback.
5
+ * Can be extended with external providers (OpenAI, Cohere, etc.)
6
+ *
7
+ * Invariants:
8
+ * - INV-SEM-001: Embeddings computed and cached
9
+ * - INV-SEM-200: Consistent dimension within namespace
10
+ */
11
+
12
+ import type {
13
+ EmbeddingPort,
14
+ EmbeddingRequest,
15
+ EmbeddingResult,
16
+ } from './types.js';
17
+ import type { EmbeddingConfig } from '@defai.digital/contracts';
18
+ import { normalizeVector } from './similarity.js';
19
+
20
+ // ============================================================================
21
+ // Local TF-IDF Embedding
22
+ // ============================================================================
23
+
24
/**
 * Split text into lowercase word tokens.
 *
 * Every character that is neither a word character (`\w`) nor whitespace is
 * replaced by a space, the text is split on whitespace runs, and tokens of
 * one character or fewer are discarded.
 */
function tokenize(text: string): string[] {
  const lowered = text.toLowerCase();
  const punctuationFree = lowered.replace(/[^\w\s]/g, ' ');
  const kept: string[] = [];
  for (const piece of punctuationFree.split(/\s+/)) {
    if (piece.length > 1) {
      kept.push(piece);
    }
  }
  return kept;
}
34
+
35
/**
 * Compute relative term frequencies.
 *
 * @param tokens Token list of one document.
 * @returns Map from each distinct token (in first-seen order) to its
 *          occurrence count divided by the total number of tokens.
 */
function computeTF(tokens: string[]): Map<string, number> {
  const occurrences = new Map<string, number>();
  tokens.forEach((token) => {
    occurrences.set(token, (occurrences.get(token) ?? 0) + 1);
  });

  // Scale raw counts by the document length.
  const documentLength = tokens.length;
  const frequencies = new Map<string, number>();
  occurrences.forEach((count, term) => {
    frequencies.set(term, count / documentLength);
  });
  return frequencies;
}
50
+
51
/**
 * Map a string onto a bucket index in `[0, maxDim)`.
 *
 * Folds the UTF-16 code units into a 32-bit signed integer with the classic
 * `hash * 31 + code` recurrence, then reduces the absolute value modulo
 * `maxDim`.
 */
function hashString(str: string, maxDim: number): number {
  let acc = 0;
  for (let pos = 0; pos < str.length; pos += 1) {
    // Math.imul and `| 0` keep the accumulator wrapped to 32 bits.
    acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
  }
  return Math.abs(acc) % maxDim;
}
63
+
64
/**
 * Create a hashed term-frequency embedding for a single text.
 *
 * Steps:
 * 1. Tokenize the text and compute relative term frequencies.
 * 2. Hash each term to one of `dimension` buckets.
 * 3. Add the frequency to that bucket, negated when a second hash of
 *    `term + '_sign'` is odd, so colliding terms do not always reinforce.
 * 4. Pass the vector through `normalizeVector`.
 *
 * No inverse document frequency is applied here; a single text carries no
 * corpus statistics.
 *
 * @param text Text to embed.
 * @param dimension Number of buckets, i.e. the length of the returned vector.
 */
export function createTFIDFEmbedding(text: string, dimension: number): number[] {
  const termFrequencies = computeTF(tokenize(text));
  const buckets = new Array(dimension).fill(0);

  termFrequencies.forEach((weight, term) => {
    const slot = hashString(term, dimension);
    const flipSign = hashString(term + '_sign', 2) !== 0;
    buckets[slot] += flipSign ? -weight : weight;
  });

  return normalizeVector(buckets);
}
91
+
92
/**
 * Create hashed TF-IDF embeddings for a batch of texts.
 *
 * Document frequencies are computed over the batch itself, so the weights of
 * a text depend on the other texts passed in the same call.
 *
 * IDF uses the smoothed form `ln((1 + N) / (1 + df)) + 1`. The plain
 * `ln(N / df)` is 0 for every term of a one-document batch and for any term
 * that occurs in all documents, which collapsed such embeddings to all-zero
 * vectors. With smoothing every term keeps a weight of at least 1.
 *
 * @param texts Documents to embed; the batch doubles as the IDF corpus.
 * @param dimension Number of hash buckets, i.e. the length of each vector.
 * @returns One vector per input text, in input order, each passed through
 *          `normalizeVector`.
 */
export function createTFIDFEmbeddingBatch(texts: string[], dimension: number): number[][] {
  const tokenized = texts.map((text) => tokenize(text));

  // Document frequency: number of texts in which each token appears.
  const docFreq = new Map<string, number>();
  for (const tokens of tokenized) {
    for (const token of new Set(tokens)) {
      docFreq.set(token, (docFreq.get(token) ?? 0) + 1);
    }
  }

  const numDocs = texts.length;

  return tokenized.map((tokens) => {
    const embedding = new Array<number>(dimension).fill(0);

    for (const [term, freq] of computeTF(tokens)) {
      const df = docFreq.get(term) ?? 1;
      // Smoothed IDF: strictly positive, equal to 1 when df === N.
      const idf = Math.log((1 + numDocs) / (1 + df)) + 1;
      const tfidf = freq * idf;

      // Hash to a bucket; a second hash picks the sign.
      const index = hashString(term, dimension);
      const sign = hashString(term + '_sign', 2) === 0 ? 1 : -1;
      embedding[index] += tfidf * sign;
    }

    return normalizeVector(embedding);
  });
}
135
+
136
+ // ============================================================================
137
+ // Local Embedding Provider
138
+ // ============================================================================
139
+
140
/**
 * Local embedding provider using hashed TF-IDF vectors.
 *
 * Runs entirely in-process via `createTFIDFEmbedding` and
 * `createTFIDFEmbeddingBatch`.
 */
export class LocalEmbeddingProvider implements EmbeddingPort {
  private config: EmbeddingConfig;

  /**
   * @param config Optional overrides for the defaults (provider 'local',
   *   model 'tfidf', dimension 384, batchSize 32, cacheEnabled true).
   *   Entries whose value is `undefined` are ignored so they cannot erase a
   *   default.
   */
  constructor(config?: Partial<EmbeddingConfig>) {
    const overrides = Object.fromEntries(
      Object.entries(config ?? {}).filter(([, value]) => value !== undefined)
    );
    this.config = {
      provider: 'local',
      model: 'tfidf',
      dimension: 384,
      batchSize: 32,
      cacheEnabled: true,
      ...overrides,
    };
  }

  /**
   * Embed a single text.
   *
   * @returns The vector plus the model name (the request's model when given,
   *   otherwise the configured one), the configured dimension and the
   *   elapsed wall-clock time in milliseconds.
   */
  async embed(request: EmbeddingRequest): Promise<EmbeddingResult> {
    const startTime = Date.now();

    const embedding = createTFIDFEmbedding(
      request.text,
      this.config.dimension
    );

    return {
      embedding,
      model: request.model ?? this.config.model,
      dimension: this.config.dimension,
      durationMs: Date.now() - startTime,
    };
  }

  /**
   * Embed many texts, processed in chunks of `batchSize`.
   *
   * Each chunk is embedded with its own document-frequency statistics.
   * `durationMs` of every result is the time elapsed since the start of the
   * whole call.
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const startTime = Date.now();

    // A zero, negative or NaN batch size would never advance the loop;
    // fall back to one text per chunk in that case.
    const step = Math.max(1, Math.floor(this.config.batchSize) || 1);

    const results: EmbeddingResult[] = [];
    for (let i = 0; i < texts.length; i += step) {
      const batch = texts.slice(i, i + step);
      const embeddings = createTFIDFEmbeddingBatch(batch, this.config.dimension);

      for (const embedding of embeddings) {
        results.push({
          embedding,
          model: this.config.model,
          dimension: this.config.dimension,
          durationMs: Date.now() - startTime,
        });
      }
    }

    return results;
  }

  /** Return a shallow copy of the effective configuration. */
  getConfig(): EmbeddingConfig {
    return { ...this.config };
  }

  /** The local provider needs no external service and is always available. */
  async isAvailable(): Promise<boolean> {
    return true;
  }
}
203
+
204
+ // ============================================================================
205
+ // Embedding Service Factory
206
+ // ============================================================================
207
+
208
/**
 * Create an embedding provider for the given configuration.
 *
 * Only the local TF-IDF provider exists today, so every `provider` value
 * (including unrecognised ones) yields a `LocalEmbeddingProvider` built from
 * the same config.
 */
export function createEmbeddingProvider(config?: Partial<EmbeddingConfig>): EmbeddingPort {
  const requested = config?.provider ?? 'local';

  if (requested === 'local') {
    return new LocalEmbeddingProvider(config);
  }

  // Future: dispatch to OpenAI, Cohere, Voyage providers here, e.g.
  //   if (requested === 'openai') return new OpenAIEmbeddingProvider(config);

  // Anything else falls back to local.
  return new LocalEmbeddingProvider(config);
}
227
+
228
/**
 * Caching wrapper around another embedding provider.
 *
 * Results are memoised per (model, text) in insertion order. When the cache
 * is full the oldest entry is evicted (FIFO). `embed` and `embedBatch` share
 * the same key scheme, so an entry written by one is found by the other.
 */
export class CachedEmbeddingProvider implements EmbeddingPort {
  private cache: Map<string, EmbeddingResult> = new Map();
  private delegate: EmbeddingPort;
  private maxCacheSize: number;

  /**
   * @param delegate Provider that computes embeddings on a cache miss.
   * @param maxCacheSize Maximum number of cached results; values of 0 or
   *   less disable caching.
   */
  constructor(delegate: EmbeddingPort, maxCacheSize = 10000) {
    this.delegate = delegate;
    this.maxCacheSize = maxCacheSize;
  }

  /**
   * Embed one text, served from the cache when possible.
   * Cache hits are returned as a shallow copy with `durationMs` set to 0.
   */
  async embed(request: EmbeddingRequest): Promise<EmbeddingResult> {
    const cacheKey = this.cacheKeyFor(request.text, request.model);

    const cached = this.cache.get(cacheKey);
    if (cached) {
      return { ...cached, durationMs: 0 };
    }

    const result = await this.delegate.embed(request);
    this.remember(cacheKey, result);
    return result;
  }

  /**
   * Embed many texts. Cached texts are answered locally; the remaining ones
   * are sent to the delegate in a single `embedBatch` call and then cached.
   *
   * @throws Error when the delegate returns a different number of results
   *   than the number of texts it was given.
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResult[]> {
    const model = this.getConfig().model;
    const results = new Map<number, EmbeddingResult>();
    const toCompute: { index: number; text: string; cacheKey: string }[] = [];

    texts.forEach((text, index) => {
      const cacheKey = this.cacheKeyFor(text, model);
      const cached = this.cache.get(cacheKey);
      if (cached) {
        results.set(index, { ...cached, durationMs: 0 });
      } else {
        toCompute.push({ index, text, cacheKey });
      }
    });

    if (toCompute.length > 0) {
      const computed = await this.delegate.embedBatch(toCompute.map((t) => t.text));
      if (computed.length !== toCompute.length) {
        throw new Error(
          `Embedding delegate returned ${computed.length} results for ${toCompute.length} texts`
        );
      }

      computed.forEach((result, position) => {
        const pending = toCompute[position];
        if (pending === undefined) return; // unreachable after the length check
        results.set(pending.index, result);
        this.remember(pending.cacheKey, result);
      });
    }

    // Reassemble in input order.
    const ordered: EmbeddingResult[] = [];
    for (let i = 0; i < texts.length; i++) {
      const result = results.get(i);
      if (result !== undefined) ordered.push(result);
    }
    return ordered;
  }

  /** Configuration of the wrapped provider. */
  getConfig(): EmbeddingConfig {
    return this.delegate.getConfig();
  }

  /** Availability of the wrapped provider. */
  async isAvailable(): Promise<boolean> {
    return this.delegate.isAvailable();
  }

  /**
   * Clear the cache
   */
  clearCache(): void {
    this.cache.clear();
  }

  /**
   * Get cache statistics
   */
  getCacheStats(): { size: number; maxSize: number } {
    return {
      size: this.cache.size,
      maxSize: this.maxCacheSize,
    };
  }

  /**
   * Build the cache key for a text. A request without an explicit model is
   * keyed under the delegate's configured model.
   */
  private cacheKeyFor(text: string, model?: string): string {
    return `${model ?? this.getConfig().model}:${text}`;
  }

  /** Store a result, evicting the oldest entries while the cache is full. */
  private remember(cacheKey: string, result: EmbeddingResult): void {
    if (this.maxCacheSize <= 0) return;

    while (this.cache.size >= this.maxCacheSize && !this.cache.has(cacheKey)) {
      const oldest = this.cache.keys().next();
      if (oldest.done) break;
      this.cache.delete(oldest.value);
    }
    this.cache.set(cacheKey, result);
  }
}