ai-database 2.0.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/dist/actions.d.ts +247 -0
- package/dist/actions.d.ts.map +1 -0
- package/dist/actions.js +260 -0
- package/dist/actions.js.map +1 -0
- package/dist/ai-promise-db.d.ts +34 -2
- package/dist/ai-promise-db.d.ts.map +1 -1
- package/dist/ai-promise-db.js +511 -66
- package/dist/ai-promise-db.js.map +1 -1
- package/dist/constants.d.ts +16 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +16 -0
- package/dist/constants.js.map +1 -0
- package/dist/events.d.ts +153 -0
- package/dist/events.d.ts.map +1 -0
- package/dist/events.js +154 -0
- package/dist/events.js.map +1 -0
- package/dist/index.d.ts +8 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -1
- package/dist/index.js.map +1 -1
- package/dist/memory-provider.d.ts +144 -2
- package/dist/memory-provider.d.ts.map +1 -1
- package/dist/memory-provider.js +569 -13
- package/dist/memory-provider.js.map +1 -1
- package/dist/schema/cascade.d.ts +96 -0
- package/dist/schema/cascade.d.ts.map +1 -0
- package/dist/schema/cascade.js +528 -0
- package/dist/schema/cascade.js.map +1 -0
- package/dist/schema/index.d.ts +197 -0
- package/dist/schema/index.d.ts.map +1 -0
- package/dist/schema/index.js +1211 -0
- package/dist/schema/index.js.map +1 -0
- package/dist/schema/parse.d.ts +225 -0
- package/dist/schema/parse.d.ts.map +1 -0
- package/dist/schema/parse.js +732 -0
- package/dist/schema/parse.js.map +1 -0
- package/dist/schema/provider.d.ts +176 -0
- package/dist/schema/provider.d.ts.map +1 -0
- package/dist/schema/provider.js +258 -0
- package/dist/schema/provider.js.map +1 -0
- package/dist/schema/resolve.d.ts +87 -0
- package/dist/schema/resolve.d.ts.map +1 -0
- package/dist/schema/resolve.js +474 -0
- package/dist/schema/resolve.js.map +1 -0
- package/dist/schema/semantic.d.ts +53 -0
- package/dist/schema/semantic.d.ts.map +1 -0
- package/dist/schema/semantic.js +247 -0
- package/dist/schema/semantic.js.map +1 -0
- package/dist/schema/types.d.ts +528 -0
- package/dist/schema/types.d.ts.map +1 -0
- package/dist/schema/types.js +9 -0
- package/dist/schema/types.js.map +1 -0
- package/dist/schema.d.ts +24 -867
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +41 -1124
- package/dist/schema.js.map +1 -1
- package/dist/semantic.d.ts +175 -0
- package/dist/semantic.d.ts.map +1 -0
- package/dist/semantic.js +338 -0
- package/dist/semantic.js.map +1 -0
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +13 -4
- package/.turbo/turbo-build.log +0 -5
- package/TESTING.md +0 -410
- package/TEST_SUMMARY.md +0 -250
- package/TODO.md +0 -128
- package/src/ai-promise-db.ts +0 -1243
- package/src/authorization.ts +0 -1102
- package/src/durable-clickhouse.ts +0 -596
- package/src/durable-promise.ts +0 -582
- package/src/execution-queue.ts +0 -608
- package/src/index.test.ts +0 -868
- package/src/index.ts +0 -337
- package/src/linguistic.ts +0 -404
- package/src/memory-provider.test.ts +0 -1036
- package/src/memory-provider.ts +0 -1119
- package/src/schema.test.ts +0 -1254
- package/src/schema.ts +0 -2296
- package/src/tests.ts +0 -725
- package/src/types.ts +0 -1177
- package/test/README.md +0 -153
- package/test/edge-cases.test.ts +0 -646
- package/test/provider-resolution.test.ts +0 -402
- package/tsconfig.json +0 -9
- package/vitest.config.ts +0 -19
package/dist/memory-provider.js
CHANGED
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Simple provider implementation for testing and development.
|
|
5
5
|
* Includes concurrency control via Semaphore for rate limiting.
|
|
6
|
+
* Supports automatic embedding generation on create/update.
|
|
6
7
|
*/
|
|
8
|
+
import { cosineSimilarity, computeRRF, extractEmbeddableText, generateContentHash, } from './semantic.js';
|
|
9
|
+
import { EMBEDDING_DIMENSIONS } from './constants.js';
|
|
7
10
|
// =============================================================================
|
|
8
11
|
// Semaphore for Concurrency Control
|
|
9
12
|
// =============================================================================
|
|
@@ -67,6 +70,15 @@ export class Semaphore {
|
|
|
67
70
|
// =============================================================================
|
|
68
71
|
// Generate ID
|
|
69
72
|
// =============================================================================
|
|
73
|
+
/**
|
|
74
|
+
* Generate a unique ID for a new entity
|
|
75
|
+
*
|
|
76
|
+
* Uses crypto.randomUUID() to generate a UUID v4 identifier.
|
|
77
|
+
*
|
|
78
|
+
* @returns A new UUID string
|
|
79
|
+
*
|
|
80
|
+
* @internal
|
|
81
|
+
*/
|
|
70
82
|
function generateId() {
|
|
71
83
|
return crypto.randomUUID();
|
|
72
84
|
}
|
|
@@ -116,11 +128,36 @@ function conjugateVerb(verb) {
|
|
|
116
128
|
activity: toGerund(base),
|
|
117
129
|
};
|
|
118
130
|
}
|
|
119
|
-
/**
|
|
131
|
+
/**
|
|
132
|
+
* Check if a character is a vowel (a, e, i, o, u)
|
|
133
|
+
*
|
|
134
|
+
* @param char - The character to check
|
|
135
|
+
* @returns True if the character is a vowel
|
|
136
|
+
*
|
|
137
|
+
* @internal
|
|
138
|
+
*/
|
|
120
139
|
function isVowel(char) {
|
|
121
140
|
return char ? 'aeiou'.includes(char.toLowerCase()) : false;
|
|
122
141
|
}
|
|
123
|
-
/**
|
|
142
|
+
/**
|
|
143
|
+
* Check if we should double the final consonant when adding a suffix
|
|
144
|
+
*
|
|
145
|
+
* English spelling rules require doubling the final consonant in certain
|
|
146
|
+
* cases when adding suffixes like -ing or -ed. This applies to short words
|
|
147
|
+
* ending in consonant-vowel-consonant patterns.
|
|
148
|
+
*
|
|
149
|
+
* @param verb - The verb to check
|
|
150
|
+
* @returns True if the final consonant should be doubled
|
|
151
|
+
*
|
|
152
|
+
* @example
|
|
153
|
+
* ```ts
|
|
154
|
+
* shouldDoubleConsonant('run') // => true (running)
|
|
155
|
+
* shouldDoubleConsonant('play') // => false (playing)
|
|
156
|
+
* shouldDoubleConsonant('fix') // => false (fixing - x is excluded)
|
|
157
|
+
* ```
|
|
158
|
+
*
|
|
159
|
+
* @internal
|
|
160
|
+
*/
|
|
124
161
|
function shouldDoubleConsonant(verb) {
|
|
125
162
|
if (verb.length < 2)
|
|
126
163
|
return false;
|
|
@@ -135,7 +172,26 @@ function shouldDoubleConsonant(verb) {
|
|
|
135
172
|
return true;
|
|
136
173
|
return false;
|
|
137
174
|
}
|
|
138
|
-
/**
|
|
175
|
+
/**
|
|
176
|
+
* Convert a verb to present tense third person singular form
|
|
177
|
+
*
|
|
178
|
+
* Applies English conjugation rules for third person singular:
|
|
179
|
+
* - Verbs ending in consonant + y: change y to ies (try → tries)
|
|
180
|
+
* - Verbs ending in s, x, z, ch, sh: add es (push → pushes)
|
|
181
|
+
* - Other verbs: add s (run → runs)
|
|
182
|
+
*
|
|
183
|
+
* @param verb - The base form of the verb
|
|
184
|
+
* @returns The third person singular present tense form
|
|
185
|
+
*
|
|
186
|
+
* @example
|
|
187
|
+
* ```ts
|
|
188
|
+
* toPresent('create') // => 'creates'
|
|
189
|
+
* toPresent('push') // => 'pushes'
|
|
190
|
+
* toPresent('try') // => 'tries'
|
|
191
|
+
* ```
|
|
192
|
+
*
|
|
193
|
+
* @internal
|
|
194
|
+
*/
|
|
139
195
|
function toPresent(verb) {
|
|
140
196
|
if (verb.endsWith('y') && !isVowel(verb[verb.length - 2])) {
|
|
141
197
|
return verb.slice(0, -1) + 'ies';
|
|
@@ -146,7 +202,27 @@ function toPresent(verb) {
|
|
|
146
202
|
}
|
|
147
203
|
return verb + 's';
|
|
148
204
|
}
|
|
149
|
-
/**
|
|
205
|
+
/**
|
|
206
|
+
* Convert a verb to gerund/present participle form (-ing)
|
|
207
|
+
*
|
|
208
|
+
* Applies English spelling rules for adding -ing:
|
|
209
|
+
* - Verbs ending in ie: change ie to ying (die → dying)
|
|
210
|
+
* - Verbs ending in e (not ee): drop e, add ing (create → creating)
|
|
211
|
+
* - Verbs requiring consonant doubling: double + ing (run → running)
|
|
212
|
+
* - Other verbs: add ing (play → playing)
|
|
213
|
+
*
|
|
214
|
+
* @param verb - The base form of the verb
|
|
215
|
+
* @returns The gerund/present participle form
|
|
216
|
+
*
|
|
217
|
+
* @example
|
|
218
|
+
* ```ts
|
|
219
|
+
* toGerund('create') // => 'creating'
|
|
220
|
+
* toGerund('run') // => 'running'
|
|
221
|
+
* toGerund('die') // => 'dying'
|
|
222
|
+
* ```
|
|
223
|
+
*
|
|
224
|
+
* @internal
|
|
225
|
+
*/
|
|
150
226
|
function toGerund(verb) {
|
|
151
227
|
if (verb.endsWith('ie'))
|
|
152
228
|
return verb.slice(0, -2) + 'ying';
|
|
@@ -177,12 +253,305 @@ export class MemoryProvider {
|
|
|
177
253
|
artifacts = new Map();
|
|
178
254
|
// Concurrency control
|
|
179
255
|
semaphore;
|
|
256
|
+
// Embedding configuration
|
|
257
|
+
embeddingsConfig;
|
|
180
258
|
constructor(options = {}) {
|
|
181
259
|
this.semaphore = new Semaphore(options.concurrency ?? 10);
|
|
260
|
+
this.embeddingsConfig = options.embeddings ?? {};
|
|
261
|
+
}
|
|
262
|
+
/**
|
|
263
|
+
* Set embeddings configuration
|
|
264
|
+
*/
|
|
265
|
+
setEmbeddingsConfig(config) {
|
|
266
|
+
this.embeddingsConfig = config;
|
|
267
|
+
}
|
|
268
|
+
// ===========================================================================
|
|
269
|
+
// Embedding Generation
|
|
270
|
+
// ===========================================================================
|
|
271
|
+
/**
|
|
272
|
+
* Generate embedding for text (deterministic for testing)
|
|
273
|
+
*
|
|
274
|
+
* Uses semantic word vectors to create meaningful embeddings
|
|
275
|
+
* where similar concepts have higher cosine similarity.
|
|
276
|
+
*/
|
|
277
|
+
generateEmbedding(text) {
|
|
278
|
+
// Import semantic vectors for deterministic embeddings
|
|
279
|
+
const SEMANTIC_VECTORS = {
|
|
280
|
+
// AI/ML domain
|
|
281
|
+
machine: [0.9, 0.1, 0.05, 0.02],
|
|
282
|
+
learning: [0.85, 0.15, 0.08, 0.03],
|
|
283
|
+
artificial: [0.88, 0.12, 0.06, 0.04],
|
|
284
|
+
intelligence: [0.87, 0.13, 0.07, 0.05],
|
|
285
|
+
neural: [0.82, 0.18, 0.09, 0.06],
|
|
286
|
+
network: [0.75, 0.2, 0.15, 0.1],
|
|
287
|
+
deep: [0.8, 0.17, 0.1, 0.08],
|
|
288
|
+
ai: [0.92, 0.08, 0.04, 0.02],
|
|
289
|
+
ml: [0.88, 0.12, 0.06, 0.03],
|
|
290
|
+
// Programming domain
|
|
291
|
+
programming: [0.15, 0.85, 0.1, 0.05],
|
|
292
|
+
code: [0.12, 0.88, 0.12, 0.06],
|
|
293
|
+
software: [0.18, 0.82, 0.15, 0.08],
|
|
294
|
+
development: [0.2, 0.8, 0.18, 0.1],
|
|
295
|
+
typescript: [0.1, 0.9, 0.08, 0.04],
|
|
296
|
+
javascript: [0.12, 0.88, 0.1, 0.05],
|
|
297
|
+
python: [0.25, 0.75, 0.12, 0.06],
|
|
298
|
+
react: [0.08, 0.85, 0.2, 0.1],
|
|
299
|
+
vue: [0.06, 0.84, 0.18, 0.08],
|
|
300
|
+
frontend: [0.05, 0.8, 0.25, 0.12],
|
|
301
|
+
// Database domain
|
|
302
|
+
database: [0.1, 0.7, 0.08, 0.6],
|
|
303
|
+
query: [0.12, 0.65, 0.1, 0.7],
|
|
304
|
+
sql: [0.08, 0.6, 0.05, 0.75],
|
|
305
|
+
index: [0.1, 0.58, 0.08, 0.72],
|
|
306
|
+
optimization: [0.15, 0.55, 0.12, 0.68],
|
|
307
|
+
performance: [0.18, 0.5, 0.15, 0.65],
|
|
308
|
+
// DevOps domain
|
|
309
|
+
kubernetes: [0.05, 0.6, 0.8, 0.15],
|
|
310
|
+
docker: [0.08, 0.55, 0.82, 0.12],
|
|
311
|
+
container: [0.06, 0.5, 0.85, 0.1],
|
|
312
|
+
deployment: [0.1, 0.45, 0.78, 0.18],
|
|
313
|
+
devops: [0.12, 0.48, 0.75, 0.2],
|
|
314
|
+
// Food domain (distinctly different direction - high in dim 3, low elsewhere)
|
|
315
|
+
cooking: [0.05, 0.08, 0.05, 0.95],
|
|
316
|
+
recipe: [0.06, 0.07, 0.04, 0.93],
|
|
317
|
+
food: [0.04, 0.06, 0.04, 0.96],
|
|
318
|
+
pasta: [0.03, 0.05, 0.03, 0.97],
|
|
319
|
+
pizza: [0.03, 0.06, 0.04, 0.96],
|
|
320
|
+
italian: [0.04, 0.07, 0.04, 0.94],
|
|
321
|
+
garden: [0.05, 0.04, 0.03, 0.92],
|
|
322
|
+
flowers: [0.04, 0.03, 0.03, 0.91],
|
|
323
|
+
chef: [0.05, 0.1, 0.05, 0.95],
|
|
324
|
+
restaurant: [0.06, 0.08, 0.04, 0.93],
|
|
325
|
+
kitchen: [0.05, 0.09, 0.05, 0.94],
|
|
326
|
+
antonio: [0.05, 0.08, 0.04, 0.92],
|
|
327
|
+
// Research/Academic domain (similar to AI/ML)
|
|
328
|
+
researcher: [0.82, 0.2, 0.1, 0.08],
|
|
329
|
+
phd: [0.8, 0.18, 0.12, 0.1],
|
|
330
|
+
research: [0.85, 0.15, 0.1, 0.07],
|
|
331
|
+
professor: [0.78, 0.22, 0.12, 0.1],
|
|
332
|
+
academic: [0.75, 0.2, 0.15, 0.12],
|
|
333
|
+
// Location/Venue domain (for fuzzy threshold tests - need distinct clusters)
|
|
334
|
+
// "conference center downtown" cluster - high values in different dimensions
|
|
335
|
+
conference: [0.2, 0.25, 0.85, 0.2],
|
|
336
|
+
center: [0.18, 0.22, 0.88, 0.18],
|
|
337
|
+
downtown: [0.15, 0.2, 0.9, 0.15],
|
|
338
|
+
// "tech hub 123 main st" cluster - completely different direction
|
|
339
|
+
hub: [0.85, 0.15, 0.2, 0.15],
|
|
340
|
+
main: [0.12, 0.12, 0.15, 0.1],
|
|
341
|
+
st: [0.1, 0.1, 0.12, 0.08],
|
|
342
|
+
'123': [0.08, 0.08, 0.1, 0.05],
|
|
343
|
+
// GraphQL/API
|
|
344
|
+
graphql: [0.1, 0.75, 0.15, 0.55],
|
|
345
|
+
api: [0.15, 0.7, 0.2, 0.5],
|
|
346
|
+
rest: [0.12, 0.68, 0.18, 0.48],
|
|
347
|
+
queries: [0.14, 0.65, 0.12, 0.6],
|
|
348
|
+
// Testing
|
|
349
|
+
testing: [0.1, 0.78, 0.08, 0.15],
|
|
350
|
+
test: [0.08, 0.8, 0.06, 0.12],
|
|
351
|
+
unit: [0.06, 0.82, 0.05, 0.1],
|
|
352
|
+
integration: [0.12, 0.75, 0.1, 0.18],
|
|
353
|
+
// State management
|
|
354
|
+
state: [0.08, 0.82, 0.2, 0.08],
|
|
355
|
+
management: [0.15, 0.75, 0.25, 0.12],
|
|
356
|
+
hooks: [0.06, 0.88, 0.15, 0.05],
|
|
357
|
+
usestate: [0.05, 0.9, 0.12, 0.04],
|
|
358
|
+
useeffect: [0.04, 0.88, 0.1, 0.03],
|
|
359
|
+
// Related/Concept domain (for semantic similarity tests)
|
|
360
|
+
related: [0.5, 0.5, 0.5, 0.5],
|
|
361
|
+
concept: [0.55, 0.45, 0.55, 0.45],
|
|
362
|
+
similar: [0.52, 0.48, 0.52, 0.48],
|
|
363
|
+
different: [0.48, 0.52, 0.48, 0.52],
|
|
364
|
+
words: [0.45, 0.55, 0.45, 0.55],
|
|
365
|
+
semantically: [0.6, 0.4, 0.6, 0.4],
|
|
366
|
+
// Exact match domain (distinctly different vectors)
|
|
367
|
+
exact: [0.1, 0.1, 0.1, 0.9],
|
|
368
|
+
match: [0.15, 0.15, 0.1, 0.85],
|
|
369
|
+
title: [0.1, 0.2, 0.1, 0.8],
|
|
370
|
+
contains: [0.12, 0.18, 0.12, 0.78],
|
|
371
|
+
search: [0.08, 0.22, 0.08, 0.82],
|
|
372
|
+
terms: [0.05, 0.25, 0.05, 0.85],
|
|
373
|
+
// Business domain (for fuzzy forward resolution tests)
|
|
374
|
+
enterprise: [0.7, 0.3, 0.8, 0.6],
|
|
375
|
+
large: [0.65, 0.25, 0.75, 0.55],
|
|
376
|
+
corporations: [0.68, 0.28, 0.78, 0.58],
|
|
377
|
+
companies: [0.6, 0.4, 0.7, 0.5],
|
|
378
|
+
company: [0.62, 0.38, 0.72, 0.52],
|
|
379
|
+
thousands: [0.7, 0.2, 0.7, 0.5],
|
|
380
|
+
employees: [0.55, 0.35, 0.65, 0.45],
|
|
381
|
+
big: [0.68, 0.3, 0.75, 0.58],
|
|
382
|
+
small: [0.3, 0.6, 0.3, 0.4],
|
|
383
|
+
business: [0.5, 0.5, 0.6, 0.5],
|
|
384
|
+
owners: [0.4, 0.5, 0.5, 0.45],
|
|
385
|
+
consumer: [0.35, 0.55, 0.35, 0.35],
|
|
386
|
+
individual: [0.32, 0.58, 0.32, 0.32],
|
|
387
|
+
b2c: [0.3, 0.6, 0.3, 0.35],
|
|
388
|
+
// Tech professional domain
|
|
389
|
+
developer: [0.2, 0.85, 0.15, 0.1],
|
|
390
|
+
engineer: [0.25, 0.82, 0.18, 0.12],
|
|
391
|
+
engineers: [0.27, 0.8, 0.2, 0.14],
|
|
392
|
+
builds: [0.18, 0.78, 0.16, 0.08],
|
|
393
|
+
writes: [0.15, 0.75, 0.12, 0.06],
|
|
394
|
+
professional: [0.22, 0.72, 0.2, 0.15],
|
|
395
|
+
applications: [0.2, 0.78, 0.18, 0.1],
|
|
396
|
+
tech: [0.25, 0.8, 0.2, 0.12],
|
|
397
|
+
technology: [0.28, 0.78, 0.22, 0.14],
|
|
398
|
+
electronics: [0.3, 0.75, 0.25, 0.15],
|
|
399
|
+
device: [0.25, 0.82, 0.2, 0.1],
|
|
400
|
+
furniture: [0.1, 0.15, 0.2, 0.85],
|
|
401
|
+
home: [0.12, 0.18, 0.22, 0.8],
|
|
402
|
+
living: [0.1, 0.15, 0.2, 0.82],
|
|
403
|
+
goods: [0.3, 0.5, 0.35, 0.4],
|
|
404
|
+
leaders: [0.4, 0.5, 0.6, 0.4],
|
|
405
|
+
senior: [0.35, 0.55, 0.55, 0.35],
|
|
406
|
+
// Data science domain
|
|
407
|
+
data: [0.75, 0.3, 0.15, 0.55],
|
|
408
|
+
science: [0.78, 0.25, 0.12, 0.5],
|
|
409
|
+
scientist: [0.8, 0.28, 0.1, 0.52],
|
|
410
|
+
background: [0.72, 0.32, 0.14, 0.48],
|
|
411
|
+
// DevOps/cloud domain
|
|
412
|
+
cloud: [0.1, 0.55, 0.85, 0.15],
|
|
413
|
+
expertise: [0.15, 0.5, 0.8, 0.18],
|
|
414
|
+
// Support domain
|
|
415
|
+
support: [0.2, 0.45, 0.3, 0.55],
|
|
416
|
+
specialist: [0.22, 0.48, 0.32, 0.52],
|
|
417
|
+
technical: [0.25, 0.65, 0.35, 0.4],
|
|
418
|
+
issues: [0.18, 0.42, 0.28, 0.48],
|
|
419
|
+
// Security domain
|
|
420
|
+
security: [0.3, 0.6, 0.4, 0.7],
|
|
421
|
+
auth: [0.28, 0.58, 0.38, 0.72],
|
|
422
|
+
authentication: [0.32, 0.55, 0.42, 0.75],
|
|
423
|
+
identity: [0.35, 0.52, 0.45, 0.68],
|
|
424
|
+
oauth: [0.3, 0.62, 0.4, 0.7],
|
|
425
|
+
// CRM domain
|
|
426
|
+
crm: [0.45, 0.4, 0.7, 0.55],
|
|
427
|
+
sales: [0.42, 0.38, 0.68, 0.52],
|
|
428
|
+
salesforce: [0.48, 0.42, 0.72, 0.58],
|
|
429
|
+
provider: [0.5, 0.45, 0.65, 0.5],
|
|
430
|
+
};
|
|
431
|
+
const DEFAULT_VECTOR = [0.1, 0.1, 0.1, 0.1];
|
|
432
|
+
// Simple hash function
|
|
433
|
+
const simpleHash = (str) => {
|
|
434
|
+
let hash = 0;
|
|
435
|
+
for (let i = 0; i < str.length; i++) {
|
|
436
|
+
const char = str.charCodeAt(i);
|
|
437
|
+
hash = ((hash << 5) - hash) + char;
|
|
438
|
+
hash = hash & hash;
|
|
439
|
+
}
|
|
440
|
+
return Math.abs(hash);
|
|
441
|
+
};
|
|
442
|
+
// Seeded random
|
|
443
|
+
const seededRandom = (seed, index) => {
|
|
444
|
+
const x = Math.sin(seed + index) * 10000;
|
|
445
|
+
return x - Math.floor(x);
|
|
446
|
+
};
|
|
447
|
+
// Tokenize
|
|
448
|
+
const words = text
|
|
449
|
+
.toLowerCase()
|
|
450
|
+
.replace(/[^\w\s]/g, ' ')
|
|
451
|
+
.split(/\s+/)
|
|
452
|
+
.filter(w => w.length > 0);
|
|
453
|
+
if (words.length === 0) {
|
|
454
|
+
return Array.from({ length: EMBEDDING_DIMENSIONS }, (_, i) => seededRandom(0, i) * 0.01);
|
|
455
|
+
}
|
|
456
|
+
// Aggregate word vectors
|
|
457
|
+
const aggregated = [0, 0, 0, 0];
|
|
458
|
+
for (const word of words) {
|
|
459
|
+
const lower = word.toLowerCase();
|
|
460
|
+
const vec = SEMANTIC_VECTORS[lower] ?? DEFAULT_VECTOR.map((v, i) => v + seededRandom(simpleHash(lower), i) * 0.1);
|
|
461
|
+
for (let i = 0; i < 4; i++) {
|
|
462
|
+
aggregated[i] += vec[i];
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
// Normalize
|
|
466
|
+
const norm = Math.sqrt(aggregated.reduce((sum, v) => sum + v * v, 0));
|
|
467
|
+
const normalized = aggregated.map(v => v / (norm || 1));
|
|
468
|
+
// Expand to full dimensions
|
|
469
|
+
const textHash = simpleHash(text);
|
|
470
|
+
const embedding = new Array(EMBEDDING_DIMENSIONS);
|
|
471
|
+
for (let i = 0; i < EMBEDDING_DIMENSIONS; i++) {
|
|
472
|
+
const baseIndex = i % 4;
|
|
473
|
+
const base = normalized[baseIndex];
|
|
474
|
+
const noise = seededRandom(textHash, i) * 0.1 - 0.05;
|
|
475
|
+
embedding[i] = base + noise;
|
|
476
|
+
}
|
|
477
|
+
// Final normalization
|
|
478
|
+
const finalNorm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
|
|
479
|
+
return embedding.map((v) => v / (finalNorm || 1));
|
|
480
|
+
}
|
|
481
|
+
/**
|
|
482
|
+
* Check if embeddings should be generated for a given entity type
|
|
483
|
+
*
|
|
484
|
+
* Consults the embeddings configuration to determine:
|
|
485
|
+
* - If embeddings are disabled for this type (config === false)
|
|
486
|
+
* - If specific fields are configured for embedding
|
|
487
|
+
* - If auto-detection of text fields should be used (default)
|
|
488
|
+
*
|
|
489
|
+
* @param type - The entity type name
|
|
490
|
+
* @returns Object with enabled flag and optional field list
|
|
491
|
+
*
|
|
492
|
+
* @internal
|
|
493
|
+
*/
|
|
494
|
+
shouldEmbed(type) {
|
|
495
|
+
const config = this.embeddingsConfig[type];
|
|
496
|
+
if (config === false) {
|
|
497
|
+
return { enabled: false };
|
|
498
|
+
}
|
|
499
|
+
if (config && config.fields) {
|
|
500
|
+
return { enabled: true, fields: config.fields };
|
|
501
|
+
}
|
|
502
|
+
// Default: embed all text fields (auto-detect)
|
|
503
|
+
return { enabled: true };
|
|
504
|
+
}
|
|
505
|
+
/**
|
|
506
|
+
* Auto-generate and store an embedding for an entity
|
|
507
|
+
*
|
|
508
|
+
* Called during create/update operations to automatically generate
|
|
509
|
+
* embeddings for entities based on their text content. The embedding
|
|
510
|
+
* is stored as an artifact associated with the entity.
|
|
511
|
+
*
|
|
512
|
+
* @param type - The entity type name
|
|
513
|
+
* @param id - The entity ID
|
|
514
|
+
* @param data - The entity data to extract text from
|
|
515
|
+
*
|
|
516
|
+
* @internal
|
|
517
|
+
*/
|
|
518
|
+
async autoEmbed(type, id, data) {
|
|
519
|
+
const { enabled, fields } = this.shouldEmbed(type);
|
|
520
|
+
if (!enabled)
|
|
521
|
+
return;
|
|
522
|
+
// Extract embeddable text
|
|
523
|
+
const { text, fields: embeddedFields } = extractEmbeddableText(data, fields);
|
|
524
|
+
if (!text.trim())
|
|
525
|
+
return;
|
|
526
|
+
// Generate embedding
|
|
527
|
+
const embedding = this.generateEmbedding(text);
|
|
528
|
+
const contentHash = generateContentHash(text);
|
|
529
|
+
// Store as artifact with complete metadata
|
|
530
|
+
const url = `${type}/${id}`;
|
|
531
|
+
await this.setArtifact(url, 'embedding', {
|
|
532
|
+
content: embedding,
|
|
533
|
+
sourceHash: contentHash,
|
|
534
|
+
metadata: {
|
|
535
|
+
fields: embeddedFields,
|
|
536
|
+
dimensions: EMBEDDING_DIMENSIONS,
|
|
537
|
+
text: text.slice(0, 200),
|
|
538
|
+
},
|
|
539
|
+
});
|
|
182
540
|
}
|
|
183
541
|
// ===========================================================================
|
|
184
542
|
// Things (Records)
|
|
185
543
|
// ===========================================================================
|
|
544
|
+
/**
|
|
545
|
+
* Get or create the storage map for an entity type
|
|
546
|
+
*
|
|
547
|
+
* Lazily creates the type-specific storage map if it doesn't exist.
|
|
548
|
+
* This gives each entity type its own ID namespace, so identical IDs used by different types do not collide.
|
|
549
|
+
*
|
|
550
|
+
* @param type - The entity type name
|
|
551
|
+
* @returns The Map storing entities of this type (id -> entity data)
|
|
552
|
+
*
|
|
553
|
+
* @internal
|
|
554
|
+
*/
|
|
186
555
|
getTypeStore(type) {
|
|
187
556
|
if (!this.entities.has(type)) {
|
|
188
557
|
this.entities.set(type, new Map());
|
|
@@ -269,6 +638,118 @@ export class MemoryProvider {
|
|
|
269
638
|
scored.sort((a, b) => b.score - a.score);
|
|
270
639
|
return scored.map((s) => s.entity);
|
|
271
640
|
}
|
|
641
|
+
/**
|
|
642
|
+
* Semantic search using embedding similarity
|
|
643
|
+
*/
|
|
644
|
+
async semanticSearch(type, query, options) {
|
|
645
|
+
const store = this.getTypeStore(type);
|
|
646
|
+
const limit = options?.limit ?? 10;
|
|
647
|
+
const minScore = options?.minScore ?? 0;
|
|
648
|
+
// Generate query embedding
|
|
649
|
+
const queryEmbedding = this.generateEmbedding(query);
|
|
650
|
+
const scored = [];
|
|
651
|
+
for (const [id, entity] of store) {
|
|
652
|
+
// Get stored embedding from artifacts
|
|
653
|
+
const url = `${type}/${id}`;
|
|
654
|
+
const artifact = await this.getArtifact(url, 'embedding');
|
|
655
|
+
if (!artifact || !Array.isArray(artifact.content)) {
|
|
656
|
+
continue;
|
|
657
|
+
}
|
|
658
|
+
const embedding = artifact.content;
|
|
659
|
+
const score = cosineSimilarity(queryEmbedding, embedding);
|
|
660
|
+
if (score >= minScore) {
|
|
661
|
+
scored.push({
|
|
662
|
+
entity: { ...entity, $id: id, $type: type },
|
|
663
|
+
score,
|
|
664
|
+
});
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
// Sort by score descending
|
|
668
|
+
scored.sort((a, b) => b.score - a.score);
|
|
669
|
+
// Apply limit and add $score
|
|
670
|
+
return scored.slice(0, limit).map(({ entity, score }) => ({
|
|
671
|
+
...entity,
|
|
672
|
+
$score: score,
|
|
673
|
+
}));
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* Hybrid search combining FTS and semantic with RRF scoring
|
|
677
|
+
*/
|
|
678
|
+
async hybridSearch(type, query, options) {
|
|
679
|
+
const limit = options?.limit ?? 10;
|
|
680
|
+
const offset = options?.offset ?? 0;
|
|
681
|
+
const rrfK = options?.rrfK ?? 60;
|
|
682
|
+
const ftsWeight = options?.ftsWeight ?? 0.5;
|
|
683
|
+
const semanticWeight = options?.semanticWeight ?? 0.5;
|
|
684
|
+
const minScore = options?.minScore ?? 0;
|
|
685
|
+
// Get FTS results with their ranks
|
|
686
|
+
const ftsResults = await this.search(type, query);
|
|
687
|
+
const ftsRanks = new Map();
|
|
688
|
+
ftsResults.forEach((entity, index) => {
|
|
689
|
+
const id = entity.$id || entity.id;
|
|
690
|
+
ftsRanks.set(id, index + 1); // 1-indexed rank
|
|
691
|
+
});
|
|
692
|
+
// Get semantic results with their ranks and scores
|
|
693
|
+
// Get more results to ensure we have enough after offset
|
|
694
|
+
const semanticResults = await this.semanticSearch(type, query, { limit: (limit + offset) * 2, minScore });
|
|
695
|
+
const semanticRanks = new Map();
|
|
696
|
+
semanticResults.forEach((entity, index) => {
|
|
697
|
+
const id = entity.$id || entity.id;
|
|
698
|
+
semanticRanks.set(id, { rank: index + 1, score: entity.$score });
|
|
699
|
+
});
|
|
700
|
+
// Combine results with RRF
|
|
701
|
+
const allIds = new Set([...ftsRanks.keys(), ...semanticRanks.keys()]);
|
|
702
|
+
const combined = [];
|
|
703
|
+
const store = this.getTypeStore(type);
|
|
704
|
+
for (const id of allIds) {
|
|
705
|
+
const entity = store.get(id);
|
|
706
|
+
if (!entity)
|
|
707
|
+
continue;
|
|
708
|
+
const ftsRank = ftsRanks.get(id) ?? Infinity;
|
|
709
|
+
const semantic = semanticRanks.get(id) ?? { rank: Infinity, score: 0 };
|
|
710
|
+
const semanticRank = semantic.rank;
|
|
711
|
+
const semanticScore = semantic.score;
|
|
712
|
+
// Skip if semantic score is below threshold (when we have a semantic result)
|
|
713
|
+
if (semanticRanks.has(id) && semanticScore < minScore)
|
|
714
|
+
continue;
|
|
715
|
+
const rrfScore = computeRRF(ftsRank, semanticRank, rrfK, ftsWeight, semanticWeight);
|
|
716
|
+
combined.push({
|
|
717
|
+
entity: { ...entity, $id: id, $type: type },
|
|
718
|
+
rrfScore,
|
|
719
|
+
ftsRank,
|
|
720
|
+
semanticRank,
|
|
721
|
+
semanticScore,
|
|
722
|
+
});
|
|
723
|
+
}
|
|
724
|
+
// Sort by RRF score descending
|
|
725
|
+
combined.sort((a, b) => b.rrfScore - a.rrfScore);
|
|
726
|
+
// Apply offset and limit, then return with scoring fields
|
|
727
|
+
return combined.slice(offset, offset + limit).map(({ entity, rrfScore, ftsRank, semanticRank, semanticScore }) => ({
|
|
728
|
+
...entity,
|
|
729
|
+
$rrfScore: rrfScore,
|
|
730
|
+
$ftsRank: ftsRank,
|
|
731
|
+
$semanticRank: semanticRank,
|
|
732
|
+
$score: semanticScore,
|
|
733
|
+
}));
|
|
734
|
+
}
|
|
735
|
+
/**
|
|
736
|
+
* Get all embeddings for a type
|
|
737
|
+
*/
|
|
738
|
+
async getAllEmbeddings(type) {
|
|
739
|
+
const store = this.getTypeStore(type);
|
|
740
|
+
const results = [];
|
|
741
|
+
for (const [id] of store) {
|
|
742
|
+
const url = `${type}/${id}`;
|
|
743
|
+
const artifact = await this.getArtifact(url, 'embedding');
|
|
744
|
+
if (artifact && Array.isArray(artifact.content)) {
|
|
745
|
+
results.push({
|
|
746
|
+
id,
|
|
747
|
+
embedding: artifact.content,
|
|
748
|
+
});
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
return results;
|
|
752
|
+
}
|
|
272
753
|
async create(type, id, data) {
|
|
273
754
|
const store = this.getTypeStore(type);
|
|
274
755
|
const entityId = id || generateId();
|
|
@@ -281,8 +762,12 @@ export class MemoryProvider {
|
|
|
281
762
|
updatedAt: new Date().toISOString(),
|
|
282
763
|
};
|
|
283
764
|
store.set(entityId, entity);
|
|
284
|
-
//
|
|
285
|
-
await this.
|
|
765
|
+
// Auto-generate embedding
|
|
766
|
+
await this.autoEmbed(type, entityId, entity);
|
|
767
|
+
// Emit type-specific and global events
|
|
768
|
+
const eventData = { $id: entityId, $type: type, ...entity };
|
|
769
|
+
await this.emit(`${type}.created`, eventData);
|
|
770
|
+
await this.emit('entity:created', eventData);
|
|
286
771
|
return { ...entity, $id: entityId, $type: type };
|
|
287
772
|
}
|
|
288
773
|
async update(type, id, data) {
|
|
@@ -297,10 +782,14 @@ export class MemoryProvider {
|
|
|
297
782
|
updatedAt: new Date().toISOString(),
|
|
298
783
|
};
|
|
299
784
|
store.set(id, updated);
|
|
300
|
-
//
|
|
301
|
-
await this.
|
|
302
|
-
// Invalidate artifacts when data changes
|
|
785
|
+
// Re-generate embedding with updated data
|
|
786
|
+
await this.autoEmbed(type, id, updated);
|
|
787
|
+
// Invalidate non-embedding artifacts when data changes
|
|
303
788
|
await this.invalidateArtifacts(`${type}/${id}`);
|
|
789
|
+
// Emit type-specific and global events
|
|
790
|
+
const eventData = { $id: id, $type: type, ...updated };
|
|
791
|
+
await this.emit(`${type}.updated`, eventData);
|
|
792
|
+
await this.emit('entity:updated', eventData);
|
|
304
793
|
return { ...updated, $id: id, $type: type };
|
|
305
794
|
}
|
|
306
795
|
async delete(type, id) {
|
|
@@ -309,8 +798,10 @@ export class MemoryProvider {
|
|
|
309
798
|
return false;
|
|
310
799
|
}
|
|
311
800
|
store.delete(id);
|
|
312
|
-
// Emit
|
|
313
|
-
|
|
801
|
+
// Emit type-specific and global events
|
|
802
|
+
const eventData = { $id: id, $type: type };
|
|
803
|
+
await this.emit(`${type}.deleted`, eventData);
|
|
804
|
+
await this.emit('entity:deleted', eventData);
|
|
314
805
|
// Clean up relations
|
|
315
806
|
for (const [key, targets] of this.relations) {
|
|
316
807
|
if (key.startsWith(`${type}:${id}:`)) {
|
|
@@ -325,6 +816,19 @@ export class MemoryProvider {
|
|
|
325
816
|
// ===========================================================================
|
|
326
817
|
// Relationships
|
|
327
818
|
// ===========================================================================
|
|
819
|
+
/**
|
|
820
|
+
* Generate a unique key for storing relationships
|
|
821
|
+
*
|
|
822
|
+
* Creates a composite key from source entity type, ID, and relation name
|
|
823
|
+
* that serves as the key in the relations Map.
|
|
824
|
+
*
|
|
825
|
+
* @param fromType - The source entity type
|
|
826
|
+
* @param fromId - The source entity ID
|
|
827
|
+
* @param relation - The relationship name
|
|
828
|
+
* @returns Composite key in format "type:id:relation"
|
|
829
|
+
*
|
|
830
|
+
* @internal
|
|
831
|
+
*/
|
|
328
832
|
relationKey(fromType, fromId, relation) {
|
|
329
833
|
return `${fromType}:${fromId}:${relation}`;
|
|
330
834
|
}
|
|
@@ -343,17 +847,19 @@ export class MemoryProvider {
|
|
|
343
847
|
}
|
|
344
848
|
return results;
|
|
345
849
|
}
|
|
346
|
-
async relate(fromType, fromId, relation, toType, toId) {
|
|
850
|
+
async relate(fromType, fromId, relation, toType, toId, metadata) {
|
|
347
851
|
const key = this.relationKey(fromType, fromId, relation);
|
|
348
852
|
if (!this.relations.has(key)) {
|
|
349
853
|
this.relations.set(key, new Set());
|
|
350
854
|
}
|
|
351
855
|
this.relations.get(key).add(`${toType}:${toId}`);
|
|
352
|
-
// Emit event
|
|
856
|
+
// Emit event with metadata
|
|
353
857
|
await this.emit('Relation.created', {
|
|
354
858
|
from: `${fromType}/${fromId}`,
|
|
355
859
|
type: relation,
|
|
356
860
|
to: `${toType}/${toId}`,
|
|
861
|
+
matchMode: metadata?.matchMode,
|
|
862
|
+
similarity: metadata?.similarity,
|
|
357
863
|
});
|
|
358
864
|
}
|
|
359
865
|
async unrelate(fromType, fromId, relation, toType, toId) {
|
|
@@ -427,6 +933,18 @@ export class MemoryProvider {
|
|
|
427
933
|
await this.semaphore.map(handlers, (handler) => Promise.resolve(handler(event)));
|
|
428
934
|
return event;
|
|
429
935
|
}
|
|
936
|
+
/**
|
|
937
|
+
* Get all event handlers matching an event type
|
|
938
|
+
*
|
|
939
|
+
* Collects handlers from all registered patterns that match the given
|
|
940
|
+
* event type. Supports exact matches, wildcards (*), and prefix/suffix
|
|
941
|
+
* patterns (*.created, Post.*).
|
|
942
|
+
*
|
|
943
|
+
* @param type - The event type to match handlers for
|
|
944
|
+
* @returns Array of matching event handlers
|
|
945
|
+
*
|
|
946
|
+
* @internal
|
|
947
|
+
*/
|
|
430
948
|
getEventHandlers(type) {
|
|
431
949
|
const handlers = [];
|
|
432
950
|
for (const [pattern, patternHandlers] of this.eventHandlers) {
|
|
@@ -436,6 +954,21 @@ export class MemoryProvider {
|
|
|
436
954
|
}
|
|
437
955
|
return handlers;
|
|
438
956
|
}
|
|
957
|
+
/**
|
|
958
|
+
* Check if an event type matches a subscription pattern
|
|
959
|
+
*
|
|
960
|
+
* Supports several pattern formats:
|
|
961
|
+
* - Exact match: 'Post.created' matches 'Post.created'
|
|
962
|
+
* - Global wildcard: '*' matches everything
|
|
963
|
+
* - Prefix wildcard: 'Post.*' matches 'Post.created', 'Post.updated', etc.
|
|
964
|
+
* - Suffix wildcard: '*.created' matches 'Post.created', 'User.created', etc.
|
|
965
|
+
*
|
|
966
|
+
* @param type - The event type to check
|
|
967
|
+
* @param pattern - The subscription pattern to match against
|
|
968
|
+
* @returns True if the type matches the pattern
|
|
969
|
+
*
|
|
970
|
+
* @internal
|
|
971
|
+
*/
|
|
439
972
|
matchesPattern(type, pattern) {
|
|
440
973
|
if (pattern === type)
|
|
441
974
|
return true;
|
|
@@ -674,6 +1207,18 @@ export class MemoryProvider {
|
|
|
674
1207
|
// ===========================================================================
|
|
675
1208
|
// Artifacts
|
|
676
1209
|
// ===========================================================================
|
|
1210
|
+
/**
|
|
1211
|
+
* Generate a unique key for storing artifacts
|
|
1212
|
+
*
|
|
1213
|
+
* Creates a composite key from URL and artifact type for storage
|
|
1214
|
+
* in the artifacts Map.
|
|
1215
|
+
*
|
|
1216
|
+
* @param url - The entity URL (e.g., 'Post/123')
|
|
1217
|
+
* @param type - The artifact type (e.g., 'embedding')
|
|
1218
|
+
* @returns Composite key in format "url:type"
|
|
1219
|
+
*
|
|
1220
|
+
* @internal
|
|
1221
|
+
*/
|
|
677
1222
|
artifactKey(url, type) {
|
|
678
1223
|
return `${url}:${type}`;
|
|
679
1224
|
}
|
|
@@ -704,6 +1249,17 @@ export class MemoryProvider {
|
|
|
704
1249
|
}
|
|
705
1250
|
}
|
|
706
1251
|
}
|
|
1252
|
+
/**
|
|
1253
|
+
* Invalidate cached artifacts for an entity (except embeddings)
|
|
1254
|
+
*
|
|
1255
|
+
* Called when entity data changes to ensure stale computed content
|
|
1256
|
+
* (like cached transformations) is regenerated. Embeddings are preserved
|
|
1257
|
+
* as they're regenerated separately via autoEmbed.
|
|
1258
|
+
*
|
|
1259
|
+
* @param url - The entity URL whose artifacts should be invalidated
|
|
1260
|
+
*
|
|
1261
|
+
* @internal
|
|
1262
|
+
*/
|
|
707
1263
|
async invalidateArtifacts(url) {
|
|
708
1264
|
// Keep embedding artifact but mark others for regeneration
|
|
709
1265
|
for (const [key, artifact] of this.artifacts) {
|