ai-database 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/CHANGELOG.md +43 -0
  2. package/dist/actions.d.ts +247 -0
  3. package/dist/actions.d.ts.map +1 -0
  4. package/dist/actions.js +260 -0
  5. package/dist/actions.js.map +1 -0
  6. package/dist/ai-promise-db.d.ts +34 -2
  7. package/dist/ai-promise-db.d.ts.map +1 -1
  8. package/dist/ai-promise-db.js +511 -66
  9. package/dist/ai-promise-db.js.map +1 -1
  10. package/dist/constants.d.ts +16 -0
  11. package/dist/constants.d.ts.map +1 -0
  12. package/dist/constants.js +16 -0
  13. package/dist/constants.js.map +1 -0
  14. package/dist/events.d.ts +153 -0
  15. package/dist/events.d.ts.map +1 -0
  16. package/dist/events.js +154 -0
  17. package/dist/events.js.map +1 -0
  18. package/dist/index.d.ts +8 -1
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +13 -1
  21. package/dist/index.js.map +1 -1
  22. package/dist/memory-provider.d.ts +144 -2
  23. package/dist/memory-provider.d.ts.map +1 -1
  24. package/dist/memory-provider.js +569 -13
  25. package/dist/memory-provider.js.map +1 -1
  26. package/dist/schema/cascade.d.ts +96 -0
  27. package/dist/schema/cascade.d.ts.map +1 -0
  28. package/dist/schema/cascade.js +528 -0
  29. package/dist/schema/cascade.js.map +1 -0
  30. package/dist/schema/index.d.ts +197 -0
  31. package/dist/schema/index.d.ts.map +1 -0
  32. package/dist/schema/index.js +1211 -0
  33. package/dist/schema/index.js.map +1 -0
  34. package/dist/schema/parse.d.ts +225 -0
  35. package/dist/schema/parse.d.ts.map +1 -0
  36. package/dist/schema/parse.js +732 -0
  37. package/dist/schema/parse.js.map +1 -0
  38. package/dist/schema/provider.d.ts +176 -0
  39. package/dist/schema/provider.d.ts.map +1 -0
  40. package/dist/schema/provider.js +258 -0
  41. package/dist/schema/provider.js.map +1 -0
  42. package/dist/schema/resolve.d.ts +87 -0
  43. package/dist/schema/resolve.d.ts.map +1 -0
  44. package/dist/schema/resolve.js +474 -0
  45. package/dist/schema/resolve.js.map +1 -0
  46. package/dist/schema/semantic.d.ts +53 -0
  47. package/dist/schema/semantic.d.ts.map +1 -0
  48. package/dist/schema/semantic.js +247 -0
  49. package/dist/schema/semantic.js.map +1 -0
  50. package/dist/schema/types.d.ts +528 -0
  51. package/dist/schema/types.d.ts.map +1 -0
  52. package/dist/schema/types.js +9 -0
  53. package/dist/schema/types.js.map +1 -0
  54. package/dist/schema.d.ts +24 -867
  55. package/dist/schema.d.ts.map +1 -1
  56. package/dist/schema.js +41 -1124
  57. package/dist/schema.js.map +1 -1
  58. package/dist/semantic.d.ts +175 -0
  59. package/dist/semantic.d.ts.map +1 -0
  60. package/dist/semantic.js +338 -0
  61. package/dist/semantic.js.map +1 -0
  62. package/dist/types.d.ts +14 -0
  63. package/dist/types.d.ts.map +1 -1
  64. package/dist/types.js.map +1 -1
  65. package/package.json +13 -4
  66. package/.turbo/turbo-build.log +0 -5
  67. package/TESTING.md +0 -410
  68. package/TEST_SUMMARY.md +0 -250
  69. package/TODO.md +0 -128
  70. package/src/ai-promise-db.ts +0 -1243
  71. package/src/authorization.ts +0 -1102
  72. package/src/durable-clickhouse.ts +0 -596
  73. package/src/durable-promise.ts +0 -582
  74. package/src/execution-queue.ts +0 -608
  75. package/src/index.test.ts +0 -868
  76. package/src/index.ts +0 -337
  77. package/src/linguistic.ts +0 -404
  78. package/src/memory-provider.test.ts +0 -1036
  79. package/src/memory-provider.ts +0 -1119
  80. package/src/schema.test.ts +0 -1254
  81. package/src/schema.ts +0 -2296
  82. package/src/tests.ts +0 -725
  83. package/src/types.ts +0 -1177
  84. package/test/README.md +0 -153
  85. package/test/edge-cases.test.ts +0 -646
  86. package/test/provider-resolution.test.ts +0 -402
  87. package/tsconfig.json +0 -9
  88. package/vitest.config.ts +0 -19
@@ -3,7 +3,10 @@
3
3
  *
4
4
  * Simple provider implementation for testing and development.
5
5
  * Includes concurrency control via Semaphore for rate limiting.
6
+ * Supports automatic embedding generation on create/update.
6
7
  */
8
+ import { cosineSimilarity, computeRRF, extractEmbeddableText, generateContentHash, } from './semantic.js';
9
+ import { EMBEDDING_DIMENSIONS } from './constants.js';
7
10
  // =============================================================================
8
11
  // Semaphore for Concurrency Control
9
12
  // =============================================================================
@@ -67,6 +70,15 @@ export class Semaphore {
67
70
  // =============================================================================
68
71
  // Generate ID
69
72
  // =============================================================================
73
+ /**
74
+ * Generate a unique ID for a new entity
75
+ *
76
+ * Uses crypto.randomUUID() to generate a UUID v4 identifier.
77
+ *
78
+ * @returns A new UUID string
79
+ *
80
+ * @internal
81
+ */
70
82
  function generateId() {
71
83
  return crypto.randomUUID();
72
84
  }
@@ -116,11 +128,36 @@ function conjugateVerb(verb) {
116
128
  activity: toGerund(base),
117
129
  };
118
130
  }
119
- /** Check if character is a vowel */
131
+ /**
132
+ * Check if a character is a vowel (a, e, i, o, u)
133
+ *
134
+ * @param char - The character to check
135
+ * @returns True if the character is a vowel
136
+ *
137
+ * @internal
138
+ */
120
139
  function isVowel(char) {
121
140
  return char ? 'aeiou'.includes(char.toLowerCase()) : false;
122
141
  }
123
- /** Check if we should double the final consonant */
142
+ /**
143
+ * Check if we should double the final consonant when adding a suffix
144
+ *
145
+ * English spelling rules require doubling the final consonant in certain
146
+ * cases when adding suffixes like -ing or -ed. This applies to short words
147
+ * ending in consonant-vowel-consonant patterns.
148
+ *
149
+ * @param verb - The verb to check
150
+ * @returns True if the final consonant should be doubled
151
+ *
152
+ * @example
153
+ * ```ts
154
+ * shouldDoubleConsonant('run') // => true (running)
155
+ * shouldDoubleConsonant('play') // => false (playing)
156
+ * shouldDoubleConsonant('fix') // => false (fixing - x is excluded)
157
+ * ```
158
+ *
159
+ * @internal
160
+ */
124
161
  function shouldDoubleConsonant(verb) {
125
162
  if (verb.length < 2)
126
163
  return false;
@@ -135,7 +172,26 @@ function shouldDoubleConsonant(verb) {
135
172
  return true;
136
173
  return false;
137
174
  }
138
- /** Convert verb to present 3rd person (create → creates) */
175
+ /**
176
+ * Convert a verb to present tense third person singular form
177
+ *
178
+ * Applies English conjugation rules for third person singular:
179
+ * - Verbs ending in consonant + y: change y to ies (try → tries)
180
+ * - Verbs ending in s, x, z, ch, sh: add es (push → pushes)
181
+ * - Other verbs: add s (run → runs)
182
+ *
183
+ * @param verb - The base form of the verb
184
+ * @returns The third person singular present tense form
185
+ *
186
+ * @example
187
+ * ```ts
188
+ * toPresent('create') // => 'creates'
189
+ * toPresent('push') // => 'pushes'
190
+ * toPresent('try') // => 'tries'
191
+ * ```
192
+ *
193
+ * @internal
194
+ */
139
195
  function toPresent(verb) {
140
196
  if (verb.endsWith('y') && !isVowel(verb[verb.length - 2])) {
141
197
  return verb.slice(0, -1) + 'ies';
@@ -146,7 +202,27 @@ function toPresent(verb) {
146
202
  }
147
203
  return verb + 's';
148
204
  }
149
- /** Convert verb to gerund (create → creating) */
205
+ /**
206
+ * Convert a verb to gerund/present participle form (-ing)
207
+ *
208
+ * Applies English spelling rules for adding -ing:
209
+ * - Verbs ending in ie: change ie to ying (die → dying)
210
+ * - Verbs ending in e (not ee): drop e, add ing (create → creating)
211
+ * - Verbs requiring consonant doubling: double + ing (run → running)
212
+ * - Other verbs: add ing (play → playing)
213
+ *
214
+ * @param verb - The base form of the verb
215
+ * @returns The gerund/present participle form
216
+ *
217
+ * @example
218
+ * ```ts
219
+ * toGerund('create') // => 'creating'
220
+ * toGerund('run') // => 'running'
221
+ * toGerund('die') // => 'dying'
222
+ * ```
223
+ *
224
+ * @internal
225
+ */
150
226
  function toGerund(verb) {
151
227
  if (verb.endsWith('ie'))
152
228
  return verb.slice(0, -2) + 'ying';
@@ -177,12 +253,305 @@ export class MemoryProvider {
177
253
  artifacts = new Map();
178
254
  // Concurrency control
179
255
  semaphore;
256
+ // Embedding configuration
257
+ embeddingsConfig;
180
258
  constructor(options = {}) {
181
259
  this.semaphore = new Semaphore(options.concurrency ?? 10);
260
+ this.embeddingsConfig = options.embeddings ?? {};
261
+ }
262
+ /**
263
+ * Replace the embeddings configuration; the given config overwrites the previous one (nothing is merged)
264
+ */
265
+ setEmbeddingsConfig(config) {
266
+ this.embeddingsConfig = config;
267
+ }
268
+ // ===========================================================================
269
+ // Embedding Generation
270
+ // ===========================================================================
271
+ /**
272
+ * Generate embedding for text (deterministic for testing)
273
+ *
274
+ * Uses semantic word vectors to create meaningful embeddings
275
+ * where similar concepts have higher cosine similarity.
276
+ */
277
+ generateEmbedding(text) {
278
+ // Inline table of 4-dimensional word vectors for deterministic embeddings (defined here, not imported)
279
+ const SEMANTIC_VECTORS = {
280
+ // AI/ML domain
281
+ machine: [0.9, 0.1, 0.05, 0.02],
282
+ learning: [0.85, 0.15, 0.08, 0.03],
283
+ artificial: [0.88, 0.12, 0.06, 0.04],
284
+ intelligence: [0.87, 0.13, 0.07, 0.05],
285
+ neural: [0.82, 0.18, 0.09, 0.06],
286
+ network: [0.75, 0.2, 0.15, 0.1],
287
+ deep: [0.8, 0.17, 0.1, 0.08],
288
+ ai: [0.92, 0.08, 0.04, 0.02],
289
+ ml: [0.88, 0.12, 0.06, 0.03],
290
+ // Programming domain
291
+ programming: [0.15, 0.85, 0.1, 0.05],
292
+ code: [0.12, 0.88, 0.12, 0.06],
293
+ software: [0.18, 0.82, 0.15, 0.08],
294
+ development: [0.2, 0.8, 0.18, 0.1],
295
+ typescript: [0.1, 0.9, 0.08, 0.04],
296
+ javascript: [0.12, 0.88, 0.1, 0.05],
297
+ python: [0.25, 0.75, 0.12, 0.06],
298
+ react: [0.08, 0.85, 0.2, 0.1],
299
+ vue: [0.06, 0.84, 0.18, 0.08],
300
+ frontend: [0.05, 0.8, 0.25, 0.12],
301
+ // Database domain
302
+ database: [0.1, 0.7, 0.08, 0.6],
303
+ query: [0.12, 0.65, 0.1, 0.7],
304
+ sql: [0.08, 0.6, 0.05, 0.75],
305
+ index: [0.1, 0.58, 0.08, 0.72],
306
+ optimization: [0.15, 0.55, 0.12, 0.68],
307
+ performance: [0.18, 0.5, 0.15, 0.65],
308
+ // DevOps domain
309
+ kubernetes: [0.05, 0.6, 0.8, 0.15],
310
+ docker: [0.08, 0.55, 0.82, 0.12],
311
+ container: [0.06, 0.5, 0.85, 0.1],
312
+ deployment: [0.1, 0.45, 0.78, 0.18],
313
+ devops: [0.12, 0.48, 0.75, 0.2],
314
+ // Food domain (distinctly different direction - high in the last component (index 3), low elsewhere)
315
+ cooking: [0.05, 0.08, 0.05, 0.95],
316
+ recipe: [0.06, 0.07, 0.04, 0.93],
317
+ food: [0.04, 0.06, 0.04, 0.96],
318
+ pasta: [0.03, 0.05, 0.03, 0.97],
319
+ pizza: [0.03, 0.06, 0.04, 0.96],
320
+ italian: [0.04, 0.07, 0.04, 0.94],
321
+ garden: [0.05, 0.04, 0.03, 0.92],
322
+ flowers: [0.04, 0.03, 0.03, 0.91],
323
+ chef: [0.05, 0.1, 0.05, 0.95],
324
+ restaurant: [0.06, 0.08, 0.04, 0.93],
325
+ kitchen: [0.05, 0.09, 0.05, 0.94],
326
+ antonio: [0.05, 0.08, 0.04, 0.92],
327
+ // Research/Academic domain (similar to AI/ML)
328
+ researcher: [0.82, 0.2, 0.1, 0.08],
329
+ phd: [0.8, 0.18, 0.12, 0.1],
330
+ research: [0.85, 0.15, 0.1, 0.07],
331
+ professor: [0.78, 0.22, 0.12, 0.1],
332
+ academic: [0.75, 0.2, 0.15, 0.12],
333
+ // Location/Venue domain (for fuzzy threshold tests - need distinct clusters)
334
+ // "conference center downtown" cluster - high values in different dimensions
335
+ conference: [0.2, 0.25, 0.85, 0.2],
336
+ center: [0.18, 0.22, 0.88, 0.18],
337
+ downtown: [0.15, 0.2, 0.9, 0.15],
338
+ // "tech hub 123 main st" cluster - completely different direction
339
+ hub: [0.85, 0.15, 0.2, 0.15],
340
+ main: [0.12, 0.12, 0.15, 0.1],
341
+ st: [0.1, 0.1, 0.12, 0.08],
342
+ '123': [0.08, 0.08, 0.1, 0.05],
343
+ // GraphQL/API
344
+ graphql: [0.1, 0.75, 0.15, 0.55],
345
+ api: [0.15, 0.7, 0.2, 0.5],
346
+ rest: [0.12, 0.68, 0.18, 0.48],
347
+ queries: [0.14, 0.65, 0.12, 0.6],
348
+ // Testing
349
+ testing: [0.1, 0.78, 0.08, 0.15],
350
+ test: [0.08, 0.8, 0.06, 0.12],
351
+ unit: [0.06, 0.82, 0.05, 0.1],
352
+ integration: [0.12, 0.75, 0.1, 0.18],
353
+ // State management
354
+ state: [0.08, 0.82, 0.2, 0.08],
355
+ management: [0.15, 0.75, 0.25, 0.12],
356
+ hooks: [0.06, 0.88, 0.15, 0.05],
357
+ usestate: [0.05, 0.9, 0.12, 0.04],
358
+ useeffect: [0.04, 0.88, 0.1, 0.03],
359
+ // Related/Concept domain (for semantic similarity tests)
360
+ related: [0.5, 0.5, 0.5, 0.5],
361
+ concept: [0.55, 0.45, 0.55, 0.45],
362
+ similar: [0.52, 0.48, 0.52, 0.48],
363
+ different: [0.48, 0.52, 0.48, 0.52],
364
+ words: [0.45, 0.55, 0.45, 0.55],
365
+ semantically: [0.6, 0.4, 0.6, 0.4],
366
+ // Exact match domain (distinctly different vectors)
367
+ exact: [0.1, 0.1, 0.1, 0.9],
368
+ match: [0.15, 0.15, 0.1, 0.85],
369
+ title: [0.1, 0.2, 0.1, 0.8],
370
+ contains: [0.12, 0.18, 0.12, 0.78],
371
+ search: [0.08, 0.22, 0.08, 0.82],
372
+ terms: [0.05, 0.25, 0.05, 0.85],
373
+ // Business domain (for fuzzy forward resolution tests)
374
+ enterprise: [0.7, 0.3, 0.8, 0.6],
375
+ large: [0.65, 0.25, 0.75, 0.55],
376
+ corporations: [0.68, 0.28, 0.78, 0.58],
377
+ companies: [0.6, 0.4, 0.7, 0.5],
378
+ company: [0.62, 0.38, 0.72, 0.52],
379
+ thousands: [0.7, 0.2, 0.7, 0.5],
380
+ employees: [0.55, 0.35, 0.65, 0.45],
381
+ big: [0.68, 0.3, 0.75, 0.58],
382
+ small: [0.3, 0.6, 0.3, 0.4],
383
+ business: [0.5, 0.5, 0.6, 0.5],
384
+ owners: [0.4, 0.5, 0.5, 0.45],
385
+ consumer: [0.35, 0.55, 0.35, 0.35],
386
+ individual: [0.32, 0.58, 0.32, 0.32],
387
+ b2c: [0.3, 0.6, 0.3, 0.35],
388
+ // Tech professional domain
389
+ developer: [0.2, 0.85, 0.15, 0.1],
390
+ engineer: [0.25, 0.82, 0.18, 0.12],
391
+ engineers: [0.27, 0.8, 0.2, 0.14],
392
+ builds: [0.18, 0.78, 0.16, 0.08],
393
+ writes: [0.15, 0.75, 0.12, 0.06],
394
+ professional: [0.22, 0.72, 0.2, 0.15],
395
+ applications: [0.2, 0.78, 0.18, 0.1],
396
+ tech: [0.25, 0.8, 0.2, 0.12],
397
+ technology: [0.28, 0.78, 0.22, 0.14],
398
+ electronics: [0.3, 0.75, 0.25, 0.15],
399
+ device: [0.25, 0.82, 0.2, 0.1],
400
+ furniture: [0.1, 0.15, 0.2, 0.85],
401
+ home: [0.12, 0.18, 0.22, 0.8],
402
+ living: [0.1, 0.15, 0.2, 0.82],
403
+ goods: [0.3, 0.5, 0.35, 0.4],
404
+ leaders: [0.4, 0.5, 0.6, 0.4],
405
+ senior: [0.35, 0.55, 0.55, 0.35],
406
+ // Data science domain
407
+ data: [0.75, 0.3, 0.15, 0.55],
408
+ science: [0.78, 0.25, 0.12, 0.5],
409
+ scientist: [0.8, 0.28, 0.1, 0.52],
410
+ background: [0.72, 0.32, 0.14, 0.48],
411
+ // DevOps/cloud domain
412
+ cloud: [0.1, 0.55, 0.85, 0.15],
413
+ expertise: [0.15, 0.5, 0.8, 0.18],
414
+ // Support domain
415
+ support: [0.2, 0.45, 0.3, 0.55],
416
+ specialist: [0.22, 0.48, 0.32, 0.52],
417
+ technical: [0.25, 0.65, 0.35, 0.4],
418
+ issues: [0.18, 0.42, 0.28, 0.48],
419
+ // Security domain
420
+ security: [0.3, 0.6, 0.4, 0.7],
421
+ auth: [0.28, 0.58, 0.38, 0.72],
422
+ authentication: [0.32, 0.55, 0.42, 0.75],
423
+ identity: [0.35, 0.52, 0.45, 0.68],
424
+ oauth: [0.3, 0.62, 0.4, 0.7],
425
+ // CRM domain
426
+ crm: [0.45, 0.4, 0.7, 0.55],
427
+ sales: [0.42, 0.38, 0.68, 0.52],
428
+ salesforce: [0.48, 0.42, 0.72, 0.58],
429
+ provider: [0.5, 0.45, 0.65, 0.5],
430
+ };
431
+ const DEFAULT_VECTOR = [0.1, 0.1, 0.1, 0.1];
432
+ // Simple hash function
433
+ const simpleHash = (str) => {
434
+ let hash = 0;
435
+ for (let i = 0; i < str.length; i++) {
436
+ const char = str.charCodeAt(i);
437
+ hash = ((hash << 5) - hash) + char;
438
+ hash = hash & hash;
439
+ }
440
+ return Math.abs(hash);
441
+ };
442
+ // Seeded random
443
+ const seededRandom = (seed, index) => {
444
+ const x = Math.sin(seed + index) * 10000;
445
+ return x - Math.floor(x);
446
+ };
447
+ // Tokenize
448
+ const words = text
449
+ .toLowerCase()
450
+ .replace(/[^\w\s]/g, ' ')
451
+ .split(/\s+/)
452
+ .filter(w => w.length > 0);
453
+ if (words.length === 0) {
454
+ return Array.from({ length: EMBEDDING_DIMENSIONS }, (_, i) => seededRandom(0, i) * 0.01);
455
+ }
456
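+ // Aggregate word vectors. NOTE(review): SEMANTIC_VECTORS is a plain object literal, so tokens such as 'constructor' or '__proto__' resolve to inherited Object.prototype members instead of falling back to DEFAULT_VECTOR, which turns the sums into NaN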
457
+ const aggregated = [0, 0, 0, 0];
458
+ for (const word of words) {
459
+ const lower = word.toLowerCase();
460
+ const vec = SEMANTIC_VECTORS[lower] ?? DEFAULT_VECTOR.map((v, i) => v + seededRandom(simpleHash(lower), i) * 0.1);
461
+ for (let i = 0; i < 4; i++) {
462
+ aggregated[i] += vec[i];
463
+ }
464
+ }
465
+ // Normalize
466
+ const norm = Math.sqrt(aggregated.reduce((sum, v) => sum + v * v, 0));
467
+ const normalized = aggregated.map(v => v / (norm || 1));
468
+ // Expand to full dimensions
469
+ const textHash = simpleHash(text);
470
+ const embedding = new Array(EMBEDDING_DIMENSIONS);
471
+ for (let i = 0; i < EMBEDDING_DIMENSIONS; i++) {
472
+ const baseIndex = i % 4;
473
+ const base = normalized[baseIndex];
474
+ const noise = seededRandom(textHash, i) * 0.1 - 0.05;
475
+ embedding[i] = base + noise;
476
+ }
477
+ // Final normalization
478
+ const finalNorm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
479
+ return embedding.map((v) => v / (finalNorm || 1));
480
+ }
481
+ /**
482
+ * Check if embeddings should be generated for a given entity type
483
+ *
484
+ * Consults the embeddings configuration to determine:
485
+ * - If embeddings are disabled for this type (config === false)
486
+ * - If specific fields are configured for embedding
487
+ * - If auto-detection of text fields should be used (default)
488
+ *
489
+ * @param type - The entity type name
490
+ * @returns Object with enabled flag and optional field list
491
+ *
492
+ * @internal
493
+ */
494
+ shouldEmbed(type) {
495
+ const config = this.embeddingsConfig[type];
496
+ if (config === false) {
497
+ return { enabled: false };
498
+ }
499
+ if (config && config.fields) {
500
+ return { enabled: true, fields: config.fields };
501
+ }
502
+ // Default: embed all text fields (auto-detect)
503
+ return { enabled: true };
504
+ }
505
+ /**
506
+ * Auto-generate and store an embedding for an entity
507
+ *
508
+ * Called during create/update operations to automatically generate
509
+ * embeddings for entities based on their text content. The embedding
510
+ * is stored as an artifact associated with the entity.
511
+ *
512
+ * @param type - The entity type name
513
+ * @param id - The entity ID
514
+ * @param data - The entity data to extract text from
515
+ *
516
+ * @internal
517
+ */
518
+ async autoEmbed(type, id, data) {
519
+ const { enabled, fields } = this.shouldEmbed(type);
520
+ if (!enabled)
521
+ return;
522
+ // Extract embeddable text
523
+ const { text, fields: embeddedFields } = extractEmbeddableText(data, fields);
524
+ if (!text.trim())
525
+ return;
526
+ // Generate embedding
527
+ const embedding = this.generateEmbedding(text);
528
+ const contentHash = generateContentHash(text);
529
+ // Store as artifact with complete metadata
530
+ const url = `${type}/${id}`;
531
+ await this.setArtifact(url, 'embedding', {
532
+ content: embedding,
533
+ sourceHash: contentHash,
534
+ metadata: {
535
+ fields: embeddedFields,
536
+ dimensions: EMBEDDING_DIMENSIONS,
537
+ text: text.slice(0, 200),
538
+ },
539
+ });
182
540
  }
183
541
  // ===========================================================================
184
542
  // Things (Records)
185
543
  // ===========================================================================
544
+ /**
545
+ * Get or create the storage map for an entity type
546
+ *
547
+ * Lazily creates the type-specific storage map if it doesn't exist.
548
+ * This gives each entity type its own ID namespace, so the same ID can exist under different types without colliding.
549
+ *
550
+ * @param type - The entity type name
551
+ * @returns The Map storing entities of this type (id -> entity data)
552
+ *
553
+ * @internal
554
+ */
186
555
  getTypeStore(type) {
187
556
  if (!this.entities.has(type)) {
188
557
  this.entities.set(type, new Map());
@@ -269,6 +638,118 @@ export class MemoryProvider {
269
638
  scored.sort((a, b) => b.score - a.score);
270
639
  return scored.map((s) => s.entity);
271
640
  }
641
+ /**
642
+ * Semantic search: ranks the entities of a type by cosine similarity between the query embedding and each entity's stored embedding artifact
643
+ */
644
+ async semanticSearch(type, query, options) {
645
+ const store = this.getTypeStore(type);
646
+ const limit = options?.limit ?? 10;
647
+ const minScore = options?.minScore ?? 0;
648
+ // Generate query embedding
649
+ const queryEmbedding = this.generateEmbedding(query);
650
+ const scored = [];
651
+ for (const [id, entity] of store) {
652
+ // Get stored embedding from artifacts
653
+ const url = `${type}/${id}`;
654
+ const artifact = await this.getArtifact(url, 'embedding');
655
+ if (!artifact || !Array.isArray(artifact.content)) {
656
+ continue;
657
+ }
658
+ const embedding = artifact.content;
659
+ const score = cosineSimilarity(queryEmbedding, embedding);
660
+ if (score >= minScore) {
661
+ scored.push({
662
+ entity: { ...entity, $id: id, $type: type },
663
+ score,
664
+ });
665
+ }
666
+ }
667
+ // Sort by score descending
668
+ scored.sort((a, b) => b.score - a.score);
669
+ // Apply limit and add $score
670
+ return scored.slice(0, limit).map(({ entity, score }) => ({
671
+ ...entity,
672
+ $score: score,
673
+ }));
674
+ }
675
+ /**
676
+ * Hybrid search combining FTS and semantic with RRF scoring
677
+ */
678
+ async hybridSearch(type, query, options) {
679
+ const limit = options?.limit ?? 10;
680
+ const offset = options?.offset ?? 0;
681
+ const rrfK = options?.rrfK ?? 60;
682
+ const ftsWeight = options?.ftsWeight ?? 0.5;
683
+ const semanticWeight = options?.semanticWeight ?? 0.5;
684
+ const minScore = options?.minScore ?? 0;
685
+ // Get FTS results with their ranks
686
+ const ftsResults = await this.search(type, query);
687
+ const ftsRanks = new Map();
688
+ ftsResults.forEach((entity, index) => {
689
+ const id = entity.$id || entity.id;
690
+ ftsRanks.set(id, index + 1); // 1-indexed rank
691
+ });
692
+ // Get semantic results with their ranks and scores
693
+ // Get more results to ensure we have enough after offset
694
+ const semanticResults = await this.semanticSearch(type, query, { limit: (limit + offset) * 2, minScore });
695
+ const semanticRanks = new Map();
696
+ semanticResults.forEach((entity, index) => {
697
+ const id = entity.$id || entity.id;
698
+ semanticRanks.set(id, { rank: index + 1, score: entity.$score });
699
+ });
700
+ // Combine results with RRF
701
+ const allIds = new Set([...ftsRanks.keys(), ...semanticRanks.keys()]);
702
+ const combined = [];
703
+ const store = this.getTypeStore(type);
704
+ for (const id of allIds) {
705
+ const entity = store.get(id);
706
+ if (!entity)
707
+ continue;
708
+ const ftsRank = ftsRanks.get(id) ?? Infinity;
709
+ const semantic = semanticRanks.get(id) ?? { rank: Infinity, score: 0 };
710
+ const semanticRank = semantic.rank;
711
+ const semanticScore = semantic.score;
712
+ // Skip semantic hits below the threshold; FTS-only hits are kept. semanticSearch has already applied minScore, so this is a defensive re-check
713
+ if (semanticRanks.has(id) && semanticScore < minScore)
714
+ continue;
715
+ const rrfScore = computeRRF(ftsRank, semanticRank, rrfK, ftsWeight, semanticWeight);
716
+ combined.push({
717
+ entity: { ...entity, $id: id, $type: type },
718
+ rrfScore,
719
+ ftsRank,
720
+ semanticRank,
721
+ semanticScore,
722
+ });
723
+ }
724
+ // Sort by RRF score descending
725
+ combined.sort((a, b) => b.rrfScore - a.rrfScore);
726
+ // Apply offset and limit, then return with scoring fields
727
+ return combined.slice(offset, offset + limit).map(({ entity, rrfScore, ftsRank, semanticRank, semanticScore }) => ({
728
+ ...entity,
729
+ $rrfScore: rrfScore,
730
+ $ftsRank: ftsRank,
731
+ $semanticRank: semanticRank,
732
+ $score: semanticScore,
733
+ }));
734
+ }
735
+ /**
736
+ * Get all embeddings for a type
737
+ */
738
+ async getAllEmbeddings(type) {
739
+ const store = this.getTypeStore(type);
740
+ const results = [];
741
+ for (const [id] of store) {
742
+ const url = `${type}/${id}`;
743
+ const artifact = await this.getArtifact(url, 'embedding');
744
+ if (artifact && Array.isArray(artifact.content)) {
745
+ results.push({
746
+ id,
747
+ embedding: artifact.content,
748
+ });
749
+ }
750
+ }
751
+ return results;
752
+ }
272
753
  async create(type, id, data) {
273
754
  const store = this.getTypeStore(type);
274
755
  const entityId = id || generateId();
@@ -281,8 +762,12 @@ export class MemoryProvider {
281
762
  updatedAt: new Date().toISOString(),
282
763
  };
283
764
  store.set(entityId, entity);
284
- // Emit event
285
- await this.emit(`${type}.created`, { $id: entityId, $type: type, ...entity });
765
+ // Auto-generate embedding
766
+ await this.autoEmbed(type, entityId, entity);
767
+ // Emit type-specific and global events
768
+ const eventData = { $id: entityId, $type: type, ...entity };
769
+ await this.emit(`${type}.created`, eventData);
770
+ await this.emit('entity:created', eventData);
286
771
  return { ...entity, $id: entityId, $type: type };
287
772
  }
288
773
  async update(type, id, data) {
@@ -297,10 +782,14 @@ export class MemoryProvider {
297
782
  updatedAt: new Date().toISOString(),
298
783
  };
299
784
  store.set(id, updated);
300
- // Emit event
301
- await this.emit(`${type}.updated`, { $id: id, $type: type, ...updated });
302
- // Invalidate artifacts when data changes
785
+ // Re-generate embedding with updated data
786
+ await this.autoEmbed(type, id, updated);
787
+ // Invalidate non-embedding artifacts when data changes
303
788
  await this.invalidateArtifacts(`${type}/${id}`);
789
+ // Emit type-specific and global events
790
+ const eventData = { $id: id, $type: type, ...updated };
791
+ await this.emit(`${type}.updated`, eventData);
792
+ await this.emit('entity:updated', eventData);
304
793
  return { ...updated, $id: id, $type: type };
305
794
  }
306
795
  async delete(type, id) {
@@ -309,8 +798,10 @@ export class MemoryProvider {
309
798
  return false;
310
799
  }
311
800
  store.delete(id);
312
- // Emit event
313
- await this.emit(`${type}.deleted`, { $id: id, $type: type });
801
+ // Emit type-specific and global events
802
+ const eventData = { $id: id, $type: type };
803
+ await this.emit(`${type}.deleted`, eventData);
804
+ await this.emit('entity:deleted', eventData);
314
805
  // Clean up relations
315
806
  for (const [key, targets] of this.relations) {
316
807
  if (key.startsWith(`${type}:${id}:`)) {
@@ -325,6 +816,19 @@ export class MemoryProvider {
325
816
  // ===========================================================================
326
817
  // Relationships
327
818
  // ===========================================================================
819
+ /**
820
+ * Generate a unique key for storing relationships
821
+ *
822
+ * Creates a composite key from source entity type, ID, and relation name
823
+ * that serves as the key in the relations Map.
824
+ *
825
+ * @param fromType - The source entity type
826
+ * @param fromId - The source entity ID
827
+ * @param relation - The relationship name
828
+ * @returns Composite key in format "type:id:relation"
829
+ *
830
+ * @internal
831
+ */
328
832
  relationKey(fromType, fromId, relation) {
329
833
  return `${fromType}:${fromId}:${relation}`;
330
834
  }
@@ -343,17 +847,19 @@ export class MemoryProvider {
343
847
  }
344
848
  return results;
345
849
  }
346
- async relate(fromType, fromId, relation, toType, toId) {
850
+ async relate(fromType, fromId, relation, toType, toId, metadata) {
347
851
  const key = this.relationKey(fromType, fromId, relation);
348
852
  if (!this.relations.has(key)) {
349
853
  this.relations.set(key, new Set());
350
854
  }
351
855
  this.relations.get(key).add(`${toType}:${toId}`);
352
- // Emit event
856
+ // Emit event with metadata
353
857
  await this.emit('Relation.created', {
354
858
  from: `${fromType}/${fromId}`,
355
859
  type: relation,
356
860
  to: `${toType}/${toId}`,
861
+ matchMode: metadata?.matchMode,
862
+ similarity: metadata?.similarity,
357
863
  });
358
864
  }
359
865
  async unrelate(fromType, fromId, relation, toType, toId) {
@@ -427,6 +933,18 @@ export class MemoryProvider {
427
933
  await this.semaphore.map(handlers, (handler) => Promise.resolve(handler(event)));
428
934
  return event;
429
935
  }
936
+ /**
937
+ * Get all event handlers matching an event type
938
+ *
939
+ * Collects handlers from all registered patterns that match the given
940
+ * event type. Supports exact matches, wildcards (*), and prefix/suffix
941
+ * patterns (*.created, Post.*).
942
+ *
943
+ * @param type - The event type to match handlers for
944
+ * @returns Array of matching event handlers
945
+ *
946
+ * @internal
947
+ */
430
948
  getEventHandlers(type) {
431
949
  const handlers = [];
432
950
  for (const [pattern, patternHandlers] of this.eventHandlers) {
@@ -436,6 +954,21 @@ export class MemoryProvider {
436
954
  }
437
955
  return handlers;
438
956
  }
957
+ /**
958
+ * Check if an event type matches a subscription pattern
959
+ *
960
+ * Supports several pattern formats:
961
+ * - Exact match: 'Post.created' matches 'Post.created'
962
+ * - Global wildcard: '*' matches everything
963
+ * - Prefix wildcard: 'Post.*' matches 'Post.created', 'Post.updated', etc.
964
+ * - Suffix wildcard: '*.created' matches 'Post.created', 'User.created', etc.
965
+ *
966
+ * @param type - The event type to check
967
+ * @param pattern - The subscription pattern to match against
968
+ * @returns True if the type matches the pattern
969
+ *
970
+ * @internal
971
+ */
439
972
  matchesPattern(type, pattern) {
440
973
  if (pattern === type)
441
974
  return true;
@@ -674,6 +1207,18 @@ export class MemoryProvider {
674
1207
  // ===========================================================================
675
1208
  // Artifacts
676
1209
  // ===========================================================================
1210
+ /**
1211
+ * Generate a unique key for storing artifacts
1212
+ *
1213
+ * Creates a composite key from URL and artifact type for storage
1214
+ * in the artifacts Map.
1215
+ *
1216
+ * @param url - The entity URL (e.g., 'Post/123')
1217
+ * @param type - The artifact type (e.g., 'embedding')
1218
+ * @returns Composite key in format "url:type"
1219
+ *
1220
+ * @internal
1221
+ */
677
1222
  artifactKey(url, type) {
678
1223
  return `${url}:${type}`;
679
1224
  }
@@ -704,6 +1249,17 @@ export class MemoryProvider {
704
1249
  }
705
1250
  }
706
1251
  }
1252
+ /**
1253
+ * Invalidate cached artifacts for an entity (except embeddings)
1254
+ *
1255
+ * Called when entity data changes to ensure stale computed content
1256
+ * (like cached transformations) is regenerated. Embeddings are preserved
1257
+ * as they're regenerated separately via autoEmbed.
1258
+ *
1259
+ * @param url - The entity URL whose artifacts should be invalidated
1260
+ *
1261
+ * @internal
1262
+ */
707
1263
  async invalidateArtifacts(url) {
708
1264
  // Keep embedding artifact but mark others for regeneration
709
1265
  for (const [key, artifact] of this.artifacts) {