s3db.js 12.2.3 → 12.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "s3db.js",
3
- "version": "12.2.3",
3
+ "version": "12.2.4",
4
4
  "description": "Use AWS S3, the world's most reliable document storage, as a database with this ORM.",
5
5
  "main": "dist/s3db.cjs.js",
6
6
  "module": "dist/s3db.es.js",
@@ -86,6 +86,7 @@ export class VectorPlugin extends Plugin {
86
86
  *
87
87
  * Detects large vector fields and warns if proper behavior is not set.
88
88
  * Can optionally auto-fix by setting body-overflow behavior.
89
+ * Auto-creates partitions for optional embedding fields to enable O(1) filtering.
89
90
  */
90
91
  validateVectorStorage() {
91
92
  for (const resource of Object.values(this.database.resources)) {
@@ -131,7 +132,278 @@ export class VectorPlugin extends Plugin {
131
132
  }
132
133
  }
133
134
  }
135
+
136
+ // Auto-create partitions for optional embedding fields
137
+ this.setupEmbeddingPartitions(resource, vectorFields);
138
+ }
139
+ }
140
+
141
+ /**
142
+ * Setup automatic partitions for optional embedding fields
143
+ *
144
+ * Creates a partition that separates records with embeddings from those without.
145
+ * This enables O(1) filtering instead of O(n) full scans when searching/clustering.
146
+ *
147
+ * @param {Resource} resource - Resource instance
148
+ * @param {Array} vectorFields - Detected vector fields with metadata
149
+ */
150
+ setupEmbeddingPartitions(resource, vectorFields) {
151
+ // Skip if resource doesn't have config (e.g., mocked resources)
152
+ if (!resource.config) return;
153
+
154
+ for (const vectorField of vectorFields) {
155
+ // Check if the vector field is optional
156
+ const isOptional = this.isFieldOptional(resource.schema.attributes, vectorField.name);
157
+
158
+ if (!isOptional) continue;
159
+
160
+ // Generate partition name
161
+ const partitionName = `byHas${this.capitalize(vectorField.name.replace(/\./g, '_'))}`;
162
+ const trackingFieldName = `_has${this.capitalize(vectorField.name.replace(/\./g, '_'))}`;
163
+
164
+ // Check if partition already exists
165
+ if (resource.config.partitions && resource.config.partitions[partitionName]) {
166
+ this.emit('vector:partition-exists', {
167
+ resource: resource.name,
168
+ vectorField: vectorField.name,
169
+ partition: partitionName,
170
+ timestamp: Date.now()
171
+ });
172
+ continue;
173
+ }
174
+
175
+ // Create partition configuration
176
+ if (!resource.config.partitions) {
177
+ resource.config.partitions = {};
178
+ }
179
+
180
+ resource.config.partitions[partitionName] = {
181
+ fields: {
182
+ [trackingFieldName]: 'boolean'
183
+ }
184
+ };
185
+
186
+ // Add tracking field to schema if not present
187
+ if (!resource.schema.attributes[trackingFieldName]) {
188
+ resource.schema.attributes[trackingFieldName] = {
189
+ type: 'boolean',
190
+ optional: true,
191
+ default: false
192
+ };
193
+ }
194
+
195
+ // Emit event
196
+ this.emit('vector:partition-created', {
197
+ resource: resource.name,
198
+ vectorField: vectorField.name,
199
+ partition: partitionName,
200
+ trackingField: trackingFieldName,
201
+ timestamp: Date.now()
202
+ });
203
+
204
+ console.log(`✅ VectorPlugin: Created partition '${partitionName}' for optional embedding field '${vectorField.name}' in resource '${resource.name}'`);
205
+
206
+ // Install hooks to maintain the partition
207
+ this.installEmbeddingHooks(resource, vectorField.name, trackingFieldName);
208
+ }
209
+ }
210
+
211
+ /**
212
+ * Check if a field is optional in the schema
213
+ *
214
+ * @param {Object} attributes - Resource attributes
215
+ * @param {string} fieldPath - Field path (supports dot notation)
216
+ * @returns {boolean} True if field is optional
217
+ */
218
+ isFieldOptional(attributes, fieldPath) {
219
+ const parts = fieldPath.split('.');
220
+ let current = attributes;
221
+
222
+ for (let i = 0; i < parts.length; i++) {
223
+ const part = parts[i];
224
+ const attr = current[part];
225
+
226
+ if (!attr) return true; // Field doesn't exist = optional
227
+
228
+ // Shorthand notation (e.g., 'string|required', 'embedding:1536')
229
+ if (typeof attr === 'string') {
230
+ const flags = attr.split('|');
231
+ // If it has 'required' flag, it's not optional
232
+ if (flags.includes('required')) return false;
233
+ // If it has 'optional' flag, it's optional
234
+ if (flags.includes('optional') || flags.some(f => f.startsWith('optional:'))) return true;
235
+ // By default, fields without 'required' are optional
236
+ return !flags.includes('required');
237
+ }
238
+
239
+ // Expanded notation (e.g., { type: 'string', optional: true })
240
+ if (typeof attr === 'object') {
241
+ // If we're at the last part, check if it's optional
242
+ if (i === parts.length - 1) {
243
+ // Explicit optional field
244
+ if (attr.optional === true) return true;
245
+ // Explicit required field
246
+ if (attr.optional === false) return false;
247
+ // Check for 'required' in nested object structure
248
+ // Default: optional unless explicitly marked as required
249
+ return attr.optional !== false;
250
+ }
251
+
252
+ // Navigate into nested object
253
+ if (attr.type === 'object' && attr.props) {
254
+ current = attr.props;
255
+ } else {
256
+ return true; // Can't navigate further = assume optional
257
+ }
258
+ }
134
259
  }
260
+
261
+ return true; // Default to optional
262
+ }
263
+
264
+ /**
265
+ * Capitalize first letter of string
266
+ *
267
+ * @param {string} str - Input string
268
+ * @returns {string} Capitalized string
269
+ */
270
+ capitalize(str) {
271
+ return str.charAt(0).toUpperCase() + str.slice(1);
272
+ }
273
+
274
+ /**
275
+ * Install hooks to maintain embedding partition tracking field
276
+ *
277
+ * @param {Resource} resource - Resource instance
278
+ * @param {string} vectorField - Vector field name
279
+ * @param {string} trackingField - Tracking field name
280
+ */
281
+ installEmbeddingHooks(resource, vectorField, trackingField) {
282
+ // beforeInsert: Set tracking field based on vector presence
283
+ resource.registerHook('beforeInsert', async (data) => {
284
+ const hasVector = this.hasVectorValue(data, vectorField);
285
+ this.setNestedValue(data, trackingField, hasVector);
286
+ return data;
287
+ });
288
+
289
+ // beforeUpdate: Update tracking field if vector changes
290
+ resource.registerHook('beforeUpdate', async (id, updates) => {
291
+ // Check if the vector field is being updated
292
+ if (vectorField in updates || this.hasNestedKey(updates, vectorField)) {
293
+ const hasVector = this.hasVectorValue(updates, vectorField);
294
+ this.setNestedValue(updates, trackingField, hasVector);
295
+ }
296
+ return updates;
297
+ });
298
+
299
+ this.emit('vector:hooks-installed', {
300
+ resource: resource.name,
301
+ vectorField,
302
+ trackingField,
303
+ hooks: ['beforeInsert', 'beforeUpdate'],
304
+ timestamp: Date.now()
305
+ });
306
+ }
307
+
308
+ /**
309
+ * Check if data has a valid vector value for the given field
310
+ *
311
+ * @param {Object} data - Data object
312
+ * @param {string} fieldPath - Field path (supports dot notation)
313
+ * @returns {boolean} True if vector exists and is valid
314
+ */
315
+ hasVectorValue(data, fieldPath) {
316
+ const value = this.getNestedValue(data, fieldPath);
317
+ return value != null && Array.isArray(value) && value.length > 0;
318
+ }
319
+
320
+ /**
321
+ * Check if object has a nested key
322
+ *
323
+ * @param {Object} obj - Object to check
324
+ * @param {string} path - Dot-notation path
325
+ * @returns {boolean} True if key exists
326
+ */
327
+ hasNestedKey(obj, path) {
328
+ const parts = path.split('.');
329
+ let current = obj;
330
+
331
+ for (const part of parts) {
332
+ if (current == null || typeof current !== 'object') return false;
333
+ if (!(part in current)) return false;
334
+ current = current[part];
335
+ }
336
+
337
+ return true;
338
+ }
339
+
340
+ /**
341
+ * Get nested value from object using dot notation
342
+ *
343
+ * @param {Object} obj - Object to traverse
344
+ * @param {string} path - Dot-notation path
345
+ * @returns {*} Value at path or undefined
346
+ */
347
+ getNestedValue(obj, path) {
348
+ const parts = path.split('.');
349
+ let current = obj;
350
+
351
+ for (const part of parts) {
352
+ if (current == null || typeof current !== 'object') return undefined;
353
+ current = current[part];
354
+ }
355
+
356
+ return current;
357
+ }
358
+
359
+ /**
360
+ * Set nested value in object using dot notation
361
+ *
362
+ * @param {Object} obj - Object to modify
363
+ * @param {string} path - Dot-notation path
364
+ * @param {*} value - Value to set
365
+ */
366
+ setNestedValue(obj, path, value) {
367
+ const parts = path.split('.');
368
+ let current = obj;
369
+
370
+ for (let i = 0; i < parts.length - 1; i++) {
371
+ const part = parts[i];
372
+ if (!(part in current) || typeof current[part] !== 'object') {
373
+ current[part] = {};
374
+ }
375
+ current = current[part];
376
+ }
377
+
378
+ current[parts[parts.length - 1]] = value;
379
+ }
380
+
381
+ /**
382
+ * Get auto-created embedding partition for a vector field
383
+ *
384
+ * Returns partition configuration if an auto-partition exists for the given vector field.
385
+ * Auto-partitions enable O(1) filtering to only records with embeddings.
386
+ *
387
+ * @param {Resource} resource - Resource instance
388
+ * @param {string} vectorField - Vector field name
389
+ * @returns {Object|null} Partition config or null
390
+ */
391
+ getAutoEmbeddingPartition(resource, vectorField) {
392
+ // Skip if resource doesn't have config (e.g., mocked resources)
393
+ if (!resource.config) return null;
394
+
395
+ const partitionName = `byHas${this.capitalize(vectorField.replace(/\./g, '_'))}`;
396
+ const trackingFieldName = `_has${this.capitalize(vectorField.replace(/\./g, '_'))}`;
397
+
398
+ // Check if auto-partition exists
399
+ if (resource.config.partitions && resource.config.partitions[partitionName]) {
400
+ return {
401
+ partitionName,
402
+ partitionValues: { [trackingFieldName]: true }
403
+ };
404
+ }
405
+
406
+ return null;
135
407
  }
136
408
 
137
409
  /**
@@ -313,11 +585,12 @@ export class VectorPlugin extends Plugin {
313
585
  vectorField = 'vector'; // Fallback to default
314
586
  }
315
587
 
316
- const {
588
+ let {
317
589
  limit = 10,
318
590
  distanceMetric = this.config.distanceMetric,
319
591
  threshold = null,
320
- partition = null
592
+ partition = null,
593
+ partitionValues = null
321
594
  } = options;
322
595
 
323
596
  const distanceFn = this.distanceFunctions[distanceMetric];
@@ -337,6 +610,23 @@ export class VectorPlugin extends Plugin {
337
610
  throw error;
338
611
  }
339
612
 
613
+ // Auto-use embedding partition if available and no custom partition specified
614
+ if (!partition) {
615
+ const autoPartition = this.getAutoEmbeddingPartition(resource, vectorField);
616
+ if (autoPartition) {
617
+ partition = autoPartition.partitionName;
618
+ partitionValues = autoPartition.partitionValues;
619
+
620
+ this._emitEvent('vector:auto-partition-used', {
621
+ resource: resource.name,
622
+ vectorField,
623
+ partition,
624
+ partitionValues,
625
+ timestamp: Date.now()
626
+ });
627
+ }
628
+ }
629
+
340
630
  // Emit start event
341
631
  this._emitEvent('vector:search-start', {
342
632
  resource: resource.name,
@@ -344,6 +634,7 @@ export class VectorPlugin extends Plugin {
344
634
  limit,
345
635
  distanceMetric,
346
636
  partition,
637
+ partitionValues,
347
638
  threshold,
348
639
  queryDimensions: queryVector.length,
349
640
  timestamp: startTime
@@ -352,21 +643,41 @@ export class VectorPlugin extends Plugin {
352
643
  try {
353
644
  // Get all records (with optional partition filter)
354
645
  let allRecords;
355
- if (partition) {
646
+ if (partition && partitionValues) {
356
647
  this._emitEvent('vector:partition-filter', {
357
648
  resource: resource.name,
358
649
  partition,
650
+ partitionValues,
359
651
  timestamp: Date.now()
360
652
  });
361
- allRecords = await resource.list({ partition, partitionValues: partition });
653
+ allRecords = await resource.list({ partition, partitionValues });
362
654
  } else {
363
- allRecords = await resource.getAll();
655
+ // Fallback to list() if getAll() doesn't exist (for mocked resources in tests)
656
+ allRecords = resource.getAll ? await resource.getAll() : await resource.list();
364
657
  }
365
658
 
366
659
  const totalRecords = allRecords.length;
367
660
  let processedRecords = 0;
368
661
  let dimensionMismatches = 0;
369
662
 
663
+ // Performance warning for large resources without partition
664
+ if (!partition && totalRecords > 1000) {
665
+ const warning = {
666
+ resource: resource.name,
667
+ operation: 'vectorSearch',
668
+ totalRecords,
669
+ vectorField,
670
+ recommendation: 'Use partitions to filter data before vector search for better performance'
671
+ };
672
+
673
+ this._emitEvent('vector:performance-warning', warning);
674
+
675
+ console.warn(`⚠️ VectorPlugin: Performing vectorSearch on ${totalRecords} records without partition filter`);
676
+ console.warn(` Resource: '${resource.name}'`);
677
+ console.warn(` Recommendation: Use partition parameter to reduce search space`);
678
+ console.warn(` Example: resource.vectorSearch(vector, { partition: 'byCategory', partitionValues: { category: 'books' } })`);
679
+ }
680
+
370
681
  // Calculate distances
371
682
  const results = allRecords
372
683
  .filter(record => record[vectorField] && Array.isArray(record[vectorField]))
@@ -473,10 +784,11 @@ export class VectorPlugin extends Plugin {
473
784
  vectorField = 'vector'; // Fallback to default
474
785
  }
475
786
 
476
- const {
787
+ let {
477
788
  k = 5,
478
789
  distanceMetric = this.config.distanceMetric,
479
790
  partition = null,
791
+ partitionValues = null,
480
792
  ...kmeansOptions
481
793
  } = options;
482
794
 
@@ -497,6 +809,23 @@ export class VectorPlugin extends Plugin {
497
809
  throw error;
498
810
  }
499
811
 
812
+ // Auto-use embedding partition if available and no custom partition specified
813
+ if (!partition) {
814
+ const autoPartition = this.getAutoEmbeddingPartition(resource, vectorField);
815
+ if (autoPartition) {
816
+ partition = autoPartition.partitionName;
817
+ partitionValues = autoPartition.partitionValues;
818
+
819
+ this._emitEvent('vector:auto-partition-used', {
820
+ resource: resource.name,
821
+ vectorField,
822
+ partition,
823
+ partitionValues,
824
+ timestamp: Date.now()
825
+ });
826
+ }
827
+ }
828
+
500
829
  // Emit start event
501
830
  this._emitEvent('vector:cluster-start', {
502
831
  resource: resource.name,
@@ -504,6 +833,7 @@ export class VectorPlugin extends Plugin {
504
833
  k,
505
834
  distanceMetric,
506
835
  partition,
836
+ partitionValues,
507
837
  maxIterations: kmeansOptions.maxIterations || 100,
508
838
  timestamp: startTime
509
839
  });
@@ -511,15 +841,17 @@ export class VectorPlugin extends Plugin {
511
841
  try {
512
842
  // Get all records (with optional partition filter)
513
843
  let allRecords;
514
- if (partition) {
844
+ if (partition && partitionValues) {
515
845
  this._emitEvent('vector:partition-filter', {
516
846
  resource: resource.name,
517
847
  partition,
848
+ partitionValues,
518
849
  timestamp: Date.now()
519
850
  });
520
- allRecords = await resource.list({ partition, partitionValues: partition });
851
+ allRecords = await resource.list({ partition, partitionValues });
521
852
  } else {
522
- allRecords = await resource.getAll();
853
+ // Fallback to list() if getAll() doesn't exist (for mocked resources in tests)
854
+ allRecords = resource.getAll ? await resource.getAll() : await resource.list();
523
855
  }
524
856
 
525
857
  // Extract vectors
@@ -527,6 +859,26 @@ export class VectorPlugin extends Plugin {
527
859
  record => record[vectorField] && Array.isArray(record[vectorField])
528
860
  );
529
861
 
862
+ // Performance warning for large resources without partition
863
+ if (!partition && allRecords.length > 1000) {
864
+ const warning = {
865
+ resource: resource.name,
866
+ operation: 'cluster',
867
+ totalRecords: allRecords.length,
868
+ recordsWithVectors: recordsWithVectors.length,
869
+ vectorField,
870
+ recommendation: 'Use partitions to filter data before clustering for better performance'
871
+ };
872
+
873
+ this._emitEvent('vector:performance-warning', warning);
874
+
875
+ console.warn(`⚠️ VectorPlugin: Performing clustering on ${allRecords.length} records without partition filter`);
876
+ console.warn(` Resource: '${resource.name}'`);
877
+ console.warn(` Records with vectors: ${recordsWithVectors.length}`);
878
+ console.warn(` Recommendation: Use partition parameter to reduce clustering space`);
879
+ console.warn(` Example: resource.cluster({ k: 5, partition: 'byCategory', partitionValues: { category: 'books' } })`);
880
+ }
881
+
530
882
  if (recordsWithVectors.length === 0) {
531
883
  const error = new VectorError('No vectors found in resource', {
532
884
  operation: 'cluster',