s3db.js 12.2.3 → 12.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +915 -1669
- package/dist/s3db.cjs.js +284 -10
- package/dist/s3db.cjs.js.map +1 -1
- package/dist/s3db.es.js +284 -10
- package/dist/s3db.es.js.map +1 -1
- package/package.json +1 -1
- package/src/plugins/vector.plugin.js +361 -9
package/dist/s3db.cjs.js
CHANGED
|
@@ -21054,7 +21054,7 @@ class Database extends EventEmitter {
|
|
|
21054
21054
|
this.id = idGenerator(7);
|
|
21055
21055
|
this.version = "1";
|
|
21056
21056
|
this.s3dbVersion = (() => {
|
|
21057
|
-
const [ok, err, version] = tryFn(() => true ? "12.2.
|
|
21057
|
+
const [ok, err, version] = tryFn(() => true ? "12.2.4" : "latest");
|
|
21058
21058
|
return ok ? version : "latest";
|
|
21059
21059
|
})();
|
|
21060
21060
|
this._resourcesMap = {};
|
|
@@ -36879,6 +36879,7 @@ class VectorPlugin extends Plugin {
|
|
|
36879
36879
|
*
|
|
36880
36880
|
* Detects large vector fields and warns if proper behavior is not set.
|
|
36881
36881
|
* Can optionally auto-fix by setting body-overflow behavior.
|
|
36882
|
+
* Auto-creates partitions for optional embedding fields to enable O(1) filtering.
|
|
36882
36883
|
*/
|
|
36883
36884
|
validateVectorStorage() {
|
|
36884
36885
|
for (const resource of Object.values(this.database.resources)) {
|
|
@@ -36915,8 +36916,217 @@ class VectorPlugin extends Plugin {
|
|
|
36915
36916
|
}
|
|
36916
36917
|
}
|
|
36917
36918
|
}
|
|
36919
|
+
this.setupEmbeddingPartitions(resource, vectorFields);
|
|
36918
36920
|
}
|
|
36919
36921
|
}
|
|
36922
|
+
/**
|
|
36923
|
+
* Setup automatic partitions for optional embedding fields
|
|
36924
|
+
*
|
|
36925
|
+
* Creates a partition that separates records with embeddings from those without.
|
|
36926
|
+
* This enables O(1) filtering instead of O(n) full scans when searching/clustering.
|
|
36927
|
+
*
|
|
36928
|
+
* @param {Resource} resource - Resource instance
|
|
36929
|
+
* @param {Array} vectorFields - Detected vector fields with metadata
|
|
36930
|
+
*/
|
|
36931
|
+
setupEmbeddingPartitions(resource, vectorFields) {
|
|
36932
|
+
if (!resource.config) return;
|
|
36933
|
+
for (const vectorField of vectorFields) {
|
|
36934
|
+
const isOptional = this.isFieldOptional(resource.schema.attributes, vectorField.name);
|
|
36935
|
+
if (!isOptional) continue;
|
|
36936
|
+
const partitionName = `byHas${this.capitalize(vectorField.name.replace(/\./g, "_"))}`;
|
|
36937
|
+
const trackingFieldName = `_has${this.capitalize(vectorField.name.replace(/\./g, "_"))}`;
|
|
36938
|
+
if (resource.config.partitions && resource.config.partitions[partitionName]) {
|
|
36939
|
+
this.emit("vector:partition-exists", {
|
|
36940
|
+
resource: resource.name,
|
|
36941
|
+
vectorField: vectorField.name,
|
|
36942
|
+
partition: partitionName,
|
|
36943
|
+
timestamp: Date.now()
|
|
36944
|
+
});
|
|
36945
|
+
continue;
|
|
36946
|
+
}
|
|
36947
|
+
if (!resource.config.partitions) {
|
|
36948
|
+
resource.config.partitions = {};
|
|
36949
|
+
}
|
|
36950
|
+
resource.config.partitions[partitionName] = {
|
|
36951
|
+
fields: {
|
|
36952
|
+
[trackingFieldName]: "boolean"
|
|
36953
|
+
}
|
|
36954
|
+
};
|
|
36955
|
+
if (!resource.schema.attributes[trackingFieldName]) {
|
|
36956
|
+
resource.schema.attributes[trackingFieldName] = {
|
|
36957
|
+
type: "boolean",
|
|
36958
|
+
optional: true,
|
|
36959
|
+
default: false
|
|
36960
|
+
};
|
|
36961
|
+
}
|
|
36962
|
+
this.emit("vector:partition-created", {
|
|
36963
|
+
resource: resource.name,
|
|
36964
|
+
vectorField: vectorField.name,
|
|
36965
|
+
partition: partitionName,
|
|
36966
|
+
trackingField: trackingFieldName,
|
|
36967
|
+
timestamp: Date.now()
|
|
36968
|
+
});
|
|
36969
|
+
console.log(`\u2705 VectorPlugin: Created partition '${partitionName}' for optional embedding field '${vectorField.name}' in resource '${resource.name}'`);
|
|
36970
|
+
this.installEmbeddingHooks(resource, vectorField.name, trackingFieldName);
|
|
36971
|
+
}
|
|
36972
|
+
}
|
|
36973
|
+
/**
|
|
36974
|
+
* Check if a field is optional in the schema
|
|
36975
|
+
*
|
|
36976
|
+
* @param {Object} attributes - Resource attributes
|
|
36977
|
+
* @param {string} fieldPath - Field path (supports dot notation)
|
|
36978
|
+
* @returns {boolean} True if field is optional
|
|
36979
|
+
*/
|
|
36980
|
+
isFieldOptional(attributes, fieldPath) {
|
|
36981
|
+
const parts = fieldPath.split(".");
|
|
36982
|
+
let current = attributes;
|
|
36983
|
+
for (let i = 0; i < parts.length; i++) {
|
|
36984
|
+
const part = parts[i];
|
|
36985
|
+
const attr = current[part];
|
|
36986
|
+
if (!attr) return true;
|
|
36987
|
+
if (typeof attr === "string") {
|
|
36988
|
+
const flags = attr.split("|");
|
|
36989
|
+
if (flags.includes("required")) return false;
|
|
36990
|
+
if (flags.includes("optional") || flags.some((f) => f.startsWith("optional:"))) return true;
|
|
36991
|
+
return !flags.includes("required");
|
|
36992
|
+
}
|
|
36993
|
+
if (typeof attr === "object") {
|
|
36994
|
+
if (i === parts.length - 1) {
|
|
36995
|
+
if (attr.optional === true) return true;
|
|
36996
|
+
if (attr.optional === false) return false;
|
|
36997
|
+
return attr.optional !== false;
|
|
36998
|
+
}
|
|
36999
|
+
if (attr.type === "object" && attr.props) {
|
|
37000
|
+
current = attr.props;
|
|
37001
|
+
} else {
|
|
37002
|
+
return true;
|
|
37003
|
+
}
|
|
37004
|
+
}
|
|
37005
|
+
}
|
|
37006
|
+
return true;
|
|
37007
|
+
}
|
|
37008
|
+
/**
|
|
37009
|
+
* Capitalize first letter of string
|
|
37010
|
+
*
|
|
37011
|
+
* @param {string} str - Input string
|
|
37012
|
+
* @returns {string} Capitalized string
|
|
37013
|
+
*/
|
|
37014
|
+
capitalize(str) {
|
|
37015
|
+
return str.charAt(0).toUpperCase() + str.slice(1);
|
|
37016
|
+
}
|
|
37017
|
+
/**
|
|
37018
|
+
* Install hooks to maintain embedding partition tracking field
|
|
37019
|
+
*
|
|
37020
|
+
* @param {Resource} resource - Resource instance
|
|
37021
|
+
* @param {string} vectorField - Vector field name
|
|
37022
|
+
* @param {string} trackingField - Tracking field name
|
|
37023
|
+
*/
|
|
37024
|
+
installEmbeddingHooks(resource, vectorField, trackingField) {
|
|
37025
|
+
resource.registerHook("beforeInsert", async (data) => {
|
|
37026
|
+
const hasVector = this.hasVectorValue(data, vectorField);
|
|
37027
|
+
this.setNestedValue(data, trackingField, hasVector);
|
|
37028
|
+
return data;
|
|
37029
|
+
});
|
|
37030
|
+
resource.registerHook("beforeUpdate", async (id, updates) => {
|
|
37031
|
+
if (vectorField in updates || this.hasNestedKey(updates, vectorField)) {
|
|
37032
|
+
const hasVector = this.hasVectorValue(updates, vectorField);
|
|
37033
|
+
this.setNestedValue(updates, trackingField, hasVector);
|
|
37034
|
+
}
|
|
37035
|
+
return updates;
|
|
37036
|
+
});
|
|
37037
|
+
this.emit("vector:hooks-installed", {
|
|
37038
|
+
resource: resource.name,
|
|
37039
|
+
vectorField,
|
|
37040
|
+
trackingField,
|
|
37041
|
+
hooks: ["beforeInsert", "beforeUpdate"],
|
|
37042
|
+
timestamp: Date.now()
|
|
37043
|
+
});
|
|
37044
|
+
}
|
|
37045
|
+
/**
|
|
37046
|
+
* Check if data has a valid vector value for the given field
|
|
37047
|
+
*
|
|
37048
|
+
* @param {Object} data - Data object
|
|
37049
|
+
* @param {string} fieldPath - Field path (supports dot notation)
|
|
37050
|
+
* @returns {boolean} True if vector exists and is valid
|
|
37051
|
+
*/
|
|
37052
|
+
hasVectorValue(data, fieldPath) {
|
|
37053
|
+
const value = this.getNestedValue(data, fieldPath);
|
|
37054
|
+
return value != null && Array.isArray(value) && value.length > 0;
|
|
37055
|
+
}
|
|
37056
|
+
/**
|
|
37057
|
+
* Check if object has a nested key
|
|
37058
|
+
*
|
|
37059
|
+
* @param {Object} obj - Object to check
|
|
37060
|
+
* @param {string} path - Dot-notation path
|
|
37061
|
+
* @returns {boolean} True if key exists
|
|
37062
|
+
*/
|
|
37063
|
+
hasNestedKey(obj, path) {
|
|
37064
|
+
const parts = path.split(".");
|
|
37065
|
+
let current = obj;
|
|
37066
|
+
for (const part of parts) {
|
|
37067
|
+
if (current == null || typeof current !== "object") return false;
|
|
37068
|
+
if (!(part in current)) return false;
|
|
37069
|
+
current = current[part];
|
|
37070
|
+
}
|
|
37071
|
+
return true;
|
|
37072
|
+
}
|
|
37073
|
+
/**
|
|
37074
|
+
* Get nested value from object using dot notation
|
|
37075
|
+
*
|
|
37076
|
+
* @param {Object} obj - Object to traverse
|
|
37077
|
+
* @param {string} path - Dot-notation path
|
|
37078
|
+
* @returns {*} Value at path or undefined
|
|
37079
|
+
*/
|
|
37080
|
+
getNestedValue(obj, path) {
|
|
37081
|
+
const parts = path.split(".");
|
|
37082
|
+
let current = obj;
|
|
37083
|
+
for (const part of parts) {
|
|
37084
|
+
if (current == null || typeof current !== "object") return void 0;
|
|
37085
|
+
current = current[part];
|
|
37086
|
+
}
|
|
37087
|
+
return current;
|
|
37088
|
+
}
|
|
37089
|
+
/**
|
|
37090
|
+
* Set nested value in object using dot notation
|
|
37091
|
+
*
|
|
37092
|
+
* @param {Object} obj - Object to modify
|
|
37093
|
+
* @param {string} path - Dot-notation path
|
|
37094
|
+
* @param {*} value - Value to set
|
|
37095
|
+
*/
|
|
37096
|
+
setNestedValue(obj, path, value) {
|
|
37097
|
+
const parts = path.split(".");
|
|
37098
|
+
let current = obj;
|
|
37099
|
+
for (let i = 0; i < parts.length - 1; i++) {
|
|
37100
|
+
const part = parts[i];
|
|
37101
|
+
if (!(part in current) || typeof current[part] !== "object") {
|
|
37102
|
+
current[part] = {};
|
|
37103
|
+
}
|
|
37104
|
+
current = current[part];
|
|
37105
|
+
}
|
|
37106
|
+
current[parts[parts.length - 1]] = value;
|
|
37107
|
+
}
|
|
37108
|
+
/**
|
|
37109
|
+
* Get auto-created embedding partition for a vector field
|
|
37110
|
+
*
|
|
37111
|
+
* Returns partition configuration if an auto-partition exists for the given vector field.
|
|
37112
|
+
* Auto-partitions enable O(1) filtering to only records with embeddings.
|
|
37113
|
+
*
|
|
37114
|
+
* @param {Resource} resource - Resource instance
|
|
37115
|
+
* @param {string} vectorField - Vector field name
|
|
37116
|
+
* @returns {Object|null} Partition config or null
|
|
37117
|
+
*/
|
|
37118
|
+
getAutoEmbeddingPartition(resource, vectorField) {
|
|
37119
|
+
if (!resource.config) return null;
|
|
37120
|
+
const partitionName = `byHas${this.capitalize(vectorField.replace(/\./g, "_"))}`;
|
|
37121
|
+
const trackingFieldName = `_has${this.capitalize(vectorField.replace(/\./g, "_"))}`;
|
|
37122
|
+
if (resource.config.partitions && resource.config.partitions[partitionName]) {
|
|
37123
|
+
return {
|
|
37124
|
+
partitionName,
|
|
37125
|
+
partitionValues: { [trackingFieldName]: true }
|
|
37126
|
+
};
|
|
37127
|
+
}
|
|
37128
|
+
return null;
|
|
37129
|
+
}
|
|
36920
37130
|
/**
|
|
36921
37131
|
* Auto-detect vector field from resource schema
|
|
36922
37132
|
*
|
|
@@ -37054,11 +37264,12 @@ class VectorPlugin extends Plugin {
|
|
|
37054
37264
|
} else if (!vectorField) {
|
|
37055
37265
|
vectorField = "vector";
|
|
37056
37266
|
}
|
|
37057
|
-
|
|
37267
|
+
let {
|
|
37058
37268
|
limit = 10,
|
|
37059
37269
|
distanceMetric = this.config.distanceMetric,
|
|
37060
37270
|
threshold = null,
|
|
37061
|
-
partition = null
|
|
37271
|
+
partition = null,
|
|
37272
|
+
partitionValues = null
|
|
37062
37273
|
} = options;
|
|
37063
37274
|
const distanceFn = this.distanceFunctions[distanceMetric];
|
|
37064
37275
|
if (!distanceFn) {
|
|
@@ -37074,31 +37285,61 @@ class VectorPlugin extends Plugin {
|
|
|
37074
37285
|
});
|
|
37075
37286
|
throw error;
|
|
37076
37287
|
}
|
|
37288
|
+
if (!partition) {
|
|
37289
|
+
const autoPartition = this.getAutoEmbeddingPartition(resource, vectorField);
|
|
37290
|
+
if (autoPartition) {
|
|
37291
|
+
partition = autoPartition.partitionName;
|
|
37292
|
+
partitionValues = autoPartition.partitionValues;
|
|
37293
|
+
this._emitEvent("vector:auto-partition-used", {
|
|
37294
|
+
resource: resource.name,
|
|
37295
|
+
vectorField,
|
|
37296
|
+
partition,
|
|
37297
|
+
partitionValues,
|
|
37298
|
+
timestamp: Date.now()
|
|
37299
|
+
});
|
|
37300
|
+
}
|
|
37301
|
+
}
|
|
37077
37302
|
this._emitEvent("vector:search-start", {
|
|
37078
37303
|
resource: resource.name,
|
|
37079
37304
|
vectorField,
|
|
37080
37305
|
limit,
|
|
37081
37306
|
distanceMetric,
|
|
37082
37307
|
partition,
|
|
37308
|
+
partitionValues,
|
|
37083
37309
|
threshold,
|
|
37084
37310
|
queryDimensions: queryVector.length,
|
|
37085
37311
|
timestamp: startTime
|
|
37086
37312
|
});
|
|
37087
37313
|
try {
|
|
37088
37314
|
let allRecords;
|
|
37089
|
-
if (partition) {
|
|
37315
|
+
if (partition && partitionValues) {
|
|
37090
37316
|
this._emitEvent("vector:partition-filter", {
|
|
37091
37317
|
resource: resource.name,
|
|
37092
37318
|
partition,
|
|
37319
|
+
partitionValues,
|
|
37093
37320
|
timestamp: Date.now()
|
|
37094
37321
|
});
|
|
37095
|
-
allRecords = await resource.list({ partition, partitionValues
|
|
37322
|
+
allRecords = await resource.list({ partition, partitionValues });
|
|
37096
37323
|
} else {
|
|
37097
|
-
allRecords = await resource.getAll();
|
|
37324
|
+
allRecords = resource.getAll ? await resource.getAll() : await resource.list();
|
|
37098
37325
|
}
|
|
37099
37326
|
const totalRecords = allRecords.length;
|
|
37100
37327
|
let processedRecords = 0;
|
|
37101
37328
|
let dimensionMismatches = 0;
|
|
37329
|
+
if (!partition && totalRecords > 1e3) {
|
|
37330
|
+
const warning = {
|
|
37331
|
+
resource: resource.name,
|
|
37332
|
+
operation: "vectorSearch",
|
|
37333
|
+
totalRecords,
|
|
37334
|
+
vectorField,
|
|
37335
|
+
recommendation: "Use partitions to filter data before vector search for better performance"
|
|
37336
|
+
};
|
|
37337
|
+
this._emitEvent("vector:performance-warning", warning);
|
|
37338
|
+
console.warn(`\u26A0\uFE0F VectorPlugin: Performing vectorSearch on ${totalRecords} records without partition filter`);
|
|
37339
|
+
console.warn(` Resource: '${resource.name}'`);
|
|
37340
|
+
console.warn(` Recommendation: Use partition parameter to reduce search space`);
|
|
37341
|
+
console.warn(` Example: resource.vectorSearch(vector, { partition: 'byCategory', partitionValues: { category: 'books' } })`);
|
|
37342
|
+
}
|
|
37102
37343
|
const results = allRecords.filter((record) => record[vectorField] && Array.isArray(record[vectorField])).map((record, index) => {
|
|
37103
37344
|
try {
|
|
37104
37345
|
const distance = distanceFn(queryVector, record[vectorField]);
|
|
@@ -37182,10 +37423,11 @@ class VectorPlugin extends Plugin {
|
|
|
37182
37423
|
} else if (!vectorField) {
|
|
37183
37424
|
vectorField = "vector";
|
|
37184
37425
|
}
|
|
37185
|
-
|
|
37426
|
+
let {
|
|
37186
37427
|
k = 5,
|
|
37187
37428
|
distanceMetric = this.config.distanceMetric,
|
|
37188
37429
|
partition = null,
|
|
37430
|
+
partitionValues = null,
|
|
37189
37431
|
...kmeansOptions
|
|
37190
37432
|
} = options;
|
|
37191
37433
|
const distanceFn = this.distanceFunctions[distanceMetric];
|
|
@@ -37202,30 +37444,62 @@ class VectorPlugin extends Plugin {
|
|
|
37202
37444
|
});
|
|
37203
37445
|
throw error;
|
|
37204
37446
|
}
|
|
37447
|
+
if (!partition) {
|
|
37448
|
+
const autoPartition = this.getAutoEmbeddingPartition(resource, vectorField);
|
|
37449
|
+
if (autoPartition) {
|
|
37450
|
+
partition = autoPartition.partitionName;
|
|
37451
|
+
partitionValues = autoPartition.partitionValues;
|
|
37452
|
+
this._emitEvent("vector:auto-partition-used", {
|
|
37453
|
+
resource: resource.name,
|
|
37454
|
+
vectorField,
|
|
37455
|
+
partition,
|
|
37456
|
+
partitionValues,
|
|
37457
|
+
timestamp: Date.now()
|
|
37458
|
+
});
|
|
37459
|
+
}
|
|
37460
|
+
}
|
|
37205
37461
|
this._emitEvent("vector:cluster-start", {
|
|
37206
37462
|
resource: resource.name,
|
|
37207
37463
|
vectorField,
|
|
37208
37464
|
k,
|
|
37209
37465
|
distanceMetric,
|
|
37210
37466
|
partition,
|
|
37467
|
+
partitionValues,
|
|
37211
37468
|
maxIterations: kmeansOptions.maxIterations || 100,
|
|
37212
37469
|
timestamp: startTime
|
|
37213
37470
|
});
|
|
37214
37471
|
try {
|
|
37215
37472
|
let allRecords;
|
|
37216
|
-
if (partition) {
|
|
37473
|
+
if (partition && partitionValues) {
|
|
37217
37474
|
this._emitEvent("vector:partition-filter", {
|
|
37218
37475
|
resource: resource.name,
|
|
37219
37476
|
partition,
|
|
37477
|
+
partitionValues,
|
|
37220
37478
|
timestamp: Date.now()
|
|
37221
37479
|
});
|
|
37222
|
-
allRecords = await resource.list({ partition, partitionValues
|
|
37480
|
+
allRecords = await resource.list({ partition, partitionValues });
|
|
37223
37481
|
} else {
|
|
37224
|
-
allRecords = await resource.getAll();
|
|
37482
|
+
allRecords = resource.getAll ? await resource.getAll() : await resource.list();
|
|
37225
37483
|
}
|
|
37226
37484
|
const recordsWithVectors = allRecords.filter(
|
|
37227
37485
|
(record) => record[vectorField] && Array.isArray(record[vectorField])
|
|
37228
37486
|
);
|
|
37487
|
+
if (!partition && allRecords.length > 1e3) {
|
|
37488
|
+
const warning = {
|
|
37489
|
+
resource: resource.name,
|
|
37490
|
+
operation: "cluster",
|
|
37491
|
+
totalRecords: allRecords.length,
|
|
37492
|
+
recordsWithVectors: recordsWithVectors.length,
|
|
37493
|
+
vectorField,
|
|
37494
|
+
recommendation: "Use partitions to filter data before clustering for better performance"
|
|
37495
|
+
};
|
|
37496
|
+
this._emitEvent("vector:performance-warning", warning);
|
|
37497
|
+
console.warn(`\u26A0\uFE0F VectorPlugin: Performing clustering on ${allRecords.length} records without partition filter`);
|
|
37498
|
+
console.warn(` Resource: '${resource.name}'`);
|
|
37499
|
+
console.warn(` Records with vectors: ${recordsWithVectors.length}`);
|
|
37500
|
+
console.warn(` Recommendation: Use partition parameter to reduce clustering space`);
|
|
37501
|
+
console.warn(` Example: resource.cluster({ k: 5, partition: 'byCategory', partitionValues: { category: 'books' } })`);
|
|
37502
|
+
}
|
|
37229
37503
|
if (recordsWithVectors.length === 0) {
|
|
37230
37504
|
const error = new VectorError("No vectors found in resource", {
|
|
37231
37505
|
operation: "cluster",
|