s3db.js 12.2.3 → 12.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +915 -1669
- package/dist/s3db.cjs.js +284 -10
- package/dist/s3db.cjs.js.map +1 -1
- package/dist/s3db.es.js +284 -10
- package/dist/s3db.es.js.map +1 -1
- package/package.json +1 -1
- package/src/plugins/vector.plugin.js +361 -9
package/dist/s3db.es.js
CHANGED
|
@@ -21031,7 +21031,7 @@ class Database extends EventEmitter {
|
|
|
21031
21031
|
this.id = idGenerator(7);
|
|
21032
21032
|
this.version = "1";
|
|
21033
21033
|
this.s3dbVersion = (() => {
|
|
21034
|
-
const [ok, err, version] = tryFn(() => true ? "12.2.3" : "latest");
|
|
21034
|
+
const [ok, err, version] = tryFn(() => true ? "12.2.4" : "latest");
|
|
21035
21035
|
return ok ? version : "latest";
|
|
21036
21036
|
})();
|
|
21037
21037
|
this._resourcesMap = {};
|
|
@@ -36856,6 +36856,7 @@ class VectorPlugin extends Plugin {
|
|
|
36856
36856
|
*
|
|
36857
36857
|
* Detects large vector fields and warns if proper behavior is not set.
|
|
36858
36858
|
* Can optionally auto-fix by setting body-overflow behavior.
|
|
36859
|
+
* Auto-creates partitions for optional embedding fields to enable O(1) filtering.
|
|
36859
36860
|
*/
|
|
36860
36861
|
validateVectorStorage() {
|
|
36861
36862
|
for (const resource of Object.values(this.database.resources)) {
|
|
@@ -36892,8 +36893,217 @@ class VectorPlugin extends Plugin {
|
|
|
36892
36893
|
}
|
|
36893
36894
|
}
|
|
36894
36895
|
}
|
|
36896
|
+
this.setupEmbeddingPartitions(resource, vectorFields);
|
|
36895
36897
|
}
|
|
36896
36898
|
}
|
|
36899
|
+
/**
|
|
36900
|
+
* Setup automatic partitions for optional embedding fields
|
|
36901
|
+
*
|
|
36902
|
+
* Creates a partition that separates records with embeddings from those without.
|
|
36903
|
+
* This enables O(1) filtering instead of O(n) full scans when searching/clustering.
|
|
36904
|
+
*
|
|
36905
|
+
* @param {Resource} resource - Resource instance
|
|
36906
|
+
* @param {Array} vectorFields - Detected vector fields with metadata
|
|
36907
|
+
*/
|
|
36908
|
+
setupEmbeddingPartitions(resource, vectorFields) {
|
|
36909
|
+
if (!resource.config) return;
|
|
36910
|
+
for (const vectorField of vectorFields) {
|
|
36911
|
+
const isOptional = this.isFieldOptional(resource.schema.attributes, vectorField.name);
|
|
36912
|
+
if (!isOptional) continue;
|
|
36913
|
+
const partitionName = `byHas${this.capitalize(vectorField.name.replace(/\./g, "_"))}`;
|
|
36914
|
+
const trackingFieldName = `_has${this.capitalize(vectorField.name.replace(/\./g, "_"))}`;
|
|
36915
|
+
if (resource.config.partitions && resource.config.partitions[partitionName]) {
|
|
36916
|
+
this.emit("vector:partition-exists", {
|
|
36917
|
+
resource: resource.name,
|
|
36918
|
+
vectorField: vectorField.name,
|
|
36919
|
+
partition: partitionName,
|
|
36920
|
+
timestamp: Date.now()
|
|
36921
|
+
});
|
|
36922
|
+
continue;
|
|
36923
|
+
}
|
|
36924
|
+
if (!resource.config.partitions) {
|
|
36925
|
+
resource.config.partitions = {};
|
|
36926
|
+
}
|
|
36927
|
+
resource.config.partitions[partitionName] = {
|
|
36928
|
+
fields: {
|
|
36929
|
+
[trackingFieldName]: "boolean"
|
|
36930
|
+
}
|
|
36931
|
+
};
|
|
36932
|
+
if (!resource.schema.attributes[trackingFieldName]) {
|
|
36933
|
+
resource.schema.attributes[trackingFieldName] = {
|
|
36934
|
+
type: "boolean",
|
|
36935
|
+
optional: true,
|
|
36936
|
+
default: false
|
|
36937
|
+
};
|
|
36938
|
+
}
|
|
36939
|
+
this.emit("vector:partition-created", {
|
|
36940
|
+
resource: resource.name,
|
|
36941
|
+
vectorField: vectorField.name,
|
|
36942
|
+
partition: partitionName,
|
|
36943
|
+
trackingField: trackingFieldName,
|
|
36944
|
+
timestamp: Date.now()
|
|
36945
|
+
});
|
|
36946
|
+
console.log(`\u2705 VectorPlugin: Created partition '${partitionName}' for optional embedding field '${vectorField.name}' in resource '${resource.name}'`);
|
|
36947
|
+
this.installEmbeddingHooks(resource, vectorField.name, trackingFieldName);
|
|
36948
|
+
}
|
|
36949
|
+
}
|
|
36950
|
+
/**
|
|
36951
|
+
* Check if a field is optional in the schema
|
|
36952
|
+
*
|
|
36953
|
+
* @param {Object} attributes - Resource attributes
|
|
36954
|
+
* @param {string} fieldPath - Field path (supports dot notation)
|
|
36955
|
+
* @returns {boolean} True if field is optional
|
|
36956
|
+
*/
|
|
36957
|
+
isFieldOptional(attributes, fieldPath) {
|
|
36958
|
+
const parts = fieldPath.split(".");
|
|
36959
|
+
let current = attributes;
|
|
36960
|
+
for (let i = 0; i < parts.length; i++) {
|
|
36961
|
+
const part = parts[i];
|
|
36962
|
+
const attr = current[part];
|
|
36963
|
+
if (!attr) return true;
|
|
36964
|
+
if (typeof attr === "string") {
|
|
36965
|
+
const flags = attr.split("|");
|
|
36966
|
+
if (flags.includes("required")) return false;
|
|
36967
|
+
if (flags.includes("optional") || flags.some((f) => f.startsWith("optional:"))) return true;
|
|
36968
|
+
return !flags.includes("required");
|
|
36969
|
+
}
|
|
36970
|
+
if (typeof attr === "object") {
|
|
36971
|
+
if (i === parts.length - 1) {
|
|
36972
|
+
if (attr.optional === true) return true;
|
|
36973
|
+
if (attr.optional === false) return false;
|
|
36974
|
+
return attr.optional !== false;
|
|
36975
|
+
}
|
|
36976
|
+
if (attr.type === "object" && attr.props) {
|
|
36977
|
+
current = attr.props;
|
|
36978
|
+
} else {
|
|
36979
|
+
return true;
|
|
36980
|
+
}
|
|
36981
|
+
}
|
|
36982
|
+
}
|
|
36983
|
+
return true;
|
|
36984
|
+
}
|
|
36985
|
+
/**
|
|
36986
|
+
* Capitalize first letter of string
|
|
36987
|
+
*
|
|
36988
|
+
* @param {string} str - Input string
|
|
36989
|
+
* @returns {string} Capitalized string
|
|
36990
|
+
*/
|
|
36991
|
+
capitalize(str) {
|
|
36992
|
+
return str.charAt(0).toUpperCase() + str.slice(1);
|
|
36993
|
+
}
|
|
36994
|
+
/**
|
|
36995
|
+
* Install hooks to maintain embedding partition tracking field
|
|
36996
|
+
*
|
|
36997
|
+
* @param {Resource} resource - Resource instance
|
|
36998
|
+
* @param {string} vectorField - Vector field name
|
|
36999
|
+
* @param {string} trackingField - Tracking field name
|
|
37000
|
+
*/
|
|
37001
|
+
installEmbeddingHooks(resource, vectorField, trackingField) {
|
|
37002
|
+
resource.registerHook("beforeInsert", async (data) => {
|
|
37003
|
+
const hasVector = this.hasVectorValue(data, vectorField);
|
|
37004
|
+
this.setNestedValue(data, trackingField, hasVector);
|
|
37005
|
+
return data;
|
|
37006
|
+
});
|
|
37007
|
+
resource.registerHook("beforeUpdate", async (id, updates) => {
|
|
37008
|
+
if (vectorField in updates || this.hasNestedKey(updates, vectorField)) {
|
|
37009
|
+
const hasVector = this.hasVectorValue(updates, vectorField);
|
|
37010
|
+
this.setNestedValue(updates, trackingField, hasVector);
|
|
37011
|
+
}
|
|
37012
|
+
return updates;
|
|
37013
|
+
});
|
|
37014
|
+
this.emit("vector:hooks-installed", {
|
|
37015
|
+
resource: resource.name,
|
|
37016
|
+
vectorField,
|
|
37017
|
+
trackingField,
|
|
37018
|
+
hooks: ["beforeInsert", "beforeUpdate"],
|
|
37019
|
+
timestamp: Date.now()
|
|
37020
|
+
});
|
|
37021
|
+
}
|
|
37022
|
+
/**
|
|
37023
|
+
* Check if data has a valid vector value for the given field
|
|
37024
|
+
*
|
|
37025
|
+
* @param {Object} data - Data object
|
|
37026
|
+
* @param {string} fieldPath - Field path (supports dot notation)
|
|
37027
|
+
* @returns {boolean} True if vector exists and is valid
|
|
37028
|
+
*/
|
|
37029
|
+
hasVectorValue(data, fieldPath) {
|
|
37030
|
+
const value = this.getNestedValue(data, fieldPath);
|
|
37031
|
+
return value != null && Array.isArray(value) && value.length > 0;
|
|
37032
|
+
}
|
|
37033
|
+
/**
|
|
37034
|
+
* Check if object has a nested key
|
|
37035
|
+
*
|
|
37036
|
+
* @param {Object} obj - Object to check
|
|
37037
|
+
* @param {string} path - Dot-notation path
|
|
37038
|
+
* @returns {boolean} True if key exists
|
|
37039
|
+
*/
|
|
37040
|
+
hasNestedKey(obj, path) {
|
|
37041
|
+
const parts = path.split(".");
|
|
37042
|
+
let current = obj;
|
|
37043
|
+
for (const part of parts) {
|
|
37044
|
+
if (current == null || typeof current !== "object") return false;
|
|
37045
|
+
if (!(part in current)) return false;
|
|
37046
|
+
current = current[part];
|
|
37047
|
+
}
|
|
37048
|
+
return true;
|
|
37049
|
+
}
|
|
37050
|
+
/**
|
|
37051
|
+
* Get nested value from object using dot notation
|
|
37052
|
+
*
|
|
37053
|
+
* @param {Object} obj - Object to traverse
|
|
37054
|
+
* @param {string} path - Dot-notation path
|
|
37055
|
+
* @returns {*} Value at path or undefined
|
|
37056
|
+
*/
|
|
37057
|
+
getNestedValue(obj, path) {
|
|
37058
|
+
const parts = path.split(".");
|
|
37059
|
+
let current = obj;
|
|
37060
|
+
for (const part of parts) {
|
|
37061
|
+
if (current == null || typeof current !== "object") return void 0;
|
|
37062
|
+
current = current[part];
|
|
37063
|
+
}
|
|
37064
|
+
return current;
|
|
37065
|
+
}
|
|
37066
|
+
/**
|
|
37067
|
+
* Set nested value in object using dot notation
|
|
37068
|
+
*
|
|
37069
|
+
* @param {Object} obj - Object to modify
|
|
37070
|
+
* @param {string} path - Dot-notation path
|
|
37071
|
+
* @param {*} value - Value to set
|
|
37072
|
+
*/
|
|
37073
|
+
setNestedValue(obj, path, value) {
|
|
37074
|
+
const parts = path.split(".");
|
|
37075
|
+
let current = obj;
|
|
37076
|
+
for (let i = 0; i < parts.length - 1; i++) {
|
|
37077
|
+
const part = parts[i];
|
|
37078
|
+
if (!(part in current) || typeof current[part] !== "object") {
|
|
37079
|
+
current[part] = {};
|
|
37080
|
+
}
|
|
37081
|
+
current = current[part];
|
|
37082
|
+
}
|
|
37083
|
+
current[parts[parts.length - 1]] = value;
|
|
37084
|
+
}
|
|
37085
|
+
/**
|
|
37086
|
+
* Get auto-created embedding partition for a vector field
|
|
37087
|
+
*
|
|
37088
|
+
* Returns partition configuration if an auto-partition exists for the given vector field.
|
|
37089
|
+
* Auto-partitions enable O(1) filtering to only records with embeddings.
|
|
37090
|
+
*
|
|
37091
|
+
* @param {Resource} resource - Resource instance
|
|
37092
|
+
* @param {string} vectorField - Vector field name
|
|
37093
|
+
* @returns {Object|null} Partition config or null
|
|
37094
|
+
*/
|
|
37095
|
+
getAutoEmbeddingPartition(resource, vectorField) {
|
|
37096
|
+
if (!resource.config) return null;
|
|
37097
|
+
const partitionName = `byHas${this.capitalize(vectorField.replace(/\./g, "_"))}`;
|
|
37098
|
+
const trackingFieldName = `_has${this.capitalize(vectorField.replace(/\./g, "_"))}`;
|
|
37099
|
+
if (resource.config.partitions && resource.config.partitions[partitionName]) {
|
|
37100
|
+
return {
|
|
37101
|
+
partitionName,
|
|
37102
|
+
partitionValues: { [trackingFieldName]: true }
|
|
37103
|
+
};
|
|
37104
|
+
}
|
|
37105
|
+
return null;
|
|
37106
|
+
}
|
|
36897
37107
|
/**
|
|
36898
37108
|
* Auto-detect vector field from resource schema
|
|
36899
37109
|
*
|
|
@@ -37031,11 +37241,12 @@ class VectorPlugin extends Plugin {
|
|
|
37031
37241
|
} else if (!vectorField) {
|
|
37032
37242
|
vectorField = "vector";
|
|
37033
37243
|
}
|
|
37034
|
-
const {
|
|
37244
|
+
let {
|
|
37035
37245
|
limit = 10,
|
|
37036
37246
|
distanceMetric = this.config.distanceMetric,
|
|
37037
37247
|
threshold = null,
|
|
37038
|
-
partition = null
|
|
37248
|
+
partition = null,
|
|
37249
|
+
partitionValues = null
|
|
37039
37250
|
} = options;
|
|
37040
37251
|
const distanceFn = this.distanceFunctions[distanceMetric];
|
|
37041
37252
|
if (!distanceFn) {
|
|
@@ -37051,31 +37262,61 @@ class VectorPlugin extends Plugin {
|
|
|
37051
37262
|
});
|
|
37052
37263
|
throw error;
|
|
37053
37264
|
}
|
|
37265
|
+
if (!partition) {
|
|
37266
|
+
const autoPartition = this.getAutoEmbeddingPartition(resource, vectorField);
|
|
37267
|
+
if (autoPartition) {
|
|
37268
|
+
partition = autoPartition.partitionName;
|
|
37269
|
+
partitionValues = autoPartition.partitionValues;
|
|
37270
|
+
this._emitEvent("vector:auto-partition-used", {
|
|
37271
|
+
resource: resource.name,
|
|
37272
|
+
vectorField,
|
|
37273
|
+
partition,
|
|
37274
|
+
partitionValues,
|
|
37275
|
+
timestamp: Date.now()
|
|
37276
|
+
});
|
|
37277
|
+
}
|
|
37278
|
+
}
|
|
37054
37279
|
this._emitEvent("vector:search-start", {
|
|
37055
37280
|
resource: resource.name,
|
|
37056
37281
|
vectorField,
|
|
37057
37282
|
limit,
|
|
37058
37283
|
distanceMetric,
|
|
37059
37284
|
partition,
|
|
37285
|
+
partitionValues,
|
|
37060
37286
|
threshold,
|
|
37061
37287
|
queryDimensions: queryVector.length,
|
|
37062
37288
|
timestamp: startTime
|
|
37063
37289
|
});
|
|
37064
37290
|
try {
|
|
37065
37291
|
let allRecords;
|
|
37066
|
-
if (partition) {
|
|
37292
|
+
if (partition && partitionValues) {
|
|
37067
37293
|
this._emitEvent("vector:partition-filter", {
|
|
37068
37294
|
resource: resource.name,
|
|
37069
37295
|
partition,
|
|
37296
|
+
partitionValues,
|
|
37070
37297
|
timestamp: Date.now()
|
|
37071
37298
|
});
|
|
37072
|
-
allRecords = await resource.list({ partition, partitionValues
|
|
37299
|
+
allRecords = await resource.list({ partition, partitionValues });
|
|
37073
37300
|
} else {
|
|
37074
|
-
allRecords = await resource.getAll();
|
|
37301
|
+
allRecords = resource.getAll ? await resource.getAll() : await resource.list();
|
|
37075
37302
|
}
|
|
37076
37303
|
const totalRecords = allRecords.length;
|
|
37077
37304
|
let processedRecords = 0;
|
|
37078
37305
|
let dimensionMismatches = 0;
|
|
37306
|
+
if (!partition && totalRecords > 1e3) {
|
|
37307
|
+
const warning = {
|
|
37308
|
+
resource: resource.name,
|
|
37309
|
+
operation: "vectorSearch",
|
|
37310
|
+
totalRecords,
|
|
37311
|
+
vectorField,
|
|
37312
|
+
recommendation: "Use partitions to filter data before vector search for better performance"
|
|
37313
|
+
};
|
|
37314
|
+
this._emitEvent("vector:performance-warning", warning);
|
|
37315
|
+
console.warn(`\u26A0\uFE0F VectorPlugin: Performing vectorSearch on ${totalRecords} records without partition filter`);
|
|
37316
|
+
console.warn(` Resource: '${resource.name}'`);
|
|
37317
|
+
console.warn(` Recommendation: Use partition parameter to reduce search space`);
|
|
37318
|
+
console.warn(` Example: resource.vectorSearch(vector, { partition: 'byCategory', partitionValues: { category: 'books' } })`);
|
|
37319
|
+
}
|
|
37079
37320
|
const results = allRecords.filter((record) => record[vectorField] && Array.isArray(record[vectorField])).map((record, index) => {
|
|
37080
37321
|
try {
|
|
37081
37322
|
const distance = distanceFn(queryVector, record[vectorField]);
|
|
@@ -37159,10 +37400,11 @@ class VectorPlugin extends Plugin {
|
|
|
37159
37400
|
} else if (!vectorField) {
|
|
37160
37401
|
vectorField = "vector";
|
|
37161
37402
|
}
|
|
37162
|
-
const {
|
|
37403
|
+
let {
|
|
37163
37404
|
k = 5,
|
|
37164
37405
|
distanceMetric = this.config.distanceMetric,
|
|
37165
37406
|
partition = null,
|
|
37407
|
+
partitionValues = null,
|
|
37166
37408
|
...kmeansOptions
|
|
37167
37409
|
} = options;
|
|
37168
37410
|
const distanceFn = this.distanceFunctions[distanceMetric];
|
|
@@ -37179,30 +37421,62 @@ class VectorPlugin extends Plugin {
|
|
|
37179
37421
|
});
|
|
37180
37422
|
throw error;
|
|
37181
37423
|
}
|
|
37424
|
+
if (!partition) {
|
|
37425
|
+
const autoPartition = this.getAutoEmbeddingPartition(resource, vectorField);
|
|
37426
|
+
if (autoPartition) {
|
|
37427
|
+
partition = autoPartition.partitionName;
|
|
37428
|
+
partitionValues = autoPartition.partitionValues;
|
|
37429
|
+
this._emitEvent("vector:auto-partition-used", {
|
|
37430
|
+
resource: resource.name,
|
|
37431
|
+
vectorField,
|
|
37432
|
+
partition,
|
|
37433
|
+
partitionValues,
|
|
37434
|
+
timestamp: Date.now()
|
|
37435
|
+
});
|
|
37436
|
+
}
|
|
37437
|
+
}
|
|
37182
37438
|
this._emitEvent("vector:cluster-start", {
|
|
37183
37439
|
resource: resource.name,
|
|
37184
37440
|
vectorField,
|
|
37185
37441
|
k,
|
|
37186
37442
|
distanceMetric,
|
|
37187
37443
|
partition,
|
|
37444
|
+
partitionValues,
|
|
37188
37445
|
maxIterations: kmeansOptions.maxIterations || 100,
|
|
37189
37446
|
timestamp: startTime
|
|
37190
37447
|
});
|
|
37191
37448
|
try {
|
|
37192
37449
|
let allRecords;
|
|
37193
|
-
if (partition) {
|
|
37450
|
+
if (partition && partitionValues) {
|
|
37194
37451
|
this._emitEvent("vector:partition-filter", {
|
|
37195
37452
|
resource: resource.name,
|
|
37196
37453
|
partition,
|
|
37454
|
+
partitionValues,
|
|
37197
37455
|
timestamp: Date.now()
|
|
37198
37456
|
});
|
|
37199
|
-
allRecords = await resource.list({ partition, partitionValues
|
|
37457
|
+
allRecords = await resource.list({ partition, partitionValues });
|
|
37200
37458
|
} else {
|
|
37201
|
-
allRecords = await resource.getAll();
|
|
37459
|
+
allRecords = resource.getAll ? await resource.getAll() : await resource.list();
|
|
37202
37460
|
}
|
|
37203
37461
|
const recordsWithVectors = allRecords.filter(
|
|
37204
37462
|
(record) => record[vectorField] && Array.isArray(record[vectorField])
|
|
37205
37463
|
);
|
|
37464
|
+
if (!partition && allRecords.length > 1e3) {
|
|
37465
|
+
const warning = {
|
|
37466
|
+
resource: resource.name,
|
|
37467
|
+
operation: "cluster",
|
|
37468
|
+
totalRecords: allRecords.length,
|
|
37469
|
+
recordsWithVectors: recordsWithVectors.length,
|
|
37470
|
+
vectorField,
|
|
37471
|
+
recommendation: "Use partitions to filter data before clustering for better performance"
|
|
37472
|
+
};
|
|
37473
|
+
this._emitEvent("vector:performance-warning", warning);
|
|
37474
|
+
console.warn(`\u26A0\uFE0F VectorPlugin: Performing clustering on ${allRecords.length} records without partition filter`);
|
|
37475
|
+
console.warn(` Resource: '${resource.name}'`);
|
|
37476
|
+
console.warn(` Records with vectors: ${recordsWithVectors.length}`);
|
|
37477
|
+
console.warn(` Recommendation: Use partition parameter to reduce clustering space`);
|
|
37478
|
+
console.warn(` Example: resource.cluster({ k: 5, partition: 'byCategory', partitionValues: { category: 'books' } })`);
|
|
37479
|
+
}
|
|
37206
37480
|
if (recordsWithVectors.length === 0) {
|
|
37207
37481
|
const error = new VectorError("No vectors found in resource", {
|
|
37208
37482
|
operation: "cluster",
|