s3db.js 11.2.4 → 11.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/s3db-cli.js +588 -74
- package/dist/s3db.cjs.js +1361 -23
- package/dist/s3db.cjs.js.map +1 -1
- package/dist/s3db.d.ts +3 -1
- package/dist/s3db.es.js +1359 -24
- package/dist/s3db.es.js.map +1 -1
- package/package.json +2 -1
- package/src/concerns/base62.js +70 -0
- package/src/plugins/index.js +1 -0
- package/src/plugins/vector/distances.js +173 -0
- package/src/plugins/vector/kmeans.js +367 -0
- package/src/plugins/vector/metrics.js +369 -0
- package/src/plugins/vector/vector-error.js +43 -0
- package/src/plugins/vector.plugin.js +687 -0
- package/src/resource.class.js +79 -0
- package/src/s3db.d.ts +3 -1
- package/src/schema.class.js +232 -41
- package/src/validator.class.js +8 -0
package/dist/s3db.es.js
CHANGED
|
@@ -77,6 +77,41 @@ const decodeDecimal = (s) => {
|
|
|
77
77
|
const num = decPart ? Number(decodedInt + "." + decPart) : decodedInt;
|
|
78
78
|
return negative ? -num : num;
|
|
79
79
|
};
|
|
80
|
+
/**
 * Encode a number as a compact fixed-point string.
 *
 * The value is multiplied by 10^precision, rounded to an integer and written
 * with the module-level `alphabet` / `base` digit set, prefixed with "^"
 * (and "-" for negative values). Zero is always encoded as "^0".
 *
 * @param {number} n - Value to encode.
 * @param {number} [precision=6] - Number of decimal places kept.
 * @returns {string} The encoded string, or the literal "undefined" when `n`
 *   is not a finite number or cannot be represented once scaled.
 */
const encodeFixedPoint = (n, precision = 6) => {
  // Number.isFinite rejects non-numbers, NaN and ±Infinity without coercion.
  if (!Number.isFinite(n)) return "undefined";
  const scale = Math.pow(10, precision);
  const scaled = Math.round(n * scale);
  // A finite input can still overflow to ±Infinity (or NaN) once scaled;
  // without this guard the digit loop below would never terminate.
  if (!Number.isFinite(scaled)) return "undefined";
  if (scaled === 0) return "^0";
  const negative = scaled < 0;
  let num = Math.abs(scaled);
  let s = "";
  while (num > 0) {
    s = alphabet[num % base] + s;
    num = Math.floor(num / base);
  }
  return "^" + (negative ? "-" : "") + s;
};
|
|
95
|
+
/**
 * Decode a string produced by `encodeFixedPoint` back into a number.
 *
 * The input must start with "^", optionally followed by "-", followed by one
 * or more digits that are looked up in the module-level `charToValue` table
 * and accumulated in the module-level `base`. The integer is then divided by
 * 10^precision.
 *
 * @param {string} s - Encoded value.
 * @param {number} [precision=6] - Decimal places used when encoding.
 * @returns {number} The decoded number, or NaN when `s` is not a string, lacks
 *   the "^" prefix, has no digits, or contains an unknown digit.
 */
const decodeFixedPoint = (s, precision = 6) => {
  if (typeof s !== "string" || !s.startsWith("^")) return NaN;
  let digits = s.slice(1);
  if (digits === "0") return 0;
  const negative = digits[0] === "-";
  if (negative) {
    digits = digits.slice(1);
  }
  // "^" and "^-" carry no digits at all; report them as invalid instead of
  // silently decoding to 0 / -0.
  if (digits === "") return NaN;
  let r = 0;
  for (let i = 0; i < digits.length; i++) {
    const idx = charToValue[digits[i]];
    if (idx === void 0) return NaN;
    r = r * base + idx;
  }
  const scale = Math.pow(10, precision);
  return (negative ? -r : r) / scale;
};
|
|
80
115
|
|
|
81
116
|
const utf8BytesMemory = /* @__PURE__ */ new Map();
|
|
82
117
|
const UTF8_MEMORY_MAX_SIZE = 1e4;
|
|
@@ -11501,6 +11536,11 @@ class Validator extends FastestValidator {
|
|
|
11501
11536
|
type: "any",
|
|
11502
11537
|
custom: this.autoEncrypt ? jsonHandler : void 0
|
|
11503
11538
|
});
|
|
11539
|
+
this.alias("embedding", {
|
|
11540
|
+
type: "array",
|
|
11541
|
+
items: "number",
|
|
11542
|
+
empty: false
|
|
11543
|
+
});
|
|
11504
11544
|
}
|
|
11505
11545
|
}
|
|
11506
11546
|
const ValidatorManager = new Proxy(Validator, {
|
|
@@ -11749,6 +11789,59 @@ const SchemaActions = {
|
|
|
11749
11789
|
}
|
|
11750
11790
|
return NaN;
|
|
11751
11791
|
});
|
|
11792
|
+
},
|
|
11793
|
+
fromArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
|
|
11794
|
+
if (value === null || value === void 0 || !Array.isArray(value)) {
|
|
11795
|
+
return value;
|
|
11796
|
+
}
|
|
11797
|
+
if (value.length === 0) {
|
|
11798
|
+
return "";
|
|
11799
|
+
}
|
|
11800
|
+
const encodedItems = value.map((item) => {
|
|
11801
|
+
if (typeof item === "number" && !isNaN(item)) {
|
|
11802
|
+
return encodeFixedPoint(item, precision);
|
|
11803
|
+
}
|
|
11804
|
+
const n = Number(item);
|
|
11805
|
+
return isNaN(n) ? "" : encodeFixedPoint(n, precision);
|
|
11806
|
+
});
|
|
11807
|
+
return encodedItems.join(separator);
|
|
11808
|
+
},
|
|
11809
|
+
/**
 * Parse a separator-joined string of fixed-point encoded values back into an
 * array of numbers.
 *
 * - Arrays are mapped element-wise (numbers kept, everything else decoded).
 * - null / undefined are returned unchanged; "" yields [].
 * - A backslash makes the following character literal, so it is never
 *   treated as a separator.
 * - Empty slots decode to NaN; other slots go through `decodeFixedPoint`.
 */
toArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
  if (Array.isArray(value)) {
    return value.map((entry) => (typeof entry === "number" ? entry : decodeFixedPoint(entry, precision)));
  }
  if (value === null || value === void 0) {
    return value;
  }
  if (value === "") {
    return [];
  }
  const text = String(value);
  const tokens = [];
  let buffer = "";
  for (let pos = 0; pos < text.length; pos++) {
    const ch = text[pos];
    if (ch === "\\" && pos + 1 < text.length) {
      // Escaped character: copy it verbatim and step over it.
      buffer += text[pos + 1];
      pos++;
    } else if (ch === separator) {
      tokens.push(buffer);
      buffer = "";
    } else {
      buffer += ch;
    }
  }
  tokens.push(buffer);
  // Tokens are always strings here; an empty token marks a missing value.
  return tokens.map((token) => (token === "" ? NaN : decodeFixedPoint(token, precision)));
}
|
|
11753
11846
|
};
|
|
11754
11847
|
class Schema {
|
|
@@ -11818,18 +11911,89 @@ class Schema {
|
|
|
11818
11911
|
}
|
|
11819
11912
|
return objectKeys;
|
|
11820
11913
|
}
|
|
11914
|
+
_generateHooksFromOriginalAttributes(attributes, prefix = "") {
|
|
11915
|
+
for (const [key, value] of Object.entries(attributes)) {
|
|
11916
|
+
if (key.startsWith("$$")) continue;
|
|
11917
|
+
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
11918
|
+
if (typeof value === "object" && value !== null && !Array.isArray(value) && value.type) {
|
|
11919
|
+
if (value.type === "array" && value.items) {
|
|
11920
|
+
const itemsType = value.items;
|
|
11921
|
+
const arrayLength = typeof value.length === "number" ? value.length : null;
|
|
11922
|
+
if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
|
|
11923
|
+
this.addHook("beforeMap", fullKey, "fromArray");
|
|
11924
|
+
this.addHook("afterUnmap", fullKey, "toArray");
|
|
11925
|
+
} else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
|
|
11926
|
+
const isIntegerArray = typeof itemsType === "string" && itemsType.includes("integer");
|
|
11927
|
+
const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
|
|
11928
|
+
if (isIntegerArray) {
|
|
11929
|
+
this.addHook("beforeMap", fullKey, "fromArrayOfNumbers");
|
|
11930
|
+
this.addHook("afterUnmap", fullKey, "toArrayOfNumbers");
|
|
11931
|
+
} else if (isEmbedding) {
|
|
11932
|
+
this.addHook("beforeMap", fullKey, "fromArrayOfEmbeddings");
|
|
11933
|
+
this.addHook("afterUnmap", fullKey, "toArrayOfEmbeddings");
|
|
11934
|
+
} else {
|
|
11935
|
+
this.addHook("beforeMap", fullKey, "fromArrayOfDecimals");
|
|
11936
|
+
this.addHook("afterUnmap", fullKey, "toArrayOfDecimals");
|
|
11937
|
+
}
|
|
11938
|
+
}
|
|
11939
|
+
}
|
|
11940
|
+
} else if (typeof value === "object" && value !== null && !Array.isArray(value) && !value.type) {
|
|
11941
|
+
this._generateHooksFromOriginalAttributes(value, fullKey);
|
|
11942
|
+
}
|
|
11943
|
+
}
|
|
11944
|
+
}
|
|
11821
11945
|
generateAutoHooks() {
|
|
11946
|
+
this._generateHooksFromOriginalAttributes(this.attributes);
|
|
11822
11947
|
const schema = flatten(cloneDeep(this.attributes), { safe: true });
|
|
11823
11948
|
for (const [name, definition] of Object.entries(schema)) {
|
|
11824
|
-
if (
|
|
11825
|
-
|
|
11949
|
+
if (name.includes("$$")) continue;
|
|
11950
|
+
if (this.options.hooks.beforeMap[name] || this.options.hooks.afterUnmap[name]) {
|
|
11951
|
+
continue;
|
|
11952
|
+
}
|
|
11953
|
+
const defStr = typeof definition === "string" ? definition : "";
|
|
11954
|
+
const defType = typeof definition === "object" && definition !== null ? definition.type : null;
|
|
11955
|
+
const isEmbeddingType = defStr.includes("embedding") || defType === "embedding";
|
|
11956
|
+
if (isEmbeddingType) {
|
|
11957
|
+
const lengthMatch = defStr.match(/embedding:(\d+)/);
|
|
11958
|
+
if (lengthMatch) {
|
|
11959
|
+
parseInt(lengthMatch[1], 10);
|
|
11960
|
+
} else if (defStr.includes("length:")) {
|
|
11961
|
+
const match = defStr.match(/length:(\d+)/);
|
|
11962
|
+
if (match) parseInt(match[1], 10);
|
|
11963
|
+
}
|
|
11964
|
+
this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
|
|
11965
|
+
this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
|
|
11966
|
+
continue;
|
|
11967
|
+
}
|
|
11968
|
+
const isArray = defStr.includes("array") || defType === "array";
|
|
11969
|
+
if (isArray) {
|
|
11970
|
+
let itemsType = null;
|
|
11971
|
+
if (typeof definition === "object" && definition !== null && definition.items) {
|
|
11972
|
+
itemsType = definition.items;
|
|
11973
|
+
} else if (defStr.includes("items:string")) {
|
|
11974
|
+
itemsType = "string";
|
|
11975
|
+
} else if (defStr.includes("items:number")) {
|
|
11976
|
+
itemsType = "number";
|
|
11977
|
+
}
|
|
11978
|
+
if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
|
|
11826
11979
|
this.addHook("beforeMap", name, "fromArray");
|
|
11827
11980
|
this.addHook("afterUnmap", name, "toArray");
|
|
11828
|
-
} else if (
|
|
11829
|
-
const isIntegerArray =
|
|
11981
|
+
} else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
|
|
11982
|
+
const isIntegerArray = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer") || typeof itemsType === "string" && itemsType.includes("integer");
|
|
11983
|
+
let arrayLength = null;
|
|
11984
|
+
if (typeof definition === "object" && definition !== null && typeof definition.length === "number") {
|
|
11985
|
+
arrayLength = definition.length;
|
|
11986
|
+
} else if (defStr.includes("length:")) {
|
|
11987
|
+
const match = defStr.match(/length:(\d+)/);
|
|
11988
|
+
if (match) arrayLength = parseInt(match[1], 10);
|
|
11989
|
+
}
|
|
11990
|
+
const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
|
|
11830
11991
|
if (isIntegerArray) {
|
|
11831
11992
|
this.addHook("beforeMap", name, "fromArrayOfNumbers");
|
|
11832
11993
|
this.addHook("afterUnmap", name, "toArrayOfNumbers");
|
|
11994
|
+
} else if (isEmbedding) {
|
|
11995
|
+
this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
|
|
11996
|
+
this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
|
|
11833
11997
|
} else {
|
|
11834
11998
|
this.addHook("beforeMap", name, "fromArrayOfDecimals");
|
|
11835
11999
|
this.addHook("afterUnmap", name, "toArrayOfDecimals");
|
|
@@ -11837,7 +12001,7 @@ class Schema {
|
|
|
11837
12001
|
}
|
|
11838
12002
|
continue;
|
|
11839
12003
|
}
|
|
11840
|
-
if (
|
|
12004
|
+
if (defStr.includes("secret") || defType === "secret") {
|
|
11841
12005
|
if (this.options.autoEncrypt) {
|
|
11842
12006
|
this.addHook("beforeMap", name, "encrypt");
|
|
11843
12007
|
}
|
|
@@ -11846,8 +12010,8 @@ class Schema {
|
|
|
11846
12010
|
}
|
|
11847
12011
|
continue;
|
|
11848
12012
|
}
|
|
11849
|
-
if (
|
|
11850
|
-
const isInteger =
|
|
12013
|
+
if (defStr.includes("number") || defType === "number") {
|
|
12014
|
+
const isInteger = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer");
|
|
11851
12015
|
if (isInteger) {
|
|
11852
12016
|
this.addHook("beforeMap", name, "toBase62");
|
|
11853
12017
|
this.addHook("afterUnmap", name, "fromBase62");
|
|
@@ -11857,17 +12021,17 @@ class Schema {
|
|
|
11857
12021
|
}
|
|
11858
12022
|
continue;
|
|
11859
12023
|
}
|
|
11860
|
-
if (
|
|
12024
|
+
if (defStr.includes("boolean") || defType === "boolean") {
|
|
11861
12025
|
this.addHook("beforeMap", name, "fromBool");
|
|
11862
12026
|
this.addHook("afterUnmap", name, "toBool");
|
|
11863
12027
|
continue;
|
|
11864
12028
|
}
|
|
11865
|
-
if (
|
|
12029
|
+
if (defStr.includes("json") || defType === "json") {
|
|
11866
12030
|
this.addHook("beforeMap", name, "toJSON");
|
|
11867
12031
|
this.addHook("afterUnmap", name, "fromJSON");
|
|
11868
12032
|
continue;
|
|
11869
12033
|
}
|
|
11870
|
-
if (definition === "object" ||
|
|
12034
|
+
if (definition === "object" || defStr.includes("object") || defType === "object") {
|
|
11871
12035
|
this.addHook("beforeMap", name, "toJSON");
|
|
11872
12036
|
this.addHook("afterUnmap", name, "fromJSON");
|
|
11873
12037
|
continue;
|
|
@@ -12009,7 +12173,8 @@ class Schema {
|
|
|
12009
12173
|
const originalKey = reversedMap && reversedMap[key] ? reversedMap[key] : key;
|
|
12010
12174
|
let parsedValue = value;
|
|
12011
12175
|
const attrDef = this.getAttributeDefinition(originalKey);
|
|
12012
|
-
|
|
12176
|
+
const hasAfterUnmapHook = this.options.hooks?.afterUnmap?.[originalKey];
|
|
12177
|
+
if (!hasAfterUnmapHook && typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
|
|
12013
12178
|
if (typeof parsedValue === "string" && parsedValue !== "") {
|
|
12014
12179
|
parsedValue = decode(parsedValue);
|
|
12015
12180
|
} else if (typeof parsedValue === "number") ; else {
|
|
@@ -12074,18 +12239,38 @@ class Schema {
|
|
|
12074
12239
|
preprocessAttributesForValidation(attributes) {
|
|
12075
12240
|
const processed = {};
|
|
12076
12241
|
for (const [key, value] of Object.entries(attributes)) {
|
|
12077
|
-
if (typeof value === "
|
|
12078
|
-
|
|
12079
|
-
|
|
12080
|
-
|
|
12081
|
-
|
|
12082
|
-
|
|
12083
|
-
|
|
12084
|
-
|
|
12085
|
-
|
|
12086
|
-
|
|
12242
|
+
if (typeof value === "string") {
|
|
12243
|
+
if (value.startsWith("embedding:")) {
|
|
12244
|
+
const lengthMatch = value.match(/embedding:(\d+)/);
|
|
12245
|
+
if (lengthMatch) {
|
|
12246
|
+
const length = lengthMatch[1];
|
|
12247
|
+
const rest = value.substring(`embedding:${length}`.length);
|
|
12248
|
+
processed[key] = `array|items:number|length:${length}|empty:false${rest}`;
|
|
12249
|
+
continue;
|
|
12250
|
+
}
|
|
12251
|
+
}
|
|
12252
|
+
if (value.startsWith("embedding|") || value === "embedding") {
|
|
12253
|
+
processed[key] = value.replace(/^embedding/, "array|items:number|empty:false");
|
|
12254
|
+
continue;
|
|
12255
|
+
}
|
|
12256
|
+
processed[key] = value;
|
|
12257
|
+
} else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
|
|
12258
|
+
const hasValidatorType = value.type !== void 0 && key !== "$$type";
|
|
12259
|
+
if (hasValidatorType) {
|
|
12260
|
+
processed[key] = value;
|
|
12261
|
+
} else {
|
|
12262
|
+
const isExplicitRequired = value.$$type && value.$$type.includes("required");
|
|
12263
|
+
const isExplicitOptional = value.$$type && value.$$type.includes("optional");
|
|
12264
|
+
const objectConfig = {
|
|
12265
|
+
type: "object",
|
|
12266
|
+
properties: this.preprocessAttributesForValidation(value),
|
|
12267
|
+
strict: false
|
|
12268
|
+
};
|
|
12269
|
+
if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
|
|
12270
|
+
objectConfig.optional = true;
|
|
12271
|
+
}
|
|
12272
|
+
processed[key] = objectConfig;
|
|
12087
12273
|
}
|
|
12088
|
-
processed[key] = objectConfig;
|
|
12089
12274
|
} else {
|
|
12090
12275
|
processed[key] = value;
|
|
12091
12276
|
}
|
|
@@ -12936,6 +13121,71 @@ ${errorDetails}`,
|
|
|
12936
13121
|
}
|
|
12937
13122
|
return true;
|
|
12938
13123
|
}
|
|
13124
|
+
/**
|
|
13125
|
+
* Find orphaned partitions (partitions that reference non-existent fields)
|
|
13126
|
+
* @returns {Object} Object with orphaned partition names as keys and details as values
|
|
13127
|
+
* @example
|
|
13128
|
+
* const orphaned = resource.findOrphanedPartitions();
|
|
13129
|
+
* // Returns: { byRegion: { missingFields: ['region'], definition: {...} } }
|
|
13130
|
+
*/
|
|
13131
|
+
findOrphanedPartitions() {
|
|
13132
|
+
const orphaned = {};
|
|
13133
|
+
if (!this.config.partitions) {
|
|
13134
|
+
return orphaned;
|
|
13135
|
+
}
|
|
13136
|
+
for (const [partitionName, partitionDef] of Object.entries(this.config.partitions)) {
|
|
13137
|
+
if (!partitionDef.fields) {
|
|
13138
|
+
continue;
|
|
13139
|
+
}
|
|
13140
|
+
const missingFields = [];
|
|
13141
|
+
for (const fieldName of Object.keys(partitionDef.fields)) {
|
|
13142
|
+
if (!this.fieldExistsInAttributes(fieldName)) {
|
|
13143
|
+
missingFields.push(fieldName);
|
|
13144
|
+
}
|
|
13145
|
+
}
|
|
13146
|
+
if (missingFields.length > 0) {
|
|
13147
|
+
orphaned[partitionName] = {
|
|
13148
|
+
missingFields,
|
|
13149
|
+
definition: partitionDef,
|
|
13150
|
+
allFields: Object.keys(partitionDef.fields)
|
|
13151
|
+
};
|
|
13152
|
+
}
|
|
13153
|
+
}
|
|
13154
|
+
return orphaned;
|
|
13155
|
+
}
|
|
13156
|
+
/**
|
|
13157
|
+
* Remove orphaned partitions (partitions that reference non-existent fields)
|
|
13158
|
+
* WARNING: This will modify the resource configuration and should be followed by uploadMetadataFile()
|
|
13159
|
+
* @param {Object} options - Options
|
|
13160
|
+
* @param {boolean} options.dryRun - If true, only returns what would be removed without modifying (default: false)
|
|
13161
|
+
* @returns {Object} Object with removed partition names and details
|
|
13162
|
+
* @example
|
|
13163
|
+
* // Dry run to see what would be removed
|
|
13164
|
+
* const toRemove = resource.removeOrphanedPartitions({ dryRun: true });
|
|
13165
|
+
* console.log('Would remove:', toRemove);
|
|
13166
|
+
*
|
|
13167
|
+
* // Actually remove orphaned partitions
|
|
13168
|
+
* const removed = resource.removeOrphanedPartitions();
|
|
13169
|
+
* await database.uploadMetadataFile(); // Save changes to S3
|
|
13170
|
+
*/
|
|
13171
|
+
removeOrphanedPartitions({ dryRun = false } = {}) {
|
|
13172
|
+
const orphaned = this.findOrphanedPartitions();
|
|
13173
|
+
if (Object.keys(orphaned).length === 0) {
|
|
13174
|
+
return {};
|
|
13175
|
+
}
|
|
13176
|
+
if (dryRun) {
|
|
13177
|
+
return orphaned;
|
|
13178
|
+
}
|
|
13179
|
+
for (const partitionName of Object.keys(orphaned)) {
|
|
13180
|
+
delete this.config.partitions[partitionName];
|
|
13181
|
+
}
|
|
13182
|
+
this.emit("orphanedPartitionsRemoved", {
|
|
13183
|
+
resourceName: this.name,
|
|
13184
|
+
removed: Object.keys(orphaned),
|
|
13185
|
+
details: orphaned
|
|
13186
|
+
});
|
|
13187
|
+
return orphaned;
|
|
13188
|
+
}
|
|
12939
13189
|
/**
|
|
12940
13190
|
* Apply a single partition rule to a field value
|
|
12941
13191
|
* @param {*} value - The field value
|
|
@@ -15029,7 +15279,7 @@ class Database extends EventEmitter {
|
|
|
15029
15279
|
this.id = idGenerator(7);
|
|
15030
15280
|
this.version = "1";
|
|
15031
15281
|
this.s3dbVersion = (() => {
|
|
15032
|
-
const [ok, err, version] = tryFn(() => true ? "11.2.
|
|
15282
|
+
const [ok, err, version] = tryFn(() => true ? "11.2.6" : "latest");
|
|
15033
15283
|
return ok ? version : "latest";
|
|
15034
15284
|
})();
|
|
15035
15285
|
this.resources = {};
|
|
@@ -18932,5 +19182,1090 @@ class StateMachinePlugin extends Plugin {
|
|
|
18932
19182
|
}
|
|
18933
19183
|
}
|
|
18934
19184
|
|
|
18935
|
-
|
|
19185
|
+
/**
 * Cosine distance between two vectors: 1 - cos(angle between a and b).
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, same length as `a`.
 * @returns {number} Distance in [0, 2]. When either vector has zero
 *   magnitude the result is 0 if both vectors are all zeros, otherwise 1.
 * @throws {Error} If the vectors have different lengths.
 */
function cosineDistance(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return a.every((v) => v === 0) && b.every((v) => v === 0) ? 0 : 1;
  }
  // Rounding can push the ratio marginally outside [-1, 1], which would give
  // a slightly negative distance for (near-)identical vectors. Clamp it.
  const similarity = Math.min(1, Math.max(-1, dot / denominator));
  return 1 - similarity;
}
|
|
19204
|
+
/**
 * Euclidean (L2) distance between two vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Square root of the summed squared component differences.
 * @throws {Error} If the vectors have different lengths.
 */
function euclideanDistance(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let squaredTotal = 0;
  let idx = 0;
  while (idx < dims) {
    const delta = a[idx] - b[idx];
    squaredTotal += delta * delta;
    idx += 1;
  }
  return Math.sqrt(squaredTotal);
}
|
|
19215
|
+
/**
 * Manhattan (L1) distance between two vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the absolute component differences.
 * @throws {Error} If the vectors have different lengths.
 */
function manhattanDistance(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < dims) {
    total += Math.abs(a[idx] - b[idx]);
    idx += 1;
  }
  return total;
}
|
|
19225
|
+
/**
 * Dot (inner) product of two vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the pairwise component products.
 * @throws {Error} If the vectors have different lengths.
 */
function dotProduct(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < dims) {
    total += a[idx] * b[idx];
    idx += 1;
  }
  return total;
}
|
|
19235
|
+
/**
 * Scale a vector to unit (L2) length.
 *
 * @param {number[]} vector - Vector to normalize; it is not modified.
 * @returns {number[]} A new array: the components divided by the vector's
 *   magnitude, or a plain copy when the magnitude is 0.
 */
function normalize(vector) {
  const sumOfSquares = vector.reduce((acc, component) => acc + component * component, 0);
  const length = Math.sqrt(sumOfSquares);
  if (length !== 0) {
    return vector.map((component) => component / length);
  }
  return vector.slice();
}
|
|
19244
|
+
|
|
19245
|
+
/**
 * Cluster vectors into `k` groups by iterating assignment and centroid-update
 * steps until the inertia stops changing or `maxIterations` is reached.
 *
 * Initial centroids come from `initializeCentroidsKMeansPlusPlus`. A cluster
 * that ends up empty is re-seeded with a copy of a randomly chosen input
 * vector.
 *
 * @param {number[][]} vectors - Input vectors, all of the same length.
 * @param {number} k - Number of clusters (1 <= k <= vectors.length).
 * @param {Object} [options]
 * @param {number} [options.maxIterations=100] - Iteration cap.
 * @param {number} [options.tolerance=1e-4] - Converged once the absolute
 *   inertia change between two iterations drops below this value.
 * @param {Function} [options.distanceFn=euclideanDistance] - (a, b) => number.
 * @param {?number} [options.seed=null] - Forwarded to the centroid initializer.
 * @param {?Function} [options.onIteration=null] - Called once per iteration
 *   with (iterationNumber, inertia, converged).
 * @returns {{centroids: number[][], assignments: number[], iterations: number, converged: boolean, inertia: number}}
 * @throws {Error} On empty input, k out of range, or mismatched dimensions.
 */
function kmeans(vectors, k, options = {}) {
  const {
    maxIterations = 100,
    tolerance = 1e-4,
    distanceFn = euclideanDistance,
    seed = null,
    onIteration = null
  } = options;

  if (vectors.length === 0) {
    throw new Error("Cannot cluster empty vector array");
  }
  if (k < 1) {
    throw new Error(`k must be at least 1, got ${k}`);
  }
  if (k > vectors.length) {
    throw new Error(`k (${k}) cannot be greater than number of vectors (${vectors.length})`);
  }

  const dimensions = vectors[0].length;
  for (let idx = 1; idx < vectors.length; idx++) {
    const width = vectors[idx].length;
    if (width !== dimensions) {
      throw new Error(`All vectors must have same dimensions. Expected ${dimensions}, got ${width} at index ${idx}`);
    }
  }

  const centroids = initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed);

  // Index of the centroid closest to `vector`; ties keep the lowest index.
  const nearestCentroid = (vector) => {
    let best = 0;
    let bestDistance = Infinity;
    for (let c = 0; c < k; c++) {
      const candidate = distanceFn(vector, centroids[c]);
      if (candidate < bestDistance) {
        bestDistance = candidate;
        best = c;
      }
    }
    return best;
  };

  // Sum of squared distances from every vector to its assigned centroid.
  const inertiaFor = (labels) => {
    let total = 0;
    vectors.forEach((vector, idx) => {
      const distance = distanceFn(vector, centroids[labels[idx]]);
      total += distance * distance;
    });
    return total;
  };

  let assignments = new Array(vectors.length);
  let iterations = 0;
  let converged = false;
  let previousInertia = Infinity;

  while (!converged && iterations < maxIterations) {
    assignments = vectors.map((vector) => nearestCentroid(vector));
    const currentInertia = inertiaFor(assignments);
    converged = Math.abs(previousInertia - currentInertia) < tolerance;
    previousInertia = currentInertia;

    if (onIteration) {
      onIteration(iterations + 1, currentInertia, converged);
    }

    if (!converged) {
      const sums = new Array(k).fill(0).map(() => new Array(dimensions).fill(0));
      const counts = new Array(k).fill(0);
      vectors.forEach((vector, idx) => {
        const label = assignments[idx];
        counts[label] += 1;
        vector.forEach((component, d) => {
          sums[label][d] += component;
        });
      });
      for (let c = 0; c < k; c++) {
        if (counts[c] > 0) {
          centroids[c] = sums[c].map((sum) => sum / counts[c]);
        } else {
          // Empty cluster: restart it from a random input vector.
          const randomIdx = Math.floor(Math.random() * vectors.length);
          centroids[c] = [...vectors[randomIdx]];
        }
      }
    }
    iterations++;
  }

  // Centroids may have moved after the last assignment step, so the reported
  // inertia is measured against the final centroids.
  const inertia = inertiaFor(assignments);
  return {
    centroids,
    assignments,
    iterations,
    converged,
    inertia
  };
}
|
|
19332
|
+
/**
 * Pick `k` initial centroids using k-means++ style seeding.
 *
 * The first centroid is `vectors[seed mod n]` when a finite numeric seed is
 * given (negative and fractional seeds are wrapped/truncated into range),
 * otherwise a random vector. Each further centroid is drawn with probability
 * proportional to its squared distance from the nearest centroid chosen so
 * far. Only the first pick is seeded; later picks use Math.random().
 *
 * @param {number[][]} vectors - Candidate vectors.
 * @param {number} k - Number of centroids to return.
 * @param {Function} distanceFn - (a, b) => number.
 * @param {?number} seed - Index seed for the first centroid, or null/undefined.
 * @returns {number[][]} `k` centroids, each a copy of an input vector.
 */
function initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed) {
  const centroids = [];
  const n = vectors.length;
  const pickRandomIndex = () => Math.floor(Math.random() * n);

  let firstIndex;
  if (Number.isFinite(seed)) {
    // JavaScript's % keeps the sign of the dividend; wrap negatives into [0, n).
    const wrapped = Math.trunc(seed) % n;
    firstIndex = wrapped < 0 ? wrapped + n : wrapped;
  } else {
    firstIndex = pickRandomIndex();
  }
  centroids.push([...vectors[firstIndex]]);

  for (let i = 1; i < k; i++) {
    const squaredDistances = vectors.map((vector) => {
      const nearest = Math.min(...centroids.map((c) => distanceFn(vector, c)));
      return nearest * nearest;
    });
    const totalSquared = squaredDistances.reduce((a, b) => a + b, 0);

    let chosen = -1;
    if (totalSquared > 0) {
      const threshold = Math.random() * totalSquared;
      let cumulativeSum = 0;
      for (let j = 0; j < n; j++) {
        cumulativeSum += squaredDistances[j];
        // Zero-weight points coincide with an existing centroid; never pick
        // them, even when the random threshold happens to be exactly 0.
        if (squaredDistances[j] > 0 && cumulativeSum >= threshold) {
          chosen = j;
          break;
        }
      }
    }
    // Degenerate cases (all points coincide with a centroid, or the distance
    // function produced NaN): fall back to a uniform pick so that exactly k
    // centroids are always returned.
    if (chosen === -1) {
      chosen = pickRandomIndex();
    }
    centroids.push([...vectors[chosen]]);
  }
  return centroids;
}
|
|
19360
|
+
/**
 * Run k-means for every k in [minK, maxK], score each run with several
 * clustering metrics and report per-metric recommendations plus a majority
 * vote ("consensus").
 *
 * @param {number[][]} vectors - Vectors to cluster.
 * @param {Object} [options]
 * @param {number} [options.minK=2] - Smallest k to evaluate.
 * @param {number} [options.maxK] - Largest k to evaluate; defaults to
 *   min(10, floor(sqrt(vectors.length / 2))).
 * @param {Function} [options.distanceFn=euclideanDistance] - Distance function.
 * @param {number} [options.nReferences=10] - Forwarded to `gapStatistic`.
 * @param {number} [options.stabilityRuns=5] - Forwarded to
 *   `clusteringStability` as `nRuns`.
 *   Any remaining options are forwarded to `kmeans`.
 * @returns {Promise<{results: Object[], recommendations: Object, consensus: number, summary: Object}>}
 * @throws {Error} If the k range is empty (minK > maxK), which happens with
 *   the default maxK when there are fewer than 8 vectors.
 */
async function findOptimalK(vectors, options = {}) {
  const {
    minK = 2,
    maxK = Math.min(10, Math.floor(Math.sqrt(vectors.length / 2))),
    distanceFn = euclideanDistance,
    nReferences = 10,
    stabilityRuns = 5,
    ...kmeansOptions
  } = options;

  // An empty range leaves nothing to compare; previously this surfaced as an
  // opaque "Reduce of empty array with no initial value" TypeError.
  if (!(minK <= maxK)) {
    throw new Error(
      `Invalid k range: minK (${minK}) must not exceed maxK (${maxK}). Provide more vectors or set maxK explicitly.`
    );
  }

  const metricsModule = await Promise.resolve().then(function () { return metrics; });
  const {
    silhouetteScore,
    daviesBouldinIndex,
    calinskiHarabaszIndex,
    gapStatistic,
    clusteringStability
  } = metricsModule;

  const results = [];
  for (let k = minK; k <= maxK; k++) {
    const kmeansResult = kmeans(vectors, k, { ...kmeansOptions, distanceFn });
    const { assignments, centroids } = kmeansResult;
    const silhouette = silhouetteScore(vectors, assignments, centroids, distanceFn);
    const daviesBouldin = daviesBouldinIndex(vectors, assignments, centroids, distanceFn);
    const calinskiHarabasz = calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn);
    const gap = await gapStatistic(vectors, assignments, centroids, distanceFn, nReferences);
    const stability = clusteringStability(
      vectors,
      k,
      { ...kmeansOptions, distanceFn, nRuns: stabilityRuns }
    );
    results.push({
      k,
      inertia: kmeansResult.inertia,
      silhouette,
      daviesBouldin,
      calinskiHarabasz,
      gap: gap.gap,
      gapSk: gap.sk,
      stability: stability.stability,
      cvInertia: stability.cvInertia,
      iterations: kmeansResult.iterations,
      converged: kmeansResult.converged
    });
  }

  // k of the best result under `isBetter(candidate, currentBest)`; earlier
  // entries win ties because the comparison is strict.
  const bestK = (isBetter) => results.reduce((best, curr) => (isBetter(curr, best) ? curr : best)).k;

  const elbowK = findElbowPoint(results.map((r) => r.inertia));
  const recommendations = {
    elbow: minK + elbowK,
    silhouette: bestK((curr, best) => curr.silhouette > best.silhouette),
    // Davies-Bouldin is the only metric here where lower is better.
    daviesBouldin: bestK((curr, best) => curr.daviesBouldin < best.daviesBouldin),
    calinskiHarabasz: bestK((curr, best) => curr.calinskiHarabasz > best.calinskiHarabasz),
    gap: bestK((curr, best) => curr.gap > best.gap),
    stability: bestK((curr, best) => curr.stability > best.stability)
  };

  const votes = Object.values(recommendations);
  const consensus = votes.reduce((acc, k) => {
    acc[k] = (acc[k] || 0) + 1;
    return acc;
  }, {});
  const consensusK = Number.parseInt(
    Object.entries(consensus).reduce((a, b) => (b[1] > a[1] ? b : a))[0],
    10
  );

  return {
    results,
    recommendations,
    consensus: consensusK,
    summary: {
      analysisRange: `${minK}-${maxK}`,
      totalVectors: vectors.length,
      dimensions: vectors[0].length,
      recommendation: consensusK,
      confidence: consensus[consensusK] / votes.length
    }
  };
}
|
|
19464
|
+
/**
 * Locate the "elbow" of an inertia curve: the interior index with the largest
 * discrete second difference (inertias[i-1] - 2*inertias[i] + inertias[i+1]).
 *
 * @param {number[]} inertias - Inertia values ordered by increasing k.
 * @returns {number} Index into `inertias`; 0 when fewer than three values are
 *   given or no interior point has a comparable curvature.
 */
function findElbowPoint(inertias) {
  const count = inertias.length;
  if (count < 3) return 0;

  let bestIndex = 0;
  let bestCurvature = -Infinity;
  for (let mid = 1; mid <= count - 2; mid += 1) {
    const secondDifference = inertias[mid - 1] - 2 * inertias[mid] + inertias[mid + 1];
    if (secondDifference > bestCurvature) {
      bestCurvature = secondDifference;
      bestIndex = mid;
    }
  }
  return bestIndex;
}
|
|
19478
|
+
|
|
19479
|
+
/**
 * Error raised by the vector plugin.
 *
 * Forwards `message` and `details` to PluginError with `pluginName`
 * defaulting to "VectorPlugin" (a `pluginName` in `details` takes precedence)
 * and, unless `details.description` is provided, a built-in troubleshooting
 * description that mentions `details.operation`.
 */
class VectorError extends PluginError {
  /**
   * @param {string} message - Error message.
   * @param {Object} [details={}] - Extra fields passed through to PluginError.
   */
  constructor(message, details = {}) {
    // Built lazily so the default text is only produced when it is needed.
    const buildDefaultDescription = () => `
Vector Plugin Error

Operation: ${details.operation || "unknown"}

Common causes:
1. Vector dimension mismatch between vectors
2. Invalid distance metric specified (must be: cosine, euclidean, manhattan)
3. Empty vector array provided for clustering
4. k value larger than number of available vectors
5. Vector field not found or invalid in resource
6. Large vectors without proper behavior (use 'body-overflow' or 'body-only')

Available distance metrics:
- cosine: Best for normalized vectors, semantic similarity. Range: [0, 2]
- euclidean: Standard L2 distance, geometric proximity. Range: [0, \u221E)
- manhattan: L1 distance, faster computation. Range: [0, \u221E)

Storage considerations:
- Vectors > 250 dimensions may exceed S3 metadata limit (2KB)
- Use behavior: 'body-overflow' or 'body-only' for large vectors
- OpenAI ada-002 (1536 dims): ~10KB, requires body storage
- Sentence Transformers (384 dims): ~2.7KB, requires body storage
`.trim();

    const payload = {
      pluginName: "VectorPlugin",
      ...details,
      description: details.description || buildDefaultDescription()
    };
    super(message, payload);
  }
}
|
|
19511
|
+
|
|
19512
|
+
/**
 * Plugin that attaches vector helpers to every resource of the database it is
 * installed on:
 *
 * - `vectorSearch` (aliases `similarTo`, `findSimilar`): nearest-neighbour search
 * - `cluster`: k-means clustering of the stored vectors
 * - `vectorDistance` (alias `distance`): distance between two vectors
 *
 * It also checks, at install time, whether declared vector fields are likely to
 * exceed the configured storage threshold for the resource's behavior.
 */
class VectorPlugin extends Plugin {
  /**
   * @param {Object} [options] - Overrides merged on top of the defaults below
   */
  constructor(options = {}) {
    super(options);
    this.config = {
      dimensions: 1536, // Default to OpenAI text-embedding-3-small/3-large
      distanceMetric: "cosine", // Default metric
      storageThreshold: 1500, // Bytes - warn if vectors exceed this
      autoFixBehavior: false, // Automatically set body-overflow
      autoDetectVectorField: true, // Auto-detect embedding:XXX fields
      emitEvents: true, // Emit events for monitoring
      verboseEvents: false, // Emit detailed progress events
      eventThrottle: 100, // Throttle progress events (ms)
      ...options
    };
    this.distanceFunctions = {
      cosine: cosineDistance,
      euclidean: euclideanDistance,
      manhattan: manhattanDistance
    };
    // resource name -> detected vector field path (or null when none found)
    this._vectorFieldCache = new Map();
    // throttle key -> timestamp (ms) of the last emitted event
    this._throttleState = new Map();
  }

  async onInstall() {
    this.emit("installed", { plugin: "VectorPlugin" });
    this.validateVectorStorage();
    this.installResourceMethods();
  }

  async onStart() {
    this.emit("started", { plugin: "VectorPlugin" });
  }

  async onStop() {
    this.emit("stopped", { plugin: "VectorPlugin" });
  }

  /**
   * Remove every method installed by installResourceMethods() and drop the
   * per-resource caches so nothing stale survives a later re-install.
   *
   * @param {Object} [options] - Unused; kept for the plugin lifecycle signature
   */
  async onUninstall(options) {
    for (const resource of Object.values(this.database.resources)) {
      delete resource.vectorSearch;
      delete resource.cluster;
      delete resource.vectorDistance;
      delete resource.similarTo;
      delete resource.findSimilar;
      delete resource.distance;
    }
    this._vectorFieldCache.clear();
    this._throttleState.clear();
    this.emit("uninstalled", { plugin: "VectorPlugin" });
  }

  /**
   * Validate vector storage configuration for all resources
   *
   * Detects large vector fields and warns if proper behavior is not set.
   * Can optionally auto-fix by setting body-overflow behavior.
   */
  validateVectorStorage() {
    for (const resource of Object.values(this.database.resources)) {
      const vectorFields = this.findVectorFields(resource.schema.attributes);
      if (vectorFields.length === 0) continue;

      const totalVectorSize = vectorFields.reduce((sum, f) => sum + f.estimatedBytes, 0);
      if (!(totalVectorSize > this.config.storageThreshold)) continue;
      if (["body-overflow", "body-only"].includes(resource.behavior)) continue;

      this.emit("vector:storage-warning", {
        resource: resource.name,
        vectorFields: vectorFields.map((f) => ({
          field: f.name,
          dimensions: f.length,
          estimatedBytes: f.estimatedBytes
        })),
        totalEstimatedBytes: totalVectorSize,
        metadataLimit: 2047,
        currentBehavior: resource.behavior || "default",
        recommendation: "body-overflow"
      });

      if (this.config.autoFixBehavior) {
        resource.behavior = "body-overflow";
        this.emit("vector:behavior-fixed", {
          resource: resource.name,
          newBehavior: "body-overflow"
        });
      } else {
        console.warn(`\u26A0\uFE0F VectorPlugin: Resource '${resource.name}' has large vector fields (${totalVectorSize} bytes estimated)`);
        console.warn(` Current behavior: '${resource.behavior || "default"}'`);
        console.warn(` Recommendation: Add behavior: 'body-overflow' or 'body-only' to resource configuration`);
        console.warn(` Large vectors will exceed S3 metadata limit (2047 bytes) and cause errors.`);
      }
    }
  }

  /**
   * Auto-detect vector field from resource schema
   *
   * Looks for fields with type 'embedding:XXX' pattern.
   * Caches result per resource for performance.
   *
   * @param {Resource} resource - Resource instance
   * @returns {string|null} Detected vector field name or null
   */
  detectVectorField(resource) {
    if (this._vectorFieldCache.has(resource.name)) {
      return this._vectorFieldCache.get(resource.name);
    }
    const vectorField = this._findEmbeddingField(resource.schema.attributes);
    this._vectorFieldCache.set(resource.name, vectorField);
    if (vectorField && this.config.emitEvents) {
      this.emit("vector:field-detected", {
        resource: resource.name,
        vectorField,
        timestamp: Date.now()
      });
    }
    return vectorField;
  }

  /**
   * Recursively find embedding:XXX field in attributes
   *
   * Also accepts the long form `{ type: "array", items: "number", length }`.
   * Nested matches are reported as a dot-separated path.
   *
   * @param {Object} attributes - Resource attributes
   * @param {string} path - Current path (for nested objects)
   * @returns {string|null} Field path or null
   */
  _findEmbeddingField(attributes, path = "") {
    for (const [key, attr] of Object.entries(attributes)) {
      // A null/undefined definition cannot describe a vector; skipping it
      // avoids a TypeError on the property reads below.
      if (attr == null) continue;
      const fullPath = path ? `${path}.${key}` : key;
      if (typeof attr === "string" && attr.startsWith("embedding:")) {
        return fullPath;
      }
      if (attr.type === "array" && attr.items === "number" && attr.length) {
        return fullPath;
      }
      if (attr.type === "object" && attr.props) {
        const nested = this._findEmbeddingField(attr.props, fullPath);
        if (nested) return nested;
      }
    }
    return null;
  }

  /**
   * Emit event with throttling support
   *
   * Does nothing when `config.emitEvents` is off. When a throttle key is given,
   * events sharing that key are dropped if the previous one was emitted less
   * than `config.eventThrottle` ms ago.
   *
   * @param {string} eventName - Event name
   * @param {Object} data - Event data
   * @param {string} throttleKey - Unique key for throttling (optional)
   */
  _emitEvent(eventName, data, throttleKey = null) {
    if (!this.config.emitEvents) return;
    if (throttleKey) {
      const now = Date.now();
      const lastEmit = this._throttleState.get(throttleKey);
      if (lastEmit && now - lastEmit < this.config.eventThrottle) {
        return;
      }
      this._throttleState.set(throttleKey, now);
    }
    this.emit(eventName, data);
  }

  /**
   * Find vector fields in resource attributes
   *
   * NOTE(review): only the long form `{ type: "array", items: "number", length }`
   * is sized here; the 'embedding:N' string shorthand recognised by
   * _findEmbeddingField is not. Confirm whether that is intended.
   *
   * @param {Object} attributes - Resource attributes
   * @param {string} path - Current path (for nested objects)
   * @returns {Array} Array of vector field info
   */
  findVectorFields(attributes, path = "") {
    const vectors = [];
    for (const [key, attr] of Object.entries(attributes)) {
      if (attr == null) continue; // nothing to inspect; avoids a TypeError below
      const fullPath = path ? `${path}.${key}` : key;
      if (attr.type === "array" && attr.items === "number" && attr.length) {
        vectors.push({
          name: fullPath,
          length: attr.length,
          estimatedBytes: this.estimateVectorBytes(attr.length)
        });
      }
      if (attr.type === "object" && attr.props) {
        vectors.push(...this.findVectorFields(attr.props, fullPath));
      }
    }
    return vectors;
  }

  /**
   * Estimate bytes required to store a vector in JSON format
   *
   * Conservative estimate: ~7 bytes per number + array overhead
   *
   * @param {number} dimensions - Number of dimensions
   * @returns {number} Estimated bytes
   */
  estimateVectorBytes(dimensions) {
    return dimensions * 7 + 50;
  }

  /**
   * Install vector methods on all resources
   */
  installResourceMethods() {
    for (const resource of Object.values(this.database.resources)) {
      const searchMethod = this.createVectorSearchMethod(resource);
      const clusterMethod = this.createClusteringMethod(resource);
      const distanceMethod = this.createDistanceMethod();
      resource.vectorSearch = searchMethod;
      resource.cluster = clusterMethod;
      resource.vectorDistance = distanceMethod;
      resource.similarTo = searchMethod;
      resource.findSimilar = searchMethod;
      resource.distance = distanceMethod;
    }
  }

  /**
   * Pick the vector field for an operation: the explicit `options.vectorField`,
   * else the auto-detected field (when enabled), else the literal "vector".
   *
   * @param {Resource} resource - Resource instance
   * @param {Object} options - Options passed to the search/cluster call
   * @returns {string} Field name or dot-separated path
   */
  _resolveVectorField(resource, options) {
    return (
      options.vectorField ||
      (this.config.autoDetectVectorField && this.detectVectorField(resource)) ||
      "vector"
    );
  }

  /**
   * Look up a registered distance function by name. Only own properties of
   * `distanceFunctions` count, so names such as "toString" are rejected.
   *
   * @param {string} metric - Metric name
   * @returns {Function|undefined} Distance function, or undefined if unknown
   */
  _getDistanceFn(metric) {
    return Object.prototype.hasOwnProperty.call(this.distanceFunctions, metric)
      ? this.distanceFunctions[metric]
      : undefined;
  }

  /**
   * Build the error thrown for an unknown distance metric.
   *
   * @param {string} operation - Operation name reported in the error details
   * @param {string} metric - The rejected metric name
   * @returns {VectorError} Error ready to be thrown
   */
  _createInvalidMetricError(operation, metric) {
    return new VectorError(`Invalid distance metric: ${metric}`, {
      operation,
      availableMetrics: Object.keys(this.distanceFunctions),
      providedMetric: metric
    });
  }

  /**
   * Load the records an operation works on: the given partition when one is
   * supplied (announced with a `vector:partition-filter` event), otherwise
   * everything.
   *
   * NOTE(review): `partition` is forwarded as both `partition` and
   * `partitionValues`, exactly as before — confirm against `resource.list`.
   *
   * @param {Resource} resource - Resource instance
   * @param {*} partition - Partition selector or null
   * @returns {Promise<Array>} Records
   */
  async _loadRecords(resource, partition) {
    if (!partition) {
      return resource.getAll();
    }
    this._emitEvent("vector:partition-filter", {
      resource: resource.name,
      partition,
      timestamp: Date.now()
    });
    return resource.list({ partition, partitionValues: partition });
  }

  /**
   * Read a vector from a record. A direct property lookup is tried first; if
   * that yields nothing and the field contains dots, the path is walked
   * segment by segment so nested fields found by detectVectorField resolve.
   *
   * @param {Object} record - Record to read from
   * @param {string} fieldPath - Field name or dot-separated path
   * @returns {*} The stored value, or undefined when absent
   */
  _readVector(record, fieldPath) {
    if (record == null) return undefined;
    const direct = record[fieldPath];
    if (direct !== undefined || typeof fieldPath !== "string" || !fieldPath.includes(".")) {
      return direct;
    }
    let current = record;
    for (const segment of fieldPath.split(".")) {
      if (current == null) return undefined;
      current = current[segment];
    }
    return current;
  }

  /**
   * Format a per-second rate with two decimals. Durations below 1 ms are
   * treated as 1 ms so the result is never "Infinity" or "NaN".
   *
   * @param {number} count - Number of items handled
   * @param {number} durationMs - Elapsed time in milliseconds
   * @returns {string} Rate formatted with toFixed(2)
   */
  _perSecond(count, durationMs) {
    return (count / (Math.max(durationMs, 1) / 1e3)).toFixed(2);
  }

  /**
   * Create vector search method for a resource
   *
   * Performs K-nearest neighbors search to find similar vectors.
   *
   * @param {Resource} resource - Resource instance
   * @returns {Function} Vector search method; resolves to an array of
   *   `{ record, distance }` sorted by ascending distance
   */
  createVectorSearchMethod(resource) {
    return async (queryVector, options = {}) => {
      const startTime = Date.now();
      const vectorField = this._resolveVectorField(resource, options);
      const {
        limit = 10,
        distanceMetric = this.config.distanceMetric,
        threshold = null,
        partition = null
      } = options;

      const distanceFn = this._getDistanceFn(distanceMetric);
      if (!distanceFn) {
        const error = this._createInvalidMetricError("vectorSearch", distanceMetric);
        this._emitEvent("vector:search-error", {
          resource: resource.name,
          error: error.message,
          timestamp: Date.now()
        });
        throw error;
      }

      // Reject a missing/non-indexable query up front instead of failing with
      // a raw TypeError on `queryVector.length` below.
      if (queryVector == null || typeof queryVector.length !== "number") {
        const error = new VectorError("Query vector must be an array of numbers", {
          operation: "vectorSearch",
          resourceName: resource.name,
          vectorField
        });
        this._emitEvent("vector:search-error", {
          resource: resource.name,
          error: error.message,
          timestamp: Date.now()
        });
        throw error;
      }

      this._emitEvent("vector:search-start", {
        resource: resource.name,
        vectorField,
        limit,
        distanceMetric,
        partition,
        threshold,
        queryDimensions: queryVector.length,
        timestamp: startTime
      });

      try {
        const allRecords = await this._loadRecords(resource, partition);
        const totalRecords = allRecords.length;
        let processedRecords = 0;
        let dimensionMismatches = 0;
        let candidateIndex = 0; // index among records that actually hold a vector
        const matches = [];

        for (const record of allRecords) {
          const vector = this._readVector(record, vectorField);
          if (!Array.isArray(vector)) continue;
          const index = candidateIndex++;
          try {
            const distance = distanceFn(queryVector, vector);
            processedRecords++;
            if (this.config.verboseEvents && processedRecords % 100 === 0) {
              this._emitEvent("vector:search-progress", {
                resource: resource.name,
                processed: processedRecords,
                total: totalRecords,
                progress: processedRecords / totalRecords * 100,
                timestamp: Date.now()
              }, `search-${resource.name}`);
            }
            if (threshold === null || distance <= threshold) {
              matches.push({ record, distance });
            }
          } catch (err) {
            // Any failure while scoring a record is counted as a dimension
            // mismatch and the record is left out of the results.
            dimensionMismatches++;
            if (this.config.verboseEvents) {
              this._emitEvent("vector:dimension-mismatch", {
                resource: resource.name,
                recordIndex: index,
                expected: queryVector.length,
                got: vector.length,
                timestamp: Date.now()
              });
            }
          }
        }

        const results = matches.sort((a, b) => a.distance - b.distance).slice(0, limit);
        const duration = Date.now() - startTime;
        const throughput = this._perSecond(totalRecords, duration);

        this._emitEvent("vector:search-complete", {
          resource: resource.name,
          vectorField,
          resultsCount: results.length,
          totalRecords,
          processedRecords,
          dimensionMismatches,
          duration,
          throughput,
          timestamp: Date.now()
        });
        if (this.config.verboseEvents) {
          this._emitEvent("vector:performance", {
            operation: "search",
            resource: resource.name,
            duration,
            throughput,
            recordsPerSecond: this._perSecond(processedRecords, duration),
            timestamp: Date.now()
          });
        }
        return results;
      } catch (error) {
        this._emitEvent("vector:search-error", {
          resource: resource.name,
          error: error.message,
          stack: error.stack,
          timestamp: Date.now()
        });
        throw error;
      }
    };
  }

  /**
   * Create clustering method for a resource
   *
   * Performs k-means clustering on resource vectors.
   *
   * @param {Resource} resource - Resource instance
   * @returns {Function} Clustering method; resolves to
   *   `{ clusters, centroids, inertia, iterations, converged }`
   */
  createClusteringMethod(resource) {
    return async (options = {}) => {
      const startTime = Date.now();
      const vectorField = this._resolveVectorField(resource, options);
      const {
        k = 5,
        distanceMetric = this.config.distanceMetric,
        partition = null,
        ...kmeansOptions
      } = options;

      const distanceFn = this._getDistanceFn(distanceMetric);
      if (!distanceFn) {
        const error = this._createInvalidMetricError("cluster", distanceMetric);
        this._emitEvent("vector:cluster-error", {
          resource: resource.name,
          error: error.message,
          timestamp: Date.now()
        });
        throw error;
      }

      this._emitEvent("vector:cluster-start", {
        resource: resource.name,
        vectorField,
        k,
        distanceMetric,
        partition,
        maxIterations: kmeansOptions.maxIterations || 100,
        timestamp: startTime
      });

      try {
        const allRecords = await this._loadRecords(resource, partition);

        // Keep records and their vectors in lockstep so assignment i maps
        // back to recordsWithVectors[i].
        const recordsWithVectors = [];
        const vectors = [];
        for (const record of allRecords) {
          const vector = this._readVector(record, vectorField);
          if (Array.isArray(vector)) {
            recordsWithVectors.push(record);
            vectors.push(vector);
          }
        }

        if (recordsWithVectors.length === 0) {
          const error = new VectorError("No vectors found in resource", {
            operation: "cluster",
            resourceName: resource.name,
            vectorField
          });
          this._emitEvent("vector:empty-dataset", {
            resource: resource.name,
            vectorField,
            totalRecords: allRecords.length,
            timestamp: Date.now()
          });
          throw error;
        }

        const result = kmeans(vectors, k, {
          ...kmeansOptions,
          distanceFn,
          onIteration: this.config.verboseEvents ? (iteration, inertia, converged) => {
            this._emitEvent("vector:cluster-iteration", {
              resource: resource.name,
              k,
              iteration,
              inertia,
              converged,
              timestamp: Date.now()
            }, `cluster-${resource.name}`);
          } : void 0
        });

        if (result.converged) {
          this._emitEvent("vector:cluster-converged", {
            resource: resource.name,
            k,
            iterations: result.iterations,
            inertia: result.inertia,
            timestamp: Date.now()
          });
        }

        const clusters = Array.from({ length: k }, () => []);
        recordsWithVectors.forEach((record, i) => {
          clusters[result.assignments[i]].push(record);
        });

        const duration = Date.now() - startTime;
        const clusterSizes = clusters.map((c) => c.length);

        this._emitEvent("vector:cluster-complete", {
          resource: resource.name,
          vectorField,
          k,
          vectorCount: vectors.length,
          iterations: result.iterations,
          converged: result.converged,
          inertia: result.inertia,
          clusterSizes,
          duration,
          timestamp: Date.now()
        });
        if (this.config.verboseEvents) {
          this._emitEvent("vector:performance", {
            operation: "clustering",
            resource: resource.name,
            k,
            duration,
            iterationsPerSecond: this._perSecond(result.iterations, duration),
            vectorsPerSecond: this._perSecond(vectors.length, duration),
            timestamp: Date.now()
          });
        }

        return {
          clusters,
          centroids: result.centroids,
          inertia: result.inertia,
          iterations: result.iterations,
          converged: result.converged
        };
      } catch (error) {
        this._emitEvent("vector:cluster-error", {
          resource: resource.name,
          error: error.message,
          stack: error.stack,
          timestamp: Date.now()
        });
        throw error;
      }
    };
  }

  /**
   * Create distance calculation method
   *
   * @returns {Function} Distance method `(vector1, vector2, metric?)`; throws
   *   VectorError when the metric is not registered
   */
  createDistanceMethod() {
    return (vector1, vector2, metric = this.config.distanceMetric) => {
      const distanceFn = this._getDistanceFn(metric);
      if (!distanceFn) {
        throw this._createInvalidMetricError("vectorDistance", metric);
      }
      return distanceFn(vector1, vector2);
    };
  }

  /**
   * Static utility: Normalize vector
   *
   * @param {number[]} vector - Input vector
   * @returns {number[]} Normalized vector
   */
  static normalize(vector) {
    return normalize(vector);
  }

  /**
   * Static utility: Calculate dot product
   *
   * @param {number[]} vector1 - First vector
   * @param {number[]} vector2 - Second vector
   * @returns {number} Dot product
   */
  static dotProduct(vector1, vector2) {
    return dotProduct(vector1, vector2);
  }

  /**
   * Static utility: Find optimal K for clustering
   *
   * Analyzes clustering quality across a range of K values using
   * multiple evaluation metrics.
   *
   * @param {number[][]} vectors - Vectors to analyze
   * @param {Object} options - Configuration options
   * @returns {Promise<Object>} Analysis results with recommendations
   */
  static async findOptimalK(vectors, options) {
    return findOptimalK(vectors, options);
  }
}
|
|
20053
|
+
|
|
20054
|
+
/**
 * Mean silhouette coefficient of a clustering.
 *
 * For each point, `a` is its mean distance to the other members of its own
 * cluster and `b` is the smallest mean distance to the members of any other
 * non-empty cluster; the point scores `(b - a) / max(a, b)`. Points that are
 * alone in their cluster, or that have no other non-empty cluster to compare
 * against, are left out of the average.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids (only their count is used)
 * @param {Function} [distanceFn] - Distance between two vectors
 * @returns {number} Average score over the counted points, or 0 if none count
 */
function silhouetteScore(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const pointCount = vectors.length;

  // members[c] lists the indices of the points assigned to cluster c.
  const members = Array(clusterCount).fill(null).map(() => []);
  for (let p = 0; p < pointCount; p++) {
    members[assignments[p]].push(p);
  }

  // With only empty/singleton clusters there is no cohesion to measure.
  if (!members.some((group) => group.length > 1)) {
    return 0;
  }

  // Sum of distances from `point` to every other index in `group`, over `divisor`.
  const meanDistance = (point, group, divisor) => {
    let sum = 0;
    for (const other of group) {
      if (other !== point) {
        sum += distanceFn(vectors[point], vectors[other]);
      }
    }
    return sum / divisor;
  };

  let scoreSum = 0;
  let counted = 0;
  for (let p = 0; p < pointCount; p++) {
    const own = assignments[p];
    const ownGroup = members[own];
    if (ownGroup.length === 1) continue;

    const cohesion = meanDistance(p, ownGroup, ownGroup.length - 1);

    let separation = Infinity;
    for (let c = 0; c < clusterCount; c++) {
      if (c === own || members[c].length === 0) continue;
      separation = Math.min(separation, meanDistance(p, members[c], members[c].length));
    }
    if (separation === Infinity) continue;

    const scale = Math.max(cohesion, separation);
    scoreSum += scale === 0 ? 0 : (separation - cohesion) / scale;
    counted++;
  }

  return counted > 0 ? scoreSum / counted : 0;
}
|
|
20097
|
+
/**
 * Davies-Bouldin index of a clustering.
 *
 * Each non-empty cluster contributes the largest value of
 * `(scatter_i + scatter_j) / distance(centroid_i, centroid_j)` over the other
 * non-empty clusters, where scatter is the mean distance of a cluster's points
 * to its centroid. Pairs whose centroids coincide are ignored. The result is
 * the mean contribution over the non-empty clusters.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} [distanceFn] - Distance between two vectors
 * @returns {number} Index value, or 0 when every cluster is empty
 */
function daviesBouldinIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const sizes = new Array(clusterCount).fill(0);
  const distanceSums = new Array(clusterCount).fill(0);

  for (let p = 0; p < vectors.length; p++) {
    const label = assignments[p];
    distanceSums[label] += distanceFn(vectors[p], centroids[label]);
    sizes[label]++;
  }

  // Mean distance to the centroid; empty clusters keep their 0 sum.
  const scatter = distanceSums.map((sum, c) => (sizes[c] > 0 ? sum / sizes[c] : sum));

  let ratioSum = 0;
  let populated = 0;
  for (let i = 0; i < clusterCount; i++) {
    if (sizes[i] === 0) continue;

    let worstRatio = 0;
    for (let j = 0; j < clusterCount; j++) {
      if (j === i || sizes[j] === 0) continue;
      const gap = distanceFn(centroids[i], centroids[j]);
      if (gap === 0) continue;
      worstRatio = Math.max(worstRatio, (scatter[i] + scatter[j]) / gap);
    }

    ratioSum += worstRatio;
    populated++;
  }

  return populated > 0 ? ratioSum / populated : 0;
}
|
|
20128
|
+
/**
 * Calinski-Harabasz index (variance ratio criterion) of a clustering.
 *
 * Computes `(BGSS / (k - 1)) / (WCSS / (n - k))`, where BGSS is the
 * size-weighted squared distance of each non-empty cluster's centroid to the
 * overall mean of the data and WCSS is the sum of squared distances of every
 * point to its own centroid.
 *
 * Returns 0 whenever the ratio is undefined: no data, fewer than two clusters,
 * at least as many clusters as points, or zero within-cluster dispersion.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} [distanceFn] - Distance between two vectors
 * @returns {number} Index value (higher means better separated clusters)
 */
function calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const n = vectors.length;
  const k = centroids.length;

  // Guard every degenerate shape up front: an empty data set would otherwise
  // crash on vectors[0], and k <= 1 or k >= n makes a degree of freedom zero
  // or negative.
  if (n === 0 || k <= 1 || k >= n) return 0;

  // Mean of all points, one component per dimension.
  const dimensions = vectors[0].length;
  const overallCentroid = new Array(dimensions).fill(0);
  for (const vector of vectors) {
    vector.forEach((val, dim) => {
      overallCentroid[dim] += val;
    });
  }
  for (let dim = 0; dim < dimensions; dim++) {
    overallCentroid[dim] /= n;
  }

  const clusterCounts = new Array(k).fill(0);
  let wcss = 0;
  vectors.forEach((vector, i) => {
    const cluster = assignments[i];
    clusterCounts[cluster]++;
    const dist = distanceFn(vector, centroids[cluster]);
    wcss += dist * dist;
  });

  // Perfectly tight clusters: the ratio would divide by zero.
  if (wcss === 0) return 0;

  let bgss = 0;
  for (let i = 0; i < k; i++) {
    if (clusterCounts[i] === 0) continue;
    const dist = distanceFn(centroids[i], overallCentroid);
    bgss += clusterCounts[i] * dist * dist;
  }

  return bgss / (k - 1) / (wcss / (n - k));
}
|
|
20161
|
+
/**
 * Gap statistic of a clustering.
 *
 * Compares the log within-cluster dispersion of the given clustering with the
 * mean log dispersion of `nReferences` reference data sets. Each reference set
 * has as many points as `vectors`, drawn with Math.random() uniformly inside
 * the per-dimension bounding box of the data, and is clustered with `kmeans`
 * using the same number of clusters (at most 50 iterations).
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} [distanceFn] - Distance between two vectors
 * @param {number} [nReferences=10] - Number of reference data sets
 * @returns {Promise<{gap: number, sk: number, expectedWk: number, actualWk: number}>}
 *   `gap` is `expectedWk - actualWk`; `sk` is the standard deviation of the
 *   reference log dispersions scaled by `sqrt(1 + 1 / nReferences)`
 */
async function gapStatistic(vectors, assignments, centroids, distanceFn = euclideanDistance, nReferences = 10) {
  const pointCount = vectors.length;
  const clusterCount = centroids.length;
  const dims = vectors[0].length;

  // log of (sum of squared point-to-centroid distances); the epsilon keeps
  // log() finite when the dispersion is exactly 0.
  const logDispersion = (points, labels, centers) => {
    let total = 0;
    points.forEach((point, idx) => {
      const d = distanceFn(point, centers[labels[idx]]);
      total += d * d;
    });
    return Math.log(total + 1e-10);
  };

  const actualWk = logDispersion(vectors, assignments, centroids);

  // Per-dimension bounding box of the data.
  const lower = new Array(dims).fill(Infinity);
  const upper = new Array(dims).fill(-Infinity);
  vectors.forEach((point) => {
    point.forEach((value, d) => {
      lower[d] = Math.min(lower[d], value);
      upper[d] = Math.max(upper[d], value);
    });
  });

  const referenceWks = [];
  for (let r = 0; r < nReferences; r++) {
    const sample = [];
    for (let p = 0; p < pointCount; p++) {
      const point = new Array(dims);
      for (let d = 0; d < dims; d++) {
        point[d] = lower[d] + Math.random() * (upper[d] - lower[d]);
      }
      sample.push(point);
    }
    const fit = kmeans(sample, clusterCount, { maxIterations: 50, distanceFn });
    referenceWks.push(logDispersion(sample, fit.assignments, fit.centroids));
  }

  const expectedWk = referenceWks.reduce((acc, value) => acc + value, 0) / nReferences;
  const variance =
    referenceWks.reduce((acc, value) => acc + Math.pow(value - expectedWk, 2), 0) / nReferences;
  const sk = Math.sqrt(variance) * Math.sqrt(1 + 1 / nReferences);

  return { gap: expectedWk - actualWk, sk, expectedWk, actualWk };
}
|
|
20205
|
+
/**
 * Measure how stable a k-means clustering is across repeated runs.
 *
 * Runs `kmeans` `nRuns` times, passing the run number as `seed` (this
 * overrides any `seed` in the options), then compares every pair of runs with
 * calculateAssignmentSimilarity and summarises the inertias.
 *
 * @param {number[][]} vectors - Data points
 * @param {number} k - Number of clusters
 * @param {Object} [options] - `nRuns` (default 10), `distanceFn`, plus any
 *   other option forwarded to `kmeans`
 * @returns {{avgInertia: number, stdInertia: number, cvInertia: number,
 *   avgSimilarity: number, stability: number}} `cvInertia` is
 *   `stdInertia / avgInertia` (0 when the mean is 0); `stability` equals
 *   `avgSimilarity`, which is 1 when there are no pairs to compare
 */
function clusteringStability(vectors, k, options = {}) {
  const {
    nRuns = 10,
    distanceFn = euclideanDistance,
    ...kmeansOptions
  } = options;

  const inertias = [];
  const labelings = [];
  for (let seed = 0; seed < nRuns; seed++) {
    // Different seed for each run
    const { inertia, assignments } = kmeans(vectors, k, { ...kmeansOptions, distanceFn, seed });
    inertias.push(inertia);
    labelings.push(assignments);
  }

  // Pairwise agreement between every two runs.
  const similarities = [];
  for (let first = 0; first < nRuns - 1; first++) {
    for (let second = first + 1; second < nRuns; second++) {
      similarities.push(calculateAssignmentSimilarity(labelings[first], labelings[second]));
    }
  }

  const sum = (values) => values.reduce((acc, value) => acc + value, 0);

  const avgInertia = sum(inertias) / nRuns;
  const squaredDeviations = inertias.reduce(
    (acc, value) => acc + Math.pow(value - avgInertia, 2),
    0
  );
  const stdInertia = Math.sqrt(squaredDeviations / nRuns);

  let avgSimilarity = 1;
  if (similarities.length > 0) {
    avgSimilarity = sum(similarities) / similarities.length;
  }

  return {
    avgInertia,
    stdInertia,
    // Coefficient of variation
    cvInertia: avgInertia !== 0 ? stdInertia / avgInertia : 0,
    avgSimilarity,
    // Higher is more stable
    stability: avgSimilarity
  };
}
|
|
20245
|
+
/**
 * Agreement between two clusterings of the same points (Rand index).
 *
 * A pair of points "agrees" when both clusterings put the two points together
 * or both keep them apart. The result is the fraction of agreeing pairs among
 * all `n * (n - 1) / 2` pairs, where `n` is `assignments1.length`.
 *
 * The pairs are counted from label frequencies instead of being enumerated,
 * so the cost is linear in the number of points rather than quadratic. The
 * count is exact: with S = pairs together in both clusterings, A = pairs
 * together in the first and B = pairs together in the second, the agreeing
 * pairs are `S + (total - A - B + S)`.
 *
 * @param {Array} assignments1 - Cluster label of each point, first clustering
 * @param {Array} assignments2 - Cluster label of each point, second clustering
 * @returns {number} Value in [0, 1]; 1 when there are fewer than two points
 */
function calculateAssignmentSimilarity(assignments1, assignments2) {
  const n = assignments1.length;
  const totalPairs = n * (n - 1) / 2;
  if (!(totalPairs > 0)) return 1;

  const firstCounts = new Map(); // label in clustering 1 -> number of points
  const secondCounts = new Map(); // label in clustering 2 -> number of points
  const jointCounts = new Map(); // label 1 -> (label 2 -> number of points)

  for (let i = 0; i < n; i++) {
    const first = assignments1[i];
    const second = assignments2[i];
    firstCounts.set(first, (firstCounts.get(first) ?? 0) + 1);
    secondCounts.set(second, (secondCounts.get(second) ?? 0) + 1);
    let row = jointCounts.get(first);
    if (row === undefined) {
      row = new Map();
      jointCounts.set(first, row);
    }
    row.set(second, (row.get(second) ?? 0) + 1);
  }

  // Number of unordered pairs that can be formed inside a group of `count`.
  const pairsWithin = (count) => count * (count - 1) / 2;
  const sumPairs = (counts) => {
    let total = 0;
    for (const count of counts.values()) total += pairsWithin(count);
    return total;
  };

  const togetherInFirst = sumPairs(firstCounts);
  const togetherInSecond = sumPairs(secondCounts);
  let togetherInBoth = 0;
  for (const row of jointCounts.values()) togetherInBoth += sumPairs(row);

  const matches = totalPairs + 2 * togetherInBoth - togetherInFirst - togetherInSecond;
  return matches / totalPairs;
}
|
|
20260
|
+
|
|
20261
|
+
// Frozen, prototype-less namespace object bundling the clustering metrics.
var metrics = /*#__PURE__*/Object.freeze({
  __proto__: null,
  calinskiHarabaszIndex,
  clusteringStability,
  daviesBouldinIndex,
  gapStatistic,
  silhouetteScore
});
|
|
20269
|
+
|
|
20270
|
+
export { AVAILABLE_BEHAVIORS, AnalyticsNotEnabledError, AuditPlugin, AuthenticationError, BackupPlugin, BaseError, BehaviorError, CachePlugin, Client, ConnectionString, ConnectionStringError, CostsPlugin, CryptoError, DEFAULT_BEHAVIOR, Database, DatabaseError, EncryptionError, ErrorMap, EventualConsistencyPlugin, FullTextPlugin, InvalidResourceItem, MetadataLimitError, MetricsPlugin, MissingMetadata, NoSuchBucket, NoSuchKey, NotFound, PartitionDriverError, PartitionError, PermissionError, Plugin, PluginError, PluginObject, PluginStorageError, QueueConsumerPlugin, ReplicatorPlugin, Resource, ResourceError, ResourceIdsPageReader, ResourceIdsReader, ResourceNotFound, ResourceReader, ResourceWriter, S3QueuePlugin, Database as S3db, S3dbError, SchedulerPlugin, Schema, SchemaError, StateMachinePlugin, StreamError, UnknownError, ValidationError, Validator, VectorPlugin, behaviors, calculateAttributeNamesSize, calculateAttributeSizes, calculateEffectiveLimit, calculateSystemOverhead, calculateTotalSize, calculateUTF8Bytes, clearUTF8Cache, clearUTF8Memo, clearUTF8Memory, decode, decodeDecimal, decodeFixedPoint, decrypt, S3db as default, encode, encodeDecimal, encodeFixedPoint, encrypt, getBehavior, getSizeBreakdown, idGenerator, mapAwsError, md5, passwordGenerator, sha256, streamToString, transformValue, tryFn, tryFnSync };
|
|
18936
20271
|
//# sourceMappingURL=s3db.es.js.map
|