s3db.js 11.2.4 → 11.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/s3db.cjs.js CHANGED
@@ -81,6 +81,41 @@ const decodeDecimal = (s) => {
81
81
  const num = decPart ? Number(decodedInt + "." + decPart) : decodedInt;
82
82
  return negative ? -num : num;
83
83
  };
84
/**
 * Encode a number as a "^"-prefixed fixed-point string.
 *
 * The value is scaled by 10^precision, rounded to an integer, and the integer's
 * magnitude is written with the module-level `alphabet` in radix `base`.
 *
 * @param {number} n - Value to encode.
 * @param {number} [precision=6] - Decimal places kept by the scaling step.
 * @returns {string} "^0" when the scaled value rounds to zero; "^" + optional
 *   "-" + digits otherwise; the literal string "undefined" when `n` is not a
 *   finite number or when the scaled value is not finite.
 */
const encodeFixedPoint = (n, precision = 6) => {
  if (typeof n !== "number" || !Number.isFinite(n)) return "undefined";
  const scaled = Math.round(n * Math.pow(10, precision));
  // n * 10^precision can overflow to +/-Infinity (or become NaN for 0 * Infinity).
  // The digit loop below never terminates on Infinity, so bail out first.
  if (!Number.isFinite(scaled)) return "undefined";
  if (scaled === 0) return "^0";
  let remaining = Math.abs(scaled);
  let digits = "";
  while (remaining > 0) {
    digits = alphabet[remaining % base] + digits;
    remaining = Math.floor(remaining / base);
  }
  return `^${scaled < 0 ? "-" : ""}${digits}`;
};
99
/**
 * Decode a "^"-prefixed fixed-point string back into a number.
 *
 * After the "^" prefix and an optional "-" sign, each character is looked up in
 * the module-level `charToValue` table and accumulated in radix `base`; the
 * resulting integer is divided by 10^precision.
 *
 * @param {string} s - Encoded token.
 * @param {number} [precision=6] - Decimal places used when the token was encoded.
 * @returns {number} The decoded value, or NaN when `s` is not a string, lacks
 *   the "^" prefix, has no digits, or contains a character missing from
 *   `charToValue`.
 */
const decodeFixedPoint = (s, precision = 6) => {
  if (typeof s !== "string" || !s.startsWith("^")) return NaN;
  let digits = s.slice(1);
  if (digits === "0") return 0;
  const negative = digits[0] === "-";
  if (negative) digits = digits.slice(1);
  // A bare "^" or "^-" carries no magnitude; treat it as malformed rather than
  // silently decoding it to 0 / -0.
  if (digits === "") return NaN;
  let magnitude = 0;
  for (let i = 0; i < digits.length; i++) {
    const digitValue = charToValue[digits[i]];
    if (digitValue === void 0) return NaN;
    magnitude = magnitude * base + digitValue;
  }
  const scaled = negative ? -magnitude : magnitude;
  return scaled / Math.pow(10, precision);
};
84
119
 
85
120
  const utf8BytesMemory = /* @__PURE__ */ new Map();
86
121
  const UTF8_MEMORY_MAX_SIZE = 1e4;
@@ -11505,6 +11540,11 @@ class Validator extends FastestValidator {
11505
11540
  type: "any",
11506
11541
  custom: this.autoEncrypt ? jsonHandler : void 0
11507
11542
  });
11543
+ this.alias("embedding", {
11544
+ type: "array",
11545
+ items: "number",
11546
+ empty: false
11547
+ });
11508
11548
  }
11509
11549
  }
11510
11550
  const ValidatorManager = new Proxy(Validator, {
@@ -11753,6 +11793,59 @@ const SchemaActions = {
11753
11793
  }
11754
11794
  return NaN;
11755
11795
  });
11796
+ },
11797
+ fromArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
11798
+ if (value === null || value === void 0 || !Array.isArray(value)) {
11799
+ return value;
11800
+ }
11801
+ if (value.length === 0) {
11802
+ return "";
11803
+ }
11804
+ const encodedItems = value.map((item) => {
11805
+ if (typeof item === "number" && !isNaN(item)) {
11806
+ return encodeFixedPoint(item, precision);
11807
+ }
11808
+ const n = Number(item);
11809
+ return isNaN(n) ? "" : encodeFixedPoint(n, precision);
11810
+ });
11811
+ return encodedItems.join(separator);
11812
+ },
11813
/**
 * Parse a separator-joined string of fixed-point tokens back into numbers.
 *
 * Arrays are mapped element-wise (numbers pass through, everything else goes
 * through decodeFixedPoint); null/undefined are returned as-is; "" yields [].
 * Any other value is stringified and split on `separator`, where a backslash
 * followed by another character inserts that character literally.
 *
 * @param {*} value - Stored representation.
 * @param {{separator: string, precision?: number}} options - Split separator and
 *   the precision forwarded to decodeFixedPoint (default 6).
 * @returns {*} Array of numbers (NaN for empty tokens), or `value` when it is
 *   null/undefined.
 */
toArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
  if (Array.isArray(value)) {
    return value.map((entry) => (typeof entry === "number" ? entry : decodeFixedPoint(entry, precision)));
  }
  if (value === null || value === void 0) {
    return value;
  }
  if (value === "") {
    return [];
  }
  const text = String(value);
  const tokens = [];
  let token = "";
  for (let pos = 0; pos < text.length; pos++) {
    const ch = text[pos];
    if (ch === "\\" && pos + 1 < text.length) {
      // Escaped character: consume the backslash and keep the next char verbatim.
      pos++;
      token += text[pos];
    } else if (ch === separator) {
      tokens.push(token);
      token = "";
    } else {
      token += ch;
    }
  }
  tokens.push(token);
  // Tokens are always strings here; an empty token has no value to decode.
  return tokens.map((tok) => (tok === "" ? NaN : decodeFixedPoint(tok, precision)));
}
11757
11850
  };
11758
11851
  class Schema {
@@ -11822,18 +11915,89 @@ class Schema {
11822
11915
  }
11823
11916
  return objectKeys;
11824
11917
  }
11918
+ _generateHooksFromOriginalAttributes(attributes, prefix = "") {
11919
+ for (const [key, value] of Object.entries(attributes)) {
11920
+ if (key.startsWith("$$")) continue;
11921
+ const fullKey = prefix ? `${prefix}.${key}` : key;
11922
+ if (typeof value === "object" && value !== null && !Array.isArray(value) && value.type) {
11923
+ if (value.type === "array" && value.items) {
11924
+ const itemsType = value.items;
11925
+ const arrayLength = typeof value.length === "number" ? value.length : null;
11926
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11927
+ this.addHook("beforeMap", fullKey, "fromArray");
11928
+ this.addHook("afterUnmap", fullKey, "toArray");
11929
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11930
+ const isIntegerArray = typeof itemsType === "string" && itemsType.includes("integer");
11931
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11932
+ if (isIntegerArray) {
11933
+ this.addHook("beforeMap", fullKey, "fromArrayOfNumbers");
11934
+ this.addHook("afterUnmap", fullKey, "toArrayOfNumbers");
11935
+ } else if (isEmbedding) {
11936
+ this.addHook("beforeMap", fullKey, "fromArrayOfEmbeddings");
11937
+ this.addHook("afterUnmap", fullKey, "toArrayOfEmbeddings");
11938
+ } else {
11939
+ this.addHook("beforeMap", fullKey, "fromArrayOfDecimals");
11940
+ this.addHook("afterUnmap", fullKey, "toArrayOfDecimals");
11941
+ }
11942
+ }
11943
+ }
11944
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value) && !value.type) {
11945
+ this._generateHooksFromOriginalAttributes(value, fullKey);
11946
+ }
11947
+ }
11948
+ }
11825
11949
  generateAutoHooks() {
11950
+ this._generateHooksFromOriginalAttributes(this.attributes);
11826
11951
  const schema = flat.flatten(lodashEs.cloneDeep(this.attributes), { safe: true });
11827
11952
  for (const [name, definition] of Object.entries(schema)) {
11828
- if (definition.includes("array")) {
11829
- if (definition.includes("items:string")) {
11953
+ if (name.includes("$$")) continue;
11954
+ if (this.options.hooks.beforeMap[name] || this.options.hooks.afterUnmap[name]) {
11955
+ continue;
11956
+ }
11957
+ const defStr = typeof definition === "string" ? definition : "";
11958
+ const defType = typeof definition === "object" && definition !== null ? definition.type : null;
11959
+ const isEmbeddingType = defStr.includes("embedding") || defType === "embedding";
11960
+ if (isEmbeddingType) {
11961
+ const lengthMatch = defStr.match(/embedding:(\d+)/);
11962
+ if (lengthMatch) {
11963
+ parseInt(lengthMatch[1], 10);
11964
+ } else if (defStr.includes("length:")) {
11965
+ const match = defStr.match(/length:(\d+)/);
11966
+ if (match) parseInt(match[1], 10);
11967
+ }
11968
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
11969
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11970
+ continue;
11971
+ }
11972
+ const isArray = defStr.includes("array") || defType === "array";
11973
+ if (isArray) {
11974
+ let itemsType = null;
11975
+ if (typeof definition === "object" && definition !== null && definition.items) {
11976
+ itemsType = definition.items;
11977
+ } else if (defStr.includes("items:string")) {
11978
+ itemsType = "string";
11979
+ } else if (defStr.includes("items:number")) {
11980
+ itemsType = "number";
11981
+ }
11982
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11830
11983
  this.addHook("beforeMap", name, "fromArray");
11831
11984
  this.addHook("afterUnmap", name, "toArray");
11832
- } else if (definition.includes("items:number")) {
11833
- const isIntegerArray = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
11985
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11986
+ const isIntegerArray = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer") || typeof itemsType === "string" && itemsType.includes("integer");
11987
+ let arrayLength = null;
11988
+ if (typeof definition === "object" && definition !== null && typeof definition.length === "number") {
11989
+ arrayLength = definition.length;
11990
+ } else if (defStr.includes("length:")) {
11991
+ const match = defStr.match(/length:(\d+)/);
11992
+ if (match) arrayLength = parseInt(match[1], 10);
11993
+ }
11994
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11834
11995
  if (isIntegerArray) {
11835
11996
  this.addHook("beforeMap", name, "fromArrayOfNumbers");
11836
11997
  this.addHook("afterUnmap", name, "toArrayOfNumbers");
11998
+ } else if (isEmbedding) {
11999
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
12000
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11837
12001
  } else {
11838
12002
  this.addHook("beforeMap", name, "fromArrayOfDecimals");
11839
12003
  this.addHook("afterUnmap", name, "toArrayOfDecimals");
@@ -11841,7 +12005,7 @@ class Schema {
11841
12005
  }
11842
12006
  continue;
11843
12007
  }
11844
- if (definition.includes("secret")) {
12008
+ if (defStr.includes("secret") || defType === "secret") {
11845
12009
  if (this.options.autoEncrypt) {
11846
12010
  this.addHook("beforeMap", name, "encrypt");
11847
12011
  }
@@ -11850,8 +12014,8 @@ class Schema {
11850
12014
  }
11851
12015
  continue;
11852
12016
  }
11853
- if (definition.includes("number")) {
11854
- const isInteger = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
12017
+ if (defStr.includes("number") || defType === "number") {
12018
+ const isInteger = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer");
11855
12019
  if (isInteger) {
11856
12020
  this.addHook("beforeMap", name, "toBase62");
11857
12021
  this.addHook("afterUnmap", name, "fromBase62");
@@ -11861,17 +12025,17 @@ class Schema {
11861
12025
  }
11862
12026
  continue;
11863
12027
  }
11864
- if (definition.includes("boolean")) {
12028
+ if (defStr.includes("boolean") || defType === "boolean") {
11865
12029
  this.addHook("beforeMap", name, "fromBool");
11866
12030
  this.addHook("afterUnmap", name, "toBool");
11867
12031
  continue;
11868
12032
  }
11869
- if (definition.includes("json")) {
12033
+ if (defStr.includes("json") || defType === "json") {
11870
12034
  this.addHook("beforeMap", name, "toJSON");
11871
12035
  this.addHook("afterUnmap", name, "fromJSON");
11872
12036
  continue;
11873
12037
  }
11874
- if (definition === "object" || definition.includes("object")) {
12038
+ if (definition === "object" || defStr.includes("object") || defType === "object") {
11875
12039
  this.addHook("beforeMap", name, "toJSON");
11876
12040
  this.addHook("afterUnmap", name, "fromJSON");
11877
12041
  continue;
@@ -12013,7 +12177,8 @@ class Schema {
12013
12177
  const originalKey = reversedMap && reversedMap[key] ? reversedMap[key] : key;
12014
12178
  let parsedValue = value;
12015
12179
  const attrDef = this.getAttributeDefinition(originalKey);
12016
- if (typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12180
+ const hasAfterUnmapHook = this.options.hooks?.afterUnmap?.[originalKey];
12181
+ if (!hasAfterUnmapHook && typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12017
12182
  if (typeof parsedValue === "string" && parsedValue !== "") {
12018
12183
  parsedValue = decode(parsedValue);
12019
12184
  } else if (typeof parsedValue === "number") ; else {
@@ -12078,18 +12243,38 @@ class Schema {
12078
12243
  preprocessAttributesForValidation(attributes) {
12079
12244
  const processed = {};
12080
12245
  for (const [key, value] of Object.entries(attributes)) {
12081
- if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12082
- const isExplicitRequired = value.$$type && value.$$type.includes("required");
12083
- const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12084
- const objectConfig = {
12085
- type: "object",
12086
- properties: this.preprocessAttributesForValidation(value),
12087
- strict: false
12088
- };
12089
- if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12090
- objectConfig.optional = true;
12246
+ if (typeof value === "string") {
12247
+ if (value.startsWith("embedding:")) {
12248
+ const lengthMatch = value.match(/embedding:(\d+)/);
12249
+ if (lengthMatch) {
12250
+ const length = lengthMatch[1];
12251
+ const rest = value.substring(`embedding:${length}`.length);
12252
+ processed[key] = `array|items:number|length:${length}|empty:false${rest}`;
12253
+ continue;
12254
+ }
12255
+ }
12256
+ if (value.startsWith("embedding|") || value === "embedding") {
12257
+ processed[key] = value.replace(/^embedding/, "array|items:number|empty:false");
12258
+ continue;
12259
+ }
12260
+ processed[key] = value;
12261
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12262
+ const hasValidatorType = value.type !== void 0 && key !== "$$type";
12263
+ if (hasValidatorType) {
12264
+ processed[key] = value;
12265
+ } else {
12266
+ const isExplicitRequired = value.$$type && value.$$type.includes("required");
12267
+ const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12268
+ const objectConfig = {
12269
+ type: "object",
12270
+ properties: this.preprocessAttributesForValidation(value),
12271
+ strict: false
12272
+ };
12273
+ if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12274
+ objectConfig.optional = true;
12275
+ }
12276
+ processed[key] = objectConfig;
12091
12277
  }
12092
- processed[key] = objectConfig;
12093
12278
  } else {
12094
12279
  processed[key] = value;
12095
12280
  }
@@ -15033,7 +15218,7 @@ class Database extends EventEmitter {
15033
15218
  this.id = idGenerator(7);
15034
15219
  this.version = "1";
15035
15220
  this.s3dbVersion = (() => {
15036
- const [ok, err, version] = tryFn(() => true ? "11.2.4" : "latest");
15221
+ const [ok, err, version] = tryFn(() => true ? "11.2.5" : "latest");
15037
15222
  return ok ? version : "latest";
15038
15223
  })();
15039
15224
  this.resources = {};
@@ -18936,6 +19121,1091 @@ class StateMachinePlugin extends Plugin {
18936
19121
  }
18937
19122
  }
18938
19123
 
19124
/**
 * Cosine distance (1 - cosine similarity) between two equal-length vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Distance in [0, 2]. When the norm product is zero the
 *   result is 0 if every component of both vectors is zero, otherwise 1.
 * @throws {Error} When the vectors have different lengths.
 */
function cosineDistance(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return a.every((v) => v === 0) && b.every((v) => v === 0) ? 0 : 1;
  }
  // Floating-point rounding can push the ratio marginally outside [-1, 1];
  // clamp so the distance never leaves [0, 2] (NaN still propagates).
  const similarity = Math.min(1, Math.max(-1, dot / denominator));
  return 1 - similarity;
}
19143
/**
 * Euclidean (L2) distance between two equal-length vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Square root of the summed squared component differences.
 * @throws {Error} When the vectors have different lengths.
 */
function euclideanDistance(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let squaredTotal = 0;
  let idx = 0;
  while (idx < size) {
    const delta = a[idx] - b[idx];
    squaredTotal += delta * delta;
    idx += 1;
  }
  return Math.sqrt(squaredTotal);
}
19154
/**
 * Manhattan (L1) distance between two equal-length vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Sum of the absolute component differences.
 * @throws {Error} When the vectors have different lengths.
 */
function manhattanDistance(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < size) {
    total += Math.abs(a[idx] - b[idx]);
    idx += 1;
  }
  return total;
}
19164
/**
 * Dot product of two equal-length vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Sum of the pairwise component products.
 * @throws {Error} When the vectors have different lengths.
 */
function dotProduct(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < size) {
    total += a[idx] * b[idx];
    idx += 1;
  }
  return total;
}
19174
/**
 * Scale a vector to unit length.
 *
 * @param {number[]} vector - Input vector; it is not modified.
 * @returns {number[]} A new array: each component divided by the vector's
 *   Euclidean norm, or a plain copy when that norm is exactly zero.
 */
function normalize(vector) {
  const sumOfSquares = vector.reduce((acc, component) => acc + component * component, 0);
  const length = Math.sqrt(sumOfSquares);
  return length === 0
    ? vector.slice()
    : vector.map((component) => component / length);
}
19183
+
19184
/**
 * Cluster vectors with k-means, using k-means++ for the initial centroids.
 *
 * Each iteration assigns every vector to its nearest centroid, sums the squared
 * nearest distances (inertia) and stops once the absolute inertia change drops
 * below `tolerance` or `maxIterations` is reached. Centroids are recomputed
 * only while not converged; a cluster that ends up empty is re-seeded from a
 * vector picked with Math.random().
 *
 * @param {number[][]} vectors - Vectors to cluster; all must share one length.
 * @param {number} k - Number of clusters (1 <= k <= vectors.length).
 * @param {Object} [options]
 * @param {number} [options.maxIterations=100] - Iteration cap.
 * @param {number} [options.tolerance=1e-4] - Inertia-change threshold for convergence.
 * @param {Function} [options.distanceFn=euclideanDistance] - (a, b) => number.
 * @param {number|null} [options.seed=null] - Forwarded to the k-means++ initialiser.
 * @param {Function|null} [options.onIteration=null] - Called as
 *   (iterationNumber, inertia, converged) after each assignment step.
 * @returns {{centroids: number[][], assignments: number[], iterations: number,
 *   converged: boolean, inertia: number}} Clustering result.
 * @throws {Error} On empty input, k < 1, k > vectors.length, or mixed dimensions.
 */
function kmeans(vectors, k, options = {}) {
  const {
    maxIterations = 100,
    tolerance = 1e-4,
    distanceFn = euclideanDistance,
    seed = null,
    onIteration = null
  } = options;
  if (vectors.length === 0) {
    throw new Error("Cannot cluster empty vector array");
  }
  if (k < 1) {
    throw new Error(`k must be at least 1, got ${k}`);
  }
  if (k > vectors.length) {
    throw new Error(`k (${k}) cannot be greater than number of vectors (${vectors.length})`);
  }
  const dimensions = vectors[0].length;
  for (let i = 1; i < vectors.length; i++) {
    if (vectors[i].length !== dimensions) {
      throw new Error(`All vectors must have same dimensions. Expected ${dimensions}, got ${vectors[i].length} at index ${i}`);
    }
  }
  const centroids = initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed);
  let assignments = new Array(vectors.length);
  let iterations = 0;
  let converged = false;
  let previousInertia = Infinity;
  while (!converged && iterations < maxIterations) {
    // Accumulate inertia during assignment so each nearest distance is
    // evaluated once instead of being recomputed in a second pass.
    let currentInertia = 0;
    const newAssignments = vectors.map((vector) => {
      let minDist = Infinity;
      let nearestCluster = 0;
      let found = false;
      for (let i = 0; i < k; i++) {
        const dist = distanceFn(vector, centroids[i]);
        if (dist < minDist) {
          minDist = dist;
          nearestCluster = i;
          found = true;
        }
      }
      // If no distance compared below Infinity (e.g. all NaN), cluster 0 is
      // kept; use its actual distance so the inertia matches that assignment.
      const nearestDist = found ? minDist : distanceFn(vector, centroids[0]);
      currentInertia += nearestDist * nearestDist;
      return nearestCluster;
    });
    converged = Math.abs(previousInertia - currentInertia) < tolerance;
    assignments = newAssignments;
    previousInertia = currentInertia;
    if (onIteration) {
      onIteration(iterations + 1, currentInertia, converged);
    }
    if (!converged) {
      const clusterSums = Array(k).fill(null).map(() => new Array(dimensions).fill(0));
      const clusterCounts = new Array(k).fill(0);
      vectors.forEach((vector, i) => {
        const cluster = assignments[i];
        clusterCounts[cluster]++;
        vector.forEach((val, j) => {
          clusterSums[cluster][j] += val;
        });
      });
      for (let i = 0; i < k; i++) {
        if (clusterCounts[i] > 0) {
          centroids[i] = clusterSums[i].map((sum) => sum / clusterCounts[i]);
        } else {
          // Empty cluster: re-seed it from a randomly chosen input vector.
          const randomIdx = Math.floor(Math.random() * vectors.length);
          centroids[i] = [...vectors[randomIdx]];
        }
      }
    }
    iterations++;
  }
  let inertia;
  if (converged) {
    // Centroids were not moved after the last assignment, so the inertia
    // measured in that iteration is already the final value.
    inertia = previousInertia;
  } else {
    inertia = 0;
    vectors.forEach((vector, i) => {
      const dist = distanceFn(vector, centroids[assignments[i]]);
      inertia += dist * dist;
    });
  }
  return {
    centroids,
    assignments,
    iterations,
    converged,
    inertia
  };
}
19271
/**
 * Pick k initial centroids with the k-means++ strategy.
 *
 * The first centroid is `vectors[seed mod n]` when a usable seed is given,
 * otherwise a Math.random() pick. Every further centroid is sampled with
 * probability proportional to the squared distance to the nearest centroid
 * chosen so far. Only the first pick is affected by `seed`.
 *
 * @param {number[][]} vectors - Candidate vectors (must be non-empty).
 * @param {number} k - Number of centroids to return.
 * @param {Function} distanceFn - (a, b) => number; unused when k is 1.
 * @param {number|null|undefined} seed - Index seed; negative values wrap
 *   around and fractional values are truncated. Non-finite or nullish seeds
 *   fall back to a random first pick.
 * @returns {number[][]} Exactly k centroids, each a copy of an input vector.
 */
function initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed) {
  const centroids = [];
  const n = vectors.length;
  const randomIndex = () => Math.floor(Math.random() * n);

  const numericSeed = seed === null || seed === undefined ? NaN : Number(seed);
  // Normalise into [0, n): a raw `seed % n` is negative for negative seeds and
  // fractional for non-integers, neither of which is a valid array index.
  const firstIndex = Number.isFinite(numericSeed)
    ? ((Math.trunc(numericSeed) % n) + n) % n
    : randomIndex();
  centroids.push([...vectors[firstIndex]]);

  for (let i = 1; i < k; i++) {
    const squaredDistances = vectors.map((vector) => {
      const nearest = Math.min(...centroids.map((c) => distanceFn(vector, c)));
      return nearest * nearest;
    });
    const totalSquared = squaredDistances.reduce((a, b) => a + b, 0);
    // Zero (all points coincide with a centroid) or NaN totals give no usable
    // weights, so fall back to a uniform pick.
    if (!(totalSquared > 0)) {
      centroids.push([...vectors[randomIndex()]]);
      continue;
    }
    const threshold = Math.random() * totalSquared;
    let cumulativeSum = 0;
    let chosen = -1;
    for (let j = 0; j < n; j++) {
      cumulativeSum += squaredDistances[j];
      if (cumulativeSum >= threshold) {
        chosen = j;
        break;
      }
    }
    // Guarantee one centroid per round even if the threshold was never reached.
    centroids.push([...vectors[chosen === -1 ? randomIndex() : chosen]]);
  }
  return centroids;
}
19299
/**
 * Run k-means for every k in [minK, maxK] and score each clustering with
 * several metrics to recommend a cluster count.
 *
 * For each k the function records inertia, silhouette, Davies-Bouldin,
 * Calinski-Harabasz, gap statistic and stability values (all provided by the
 * `metrics` module). Each metric then "votes" for its best k, together with an
 * elbow estimate on the inertia curve; the most frequent vote is the consensus.
 *
 * @param {number[][]} vectors - Vectors to analyse.
 * @param {Object} [options]
 * @param {number} [options.minK=2] - Smallest k to evaluate.
 * @param {number} [options.maxK] - Largest k to evaluate; defaults to
 *   min(10, floor(sqrt(vectors.length / 2))).
 * @param {Function} [options.distanceFn=euclideanDistance] - Distance function.
 * @param {number} [options.nReferences=10] - Passed to gapStatistic.
 * @param {number} [options.stabilityRuns=5] - Passed to clusteringStability as nRuns.
 *   Any remaining options are forwarded to kmeans.
 * @returns {Promise<{results: Object[], recommendations: Object, consensus: number,
 *   summary: Object}>} Per-k metrics, per-metric recommendations and the consensus k.
 * @throws {Error} When the range minK..maxK contains no value to evaluate.
 */
async function findOptimalK(vectors, options = {}) {
  const {
    minK = 2,
    maxK = Math.min(10, Math.floor(Math.sqrt(vectors.length / 2))),
    distanceFn = euclideanDistance,
    nReferences = 10,
    stabilityRuns = 5,
    ...kmeansOptions
  } = options;
  // With an empty range the reductions below would fail with an opaque
  // "Reduce of empty array" TypeError; report the real cause instead. The
  // default maxK is below the default minK for fewer than 8 vectors.
  if (!(minK <= maxK)) {
    throw new Error(
      `No k values to evaluate: minK (${minK}) must not exceed maxK (${maxK}) for ${vectors.length} vectors`
    );
  }
  const metricsModule = await Promise.resolve().then(function () { return metrics; });
  const {
    silhouetteScore,
    daviesBouldinIndex,
    calinskiHarabaszIndex,
    gapStatistic,
    clusteringStability
  } = metricsModule;
  const results = [];
  for (let k = minK; k <= maxK; k++) {
    const kmeansResult = kmeans(vectors, k, { ...kmeansOptions, distanceFn });
    const silhouette = silhouetteScore(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );
    const daviesBouldin = daviesBouldinIndex(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );
    const calinskiHarabasz = calinskiHarabaszIndex(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );
    const gap = await gapStatistic(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn,
      nReferences
    );
    const stability = clusteringStability(
      vectors,
      k,
      { ...kmeansOptions, distanceFn, nRuns: stabilityRuns }
    );
    results.push({
      k,
      inertia: kmeansResult.inertia,
      silhouette,
      daviesBouldin,
      calinskiHarabasz,
      gap: gap.gap,
      gapSk: gap.sk,
      stability: stability.stability,
      cvInertia: stability.cvInertia,
      iterations: kmeansResult.iterations,
      converged: kmeansResult.converged
    });
  }
  // findElbowPoint returns an index into `results`; offset it back to a k value.
  const elbowK = findElbowPoint(results.map((r) => r.inertia));
  const recommendations = {
    elbow: minK + elbowK,
    silhouette: results.reduce(
      (best, curr) => curr.silhouette > best.silhouette ? curr : best
    ).k,
    // Davies-Bouldin is the only metric here where lower wins.
    daviesBouldin: results.reduce(
      (best, curr) => curr.daviesBouldin < best.daviesBouldin ? curr : best
    ).k,
    calinskiHarabasz: results.reduce(
      (best, curr) => curr.calinskiHarabasz > best.calinskiHarabasz ? curr : best
    ).k,
    gap: results.reduce(
      (best, curr) => curr.gap > best.gap ? curr : best
    ).k,
    stability: results.reduce(
      (best, curr) => curr.stability > best.stability ? curr : best
    ).k
  };
  const votes = Object.values(recommendations);
  const consensus = votes.reduce((acc, k) => {
    acc[k] = (acc[k] || 0) + 1;
    return acc;
  }, {});
  const consensusK = Number.parseInt(
    Object.entries(consensus).reduce((a, b) => b[1] > a[1] ? b : a)[0],
    10
  );
  return {
    results,
    recommendations,
    consensus: consensusK,
    summary: {
      analysisRange: `${minK}-${maxK}`,
      totalVectors: vectors.length,
      dimensions: vectors[0].length,
      recommendation: consensusK,
      confidence: consensus[consensusK] / votes.length
    }
  };
}
19403
/**
 * Locate the "elbow" of an inertia curve via the largest discrete second
 * difference (inertias[i-1] - 2*inertias[i] + inertias[i+1]).
 *
 * @param {number[]} inertias - Inertia values ordered by increasing k.
 * @returns {number} Index of the interior point with the greatest curvature
 *   (first one wins on ties), or 0 when fewer than three values are given.
 */
function findElbowPoint(inertias) {
  const count = inertias.length;
  if (count < 3) {
    return 0;
  }
  let bestIndex = 0;
  let bestCurvature = -Infinity;
  let idx = 1;
  while (idx < count - 1) {
    const secondDifference = inertias[idx - 1] - 2 * inertias[idx] + inertias[idx + 1];
    if (secondDifference > bestCurvature) {
      bestCurvature = secondDifference;
      bestIndex = idx;
    }
    idx += 1;
  }
  return bestIndex;
}
19417
+
19418
/**
 * Error raised by the vector plugin.
 *
 * Forwards to PluginError with `pluginName` fixed to "VectorPlugin" and, unless
 * the caller supplies `details.description`, a built-in troubleshooting text
 * that names the failing operation (`details.operation`, "unknown" if absent).
 */
class VectorError extends PluginError {
  /**
   * @param {string} message - Error message.
   * @param {Object} [details={}] - Extra fields merged into the PluginError
   *   details; `operation` and `description` are read explicitly.
   */
  constructor(message, details = {}) {
    const fallbackDescription = `
Vector Plugin Error

Operation: ${details.operation || "unknown"}

Common causes:
1. Vector dimension mismatch between vectors
2. Invalid distance metric specified (must be: cosine, euclidean, manhattan)
3. Empty vector array provided for clustering
4. k value larger than number of available vectors
5. Vector field not found or invalid in resource
6. Large vectors without proper behavior (use 'body-overflow' or 'body-only')

Available distance metrics:
- cosine: Best for normalized vectors, semantic similarity. Range: [0, 2]
- euclidean: Standard L2 distance, geometric proximity. Range: [0, \u221E)
- manhattan: L1 distance, faster computation. Range: [0, \u221E)

Storage considerations:
- Vectors > 250 dimensions may exceed S3 metadata limit (2KB)
- Use behavior: 'body-overflow' or 'body-only' for large vectors
- OpenAI ada-002 (1536 dims): ~10KB, requires body storage
- Sentence Transformers (384 dims): ~2.7KB, requires body storage
`.trim();
    super(message, {
      pluginName: "VectorPlugin",
      ...details,
      // Listed last so it always wins over a falsy details.description.
      description: details.description || fallbackDescription
    });
  }
}
19450
+
19451
+ class VectorPlugin extends Plugin {
19452
+ constructor(options = {}) {
19453
+ super(options);
19454
+ this.config = {
19455
+ dimensions: 1536,
19456
+ // Default to OpenAI text-embedding-3-small/3-large
19457
+ distanceMetric: "cosine",
19458
+ // Default metric
19459
+ storageThreshold: 1500,
19460
+ // Bytes - warn if vectors exceed this
19461
+ autoFixBehavior: false,
19462
+ // Automatically set body-overflow
19463
+ autoDetectVectorField: true,
19464
+ // Auto-detect embedding:XXX fields
19465
+ emitEvents: true,
19466
+ // Emit events for monitoring
19467
+ verboseEvents: false,
19468
+ // Emit detailed progress events
19469
+ eventThrottle: 100,
19470
+ // Throttle progress events (ms)
19471
+ ...options
19472
+ };
19473
+ this.distanceFunctions = {
19474
+ cosine: cosineDistance,
19475
+ euclidean: euclideanDistance,
19476
+ manhattan: manhattanDistance
19477
+ };
19478
+ this._vectorFieldCache = /* @__PURE__ */ new Map();
19479
+ this._throttleState = /* @__PURE__ */ new Map();
19480
+ }
19481
+ async onInstall() {
19482
+ this.emit("installed", { plugin: "VectorPlugin" });
19483
+ this.validateVectorStorage();
19484
+ this.installResourceMethods();
19485
+ }
19486
+ async onStart() {
19487
+ this.emit("started", { plugin: "VectorPlugin" });
19488
+ }
19489
+ async onStop() {
19490
+ this.emit("stopped", { plugin: "VectorPlugin" });
19491
+ }
19492
+ async onUninstall(options) {
19493
+ for (const resource of Object.values(this.database.resources)) {
19494
+ delete resource.vectorSearch;
19495
+ delete resource.cluster;
19496
+ delete resource.vectorDistance;
19497
+ delete resource.similarTo;
19498
+ delete resource.findSimilar;
19499
+ delete resource.distance;
19500
+ }
19501
+ this.emit("uninstalled", { plugin: "VectorPlugin" });
19502
+ }
19503
+ /**
19504
+ * Validate vector storage configuration for all resources
19505
+ *
19506
+ * Detects large vector fields and warns if proper behavior is not set.
19507
+ * Can optionally auto-fix by setting body-overflow behavior.
19508
+ */
19509
+ validateVectorStorage() {
19510
+ for (const resource of Object.values(this.database.resources)) {
19511
+ const vectorFields = this.findVectorFields(resource.schema.attributes);
19512
+ if (vectorFields.length === 0) continue;
19513
+ const totalVectorSize = vectorFields.reduce((sum, f) => sum + f.estimatedBytes, 0);
19514
+ if (totalVectorSize > this.config.storageThreshold) {
19515
+ const hasCorrectBehavior = ["body-overflow", "body-only"].includes(resource.behavior);
19516
+ if (!hasCorrectBehavior) {
19517
+ const warning = {
19518
+ resource: resource.name,
19519
+ vectorFields: vectorFields.map((f) => ({
19520
+ field: f.name,
19521
+ dimensions: f.length,
19522
+ estimatedBytes: f.estimatedBytes
19523
+ })),
19524
+ totalEstimatedBytes: totalVectorSize,
19525
+ metadataLimit: 2047,
19526
+ currentBehavior: resource.behavior || "default",
19527
+ recommendation: "body-overflow"
19528
+ };
19529
+ this.emit("vector:storage-warning", warning);
19530
+ if (this.config.autoFixBehavior) {
19531
+ resource.behavior = "body-overflow";
19532
+ this.emit("vector:behavior-fixed", {
19533
+ resource: resource.name,
19534
+ newBehavior: "body-overflow"
19535
+ });
19536
+ } else {
19537
+ console.warn(`\u26A0\uFE0F VectorPlugin: Resource '${resource.name}' has large vector fields (${totalVectorSize} bytes estimated)`);
19538
+ console.warn(` Current behavior: '${resource.behavior || "default"}'`);
19539
+ console.warn(` Recommendation: Add behavior: 'body-overflow' or 'body-only' to resource configuration`);
19540
+ console.warn(` Large vectors will exceed S3 metadata limit (2047 bytes) and cause errors.`);
19541
+ }
19542
+ }
19543
+ }
19544
+ }
19545
+ }
19546
+ /**
19547
+ * Auto-detect vector field from resource schema
19548
+ *
19549
+ * Looks for fields with type 'embedding:XXX' pattern.
19550
+ * Caches result per resource for performance.
19551
+ *
19552
+ * @param {Resource} resource - Resource instance
19553
+ * @returns {string|null} Detected vector field name or null
19554
+ */
19555
+ detectVectorField(resource) {
19556
+ if (this._vectorFieldCache.has(resource.name)) {
19557
+ return this._vectorFieldCache.get(resource.name);
19558
+ }
19559
+ const vectorField = this._findEmbeddingField(resource.schema.attributes);
19560
+ this._vectorFieldCache.set(resource.name, vectorField);
19561
+ if (vectorField && this.config.emitEvents) {
19562
+ this.emit("vector:field-detected", {
19563
+ resource: resource.name,
19564
+ vectorField,
19565
+ timestamp: Date.now()
19566
+ });
19567
+ }
19568
+ return vectorField;
19569
+ }
19570
+ /**
19571
+ * Recursively find embedding:XXX field in attributes
19572
+ *
19573
+ * @param {Object} attributes - Resource attributes
19574
+ * @param {string} path - Current path (for nested objects)
19575
+ * @returns {string|null} Field path or null
19576
+ */
19577
+ _findEmbeddingField(attributes, path = "") {
19578
+ for (const [key, attr] of Object.entries(attributes)) {
19579
+ const fullPath = path ? `${path}.${key}` : key;
19580
+ if (typeof attr === "string" && attr.startsWith("embedding:")) {
19581
+ return fullPath;
19582
+ }
19583
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19584
+ return fullPath;
19585
+ }
19586
+ if (attr.type === "object" && attr.props) {
19587
+ const nested = this._findEmbeddingField(attr.props, fullPath);
19588
+ if (nested) return nested;
19589
+ }
19590
+ }
19591
+ return null;
19592
+ }
19593
+ /**
19594
+ * Emit event with throttling support
19595
+ *
19596
+ * @param {string} eventName - Event name
19597
+ * @param {Object} data - Event data
19598
+ * @param {string} throttleKey - Unique key for throttling (optional)
19599
+ */
19600
+ _emitEvent(eventName, data, throttleKey = null) {
19601
+ if (!this.config.emitEvents) return;
19602
+ if (throttleKey) {
19603
+ const now = Date.now();
19604
+ const lastEmit = this._throttleState.get(throttleKey);
19605
+ if (lastEmit && now - lastEmit < this.config.eventThrottle) {
19606
+ return;
19607
+ }
19608
+ this._throttleState.set(throttleKey, now);
19609
+ }
19610
+ this.emit(eventName, data);
19611
+ }
19612
+ /**
19613
+ * Find vector fields in resource attributes
19614
+ *
19615
+ * @param {Object} attributes - Resource attributes
19616
+ * @param {string} path - Current path (for nested objects)
19617
+ * @returns {Array} Array of vector field info
19618
+ */
19619
+ findVectorFields(attributes, path = "") {
19620
+ const vectors = [];
19621
+ for (const [key, attr] of Object.entries(attributes)) {
19622
+ const fullPath = path ? `${path}.${key}` : key;
19623
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19624
+ vectors.push({
19625
+ name: fullPath,
19626
+ length: attr.length,
19627
+ estimatedBytes: this.estimateVectorBytes(attr.length)
19628
+ });
19629
+ }
19630
+ if (attr.type === "object" && attr.props) {
19631
+ vectors.push(...this.findVectorFields(attr.props, fullPath));
19632
+ }
19633
+ }
19634
+ return vectors;
19635
+ }
19636
+ /**
19637
+ * Estimate bytes required to store a vector in JSON format
19638
+ *
19639
+ * Conservative estimate: ~7 bytes per number + array overhead
19640
+ *
19641
+ * @param {number} dimensions - Number of dimensions
19642
+ * @returns {number} Estimated bytes
19643
+ */
19644
+ estimateVectorBytes(dimensions) {
19645
+ return dimensions * 7 + 50;
19646
+ }
19647
+ /**
19648
+ * Install vector methods on all resources
19649
+ */
19650
+ installResourceMethods() {
19651
+ for (const resource of Object.values(this.database.resources)) {
19652
+ const searchMethod = this.createVectorSearchMethod(resource);
19653
+ const clusterMethod = this.createClusteringMethod(resource);
19654
+ const distanceMethod = this.createDistanceMethod();
19655
+ resource.vectorSearch = searchMethod;
19656
+ resource.cluster = clusterMethod;
19657
+ resource.vectorDistance = distanceMethod;
19658
+ resource.similarTo = searchMethod;
19659
+ resource.findSimilar = searchMethod;
19660
+ resource.distance = distanceMethod;
19661
+ }
19662
+ }
19663
+ /**
19664
+ * Create vector search method for a resource
19665
+ *
19666
+ * Performs K-nearest neighbors search to find similar vectors.
19667
+ *
19668
+ * @param {Resource} resource - Resource instance
19669
+ * @returns {Function} Vector search method
19670
+ */
19671
+ createVectorSearchMethod(resource) {
19672
+ return async (queryVector, options = {}) => {
19673
+ const startTime = Date.now();
19674
+ let vectorField = options.vectorField;
19675
+ if (!vectorField && this.config.autoDetectVectorField) {
19676
+ vectorField = this.detectVectorField(resource);
19677
+ if (!vectorField) {
19678
+ vectorField = "vector";
19679
+ }
19680
+ } else if (!vectorField) {
19681
+ vectorField = "vector";
19682
+ }
19683
+ const {
19684
+ limit = 10,
19685
+ distanceMetric = this.config.distanceMetric,
19686
+ threshold = null,
19687
+ partition = null
19688
+ } = options;
19689
+ const distanceFn = this.distanceFunctions[distanceMetric];
19690
+ if (!distanceFn) {
19691
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19692
+ operation: "vectorSearch",
19693
+ availableMetrics: Object.keys(this.distanceFunctions),
19694
+ providedMetric: distanceMetric
19695
+ });
19696
+ this._emitEvent("vector:search-error", {
19697
+ resource: resource.name,
19698
+ error: error.message,
19699
+ timestamp: Date.now()
19700
+ });
19701
+ throw error;
19702
+ }
19703
+ this._emitEvent("vector:search-start", {
19704
+ resource: resource.name,
19705
+ vectorField,
19706
+ limit,
19707
+ distanceMetric,
19708
+ partition,
19709
+ threshold,
19710
+ queryDimensions: queryVector.length,
19711
+ timestamp: startTime
19712
+ });
19713
+ try {
19714
+ let allRecords;
19715
+ if (partition) {
19716
+ this._emitEvent("vector:partition-filter", {
19717
+ resource: resource.name,
19718
+ partition,
19719
+ timestamp: Date.now()
19720
+ });
19721
+ allRecords = await resource.list({ partition, partitionValues: partition });
19722
+ } else {
19723
+ allRecords = await resource.getAll();
19724
+ }
19725
+ const totalRecords = allRecords.length;
19726
+ let processedRecords = 0;
19727
+ let dimensionMismatches = 0;
19728
+ const results = allRecords.filter((record) => record[vectorField] && Array.isArray(record[vectorField])).map((record, index) => {
19729
+ try {
19730
+ const distance = distanceFn(queryVector, record[vectorField]);
19731
+ processedRecords++;
19732
+ if (this.config.verboseEvents && processedRecords % 100 === 0) {
19733
+ this._emitEvent("vector:search-progress", {
19734
+ resource: resource.name,
19735
+ processed: processedRecords,
19736
+ total: totalRecords,
19737
+ progress: processedRecords / totalRecords * 100,
19738
+ timestamp: Date.now()
19739
+ }, `search-${resource.name}`);
19740
+ }
19741
+ return { record, distance };
19742
+ } catch (err) {
19743
+ dimensionMismatches++;
19744
+ if (this.config.verboseEvents) {
19745
+ this._emitEvent("vector:dimension-mismatch", {
19746
+ resource: resource.name,
19747
+ recordIndex: index,
19748
+ expected: queryVector.length,
19749
+ got: record[vectorField]?.length,
19750
+ timestamp: Date.now()
19751
+ });
19752
+ }
19753
+ return null;
19754
+ }
19755
+ }).filter((result) => result !== null).filter((result) => threshold === null || result.distance <= threshold).sort((a, b) => a.distance - b.distance).slice(0, limit);
19756
+ const duration = Date.now() - startTime;
19757
+ const throughput = totalRecords / (duration / 1e3);
19758
+ this._emitEvent("vector:search-complete", {
19759
+ resource: resource.name,
19760
+ vectorField,
19761
+ resultsCount: results.length,
19762
+ totalRecords,
19763
+ processedRecords,
19764
+ dimensionMismatches,
19765
+ duration,
19766
+ throughput: throughput.toFixed(2),
19767
+ timestamp: Date.now()
19768
+ });
19769
+ if (this.config.verboseEvents) {
19770
+ this._emitEvent("vector:performance", {
19771
+ operation: "search",
19772
+ resource: resource.name,
19773
+ duration,
19774
+ throughput: throughput.toFixed(2),
19775
+ recordsPerSecond: (processedRecords / (duration / 1e3)).toFixed(2),
19776
+ timestamp: Date.now()
19777
+ });
19778
+ }
19779
+ return results;
19780
+ } catch (error) {
19781
+ this._emitEvent("vector:search-error", {
19782
+ resource: resource.name,
19783
+ error: error.message,
19784
+ stack: error.stack,
19785
+ timestamp: Date.now()
19786
+ });
19787
+ throw error;
19788
+ }
19789
+ };
19790
+ }
19791
+ /**
19792
+ * Create clustering method for a resource
19793
+ *
19794
+ * Performs k-means clustering on resource vectors.
19795
+ *
19796
+ * @param {Resource} resource - Resource instance
19797
+ * @returns {Function} Clustering method
19798
+ */
19799
+ createClusteringMethod(resource) {
19800
+ return async (options = {}) => {
19801
+ const startTime = Date.now();
19802
+ let vectorField = options.vectorField;
19803
+ if (!vectorField && this.config.autoDetectVectorField) {
19804
+ vectorField = this.detectVectorField(resource);
19805
+ if (!vectorField) {
19806
+ vectorField = "vector";
19807
+ }
19808
+ } else if (!vectorField) {
19809
+ vectorField = "vector";
19810
+ }
19811
+ const {
19812
+ k = 5,
19813
+ distanceMetric = this.config.distanceMetric,
19814
+ partition = null,
19815
+ ...kmeansOptions
19816
+ } = options;
19817
+ const distanceFn = this.distanceFunctions[distanceMetric];
19818
+ if (!distanceFn) {
19819
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19820
+ operation: "cluster",
19821
+ availableMetrics: Object.keys(this.distanceFunctions),
19822
+ providedMetric: distanceMetric
19823
+ });
19824
+ this._emitEvent("vector:cluster-error", {
19825
+ resource: resource.name,
19826
+ error: error.message,
19827
+ timestamp: Date.now()
19828
+ });
19829
+ throw error;
19830
+ }
19831
+ this._emitEvent("vector:cluster-start", {
19832
+ resource: resource.name,
19833
+ vectorField,
19834
+ k,
19835
+ distanceMetric,
19836
+ partition,
19837
+ maxIterations: kmeansOptions.maxIterations || 100,
19838
+ timestamp: startTime
19839
+ });
19840
+ try {
19841
+ let allRecords;
19842
+ if (partition) {
19843
+ this._emitEvent("vector:partition-filter", {
19844
+ resource: resource.name,
19845
+ partition,
19846
+ timestamp: Date.now()
19847
+ });
19848
+ allRecords = await resource.list({ partition, partitionValues: partition });
19849
+ } else {
19850
+ allRecords = await resource.getAll();
19851
+ }
19852
+ const recordsWithVectors = allRecords.filter(
19853
+ (record) => record[vectorField] && Array.isArray(record[vectorField])
19854
+ );
19855
+ if (recordsWithVectors.length === 0) {
19856
+ const error = new VectorError("No vectors found in resource", {
19857
+ operation: "cluster",
19858
+ resourceName: resource.name,
19859
+ vectorField
19860
+ });
19861
+ this._emitEvent("vector:empty-dataset", {
19862
+ resource: resource.name,
19863
+ vectorField,
19864
+ totalRecords: allRecords.length,
19865
+ timestamp: Date.now()
19866
+ });
19867
+ throw error;
19868
+ }
19869
+ const vectors = recordsWithVectors.map((record) => record[vectorField]);
19870
+ const result = kmeans(vectors, k, {
19871
+ ...kmeansOptions,
19872
+ distanceFn,
19873
+ onIteration: this.config.verboseEvents ? (iteration, inertia, converged) => {
19874
+ this._emitEvent("vector:cluster-iteration", {
19875
+ resource: resource.name,
19876
+ k,
19877
+ iteration,
19878
+ inertia,
19879
+ converged,
19880
+ timestamp: Date.now()
19881
+ }, `cluster-${resource.name}`);
19882
+ } : void 0
19883
+ });
19884
+ if (result.converged) {
19885
+ this._emitEvent("vector:cluster-converged", {
19886
+ resource: resource.name,
19887
+ k,
19888
+ iterations: result.iterations,
19889
+ inertia: result.inertia,
19890
+ timestamp: Date.now()
19891
+ });
19892
+ }
19893
+ const clusters = Array(k).fill(null).map(() => []);
19894
+ recordsWithVectors.forEach((record, i) => {
19895
+ const clusterIndex = result.assignments[i];
19896
+ clusters[clusterIndex].push(record);
19897
+ });
19898
+ const duration = Date.now() - startTime;
19899
+ const clusterSizes = clusters.map((c) => c.length);
19900
+ this._emitEvent("vector:cluster-complete", {
19901
+ resource: resource.name,
19902
+ vectorField,
19903
+ k,
19904
+ vectorCount: vectors.length,
19905
+ iterations: result.iterations,
19906
+ converged: result.converged,
19907
+ inertia: result.inertia,
19908
+ clusterSizes,
19909
+ duration,
19910
+ timestamp: Date.now()
19911
+ });
19912
+ if (this.config.verboseEvents) {
19913
+ this._emitEvent("vector:performance", {
19914
+ operation: "clustering",
19915
+ resource: resource.name,
19916
+ k,
19917
+ duration,
19918
+ iterationsPerSecond: (result.iterations / (duration / 1e3)).toFixed(2),
19919
+ vectorsPerSecond: (vectors.length / (duration / 1e3)).toFixed(2),
19920
+ timestamp: Date.now()
19921
+ });
19922
+ }
19923
+ return {
19924
+ clusters,
19925
+ centroids: result.centroids,
19926
+ inertia: result.inertia,
19927
+ iterations: result.iterations,
19928
+ converged: result.converged
19929
+ };
19930
+ } catch (error) {
19931
+ this._emitEvent("vector:cluster-error", {
19932
+ resource: resource.name,
19933
+ error: error.message,
19934
+ stack: error.stack,
19935
+ timestamp: Date.now()
19936
+ });
19937
+ throw error;
19938
+ }
19939
+ };
19940
+ }
19941
+ /**
19942
+ * Create distance calculation method
19943
+ *
19944
+ * @returns {Function} Distance method
19945
+ */
19946
+ createDistanceMethod() {
19947
+ return (vector1, vector2, metric = this.config.distanceMetric) => {
19948
+ const distanceFn = this.distanceFunctions[metric];
19949
+ if (!distanceFn) {
19950
+ throw new VectorError(`Invalid distance metric: ${metric}`, {
19951
+ operation: "vectorDistance",
19952
+ availableMetrics: Object.keys(this.distanceFunctions),
19953
+ providedMetric: metric
19954
+ });
19955
+ }
19956
+ return distanceFn(vector1, vector2);
19957
+ };
19958
+ }
19959
+ /**
19960
+ * Static utility: Normalize vector
19961
+ *
19962
+ * @param {number[]} vector - Input vector
19963
+ * @returns {number[]} Normalized vector
19964
+ */
19965
+ static normalize(vector) {
19966
+ return normalize(vector);
19967
+ }
19968
+ /**
19969
+ * Static utility: Calculate dot product
19970
+ *
19971
+ * @param {number[]} vector1 - First vector
19972
+ * @param {number[]} vector2 - Second vector
19973
+ * @returns {number} Dot product
19974
+ */
19975
+ static dotProduct(vector1, vector2) {
19976
+ return dotProduct(vector1, vector2);
19977
+ }
19978
+ /**
19979
+ * Static utility: Find optimal K for clustering
19980
+ *
19981
+ * Analyzes clustering quality across a range of K values using
19982
+ * multiple evaluation metrics.
19983
+ *
19984
+ * @param {number[][]} vectors - Vectors to analyze
19985
+ * @param {Object} options - Configuration options
19986
+ * @returns {Promise<Object>} Analysis results with recommendations
19987
+ */
19988
+ static async findOptimalK(vectors, options) {
19989
+ return findOptimalK(vectors, options);
19990
+ }
19991
+ }
19992
+
19993
/**
 * Mean silhouette coefficient of a clustering.
 *
 * For each point, `a` is the mean distance to the other members of its own
 * cluster and `b` is the smallest mean distance to the members of any other
 * non-empty cluster; the point scores `(b - a) / max(a, b)`. Points that are
 * alone in their cluster score 0 and are included in the mean, as in the
 * standard definition (leaving them out would inflate the score of
 * over-fragmented clusterings).
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids (only their count is used)
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @returns {number} Mean silhouette; 0 when no point can be scored
 */
function silhouetteScore(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const k = centroids.length;
  const n = vectors.length;
  // Member indices of every cluster.
  const clusters = Array.from({ length: k }, () => []);
  vectors.forEach((vector, i) => {
    clusters[assignments[i]].push(i);
  });
  if (clusters.every((members) => members.length <= 1)) {
    return 0;
  }
  let totalScore = 0;
  let scoredPoints = 0;
  for (let i = 0; i < n; i++) {
    const clusterIdx = assignments[i];
    const cluster = clusters[clusterIdx];
    if (cluster.length === 1) {
      // Singleton cluster: the silhouette is defined as 0, but the point
      // still counts towards the mean.
      scoredPoints++;
      continue;
    }
    let a = 0;
    for (const j of cluster) {
      if (i !== j) {
        a += distanceFn(vectors[i], vectors[j]);
      }
    }
    a /= cluster.length - 1;
    let b = Infinity;
    for (let otherCluster = 0; otherCluster < k; otherCluster++) {
      if (otherCluster === clusterIdx) continue;
      const otherPoints = clusters[otherCluster];
      if (otherPoints.length === 0) continue;
      let avgDist = 0;
      for (const j of otherPoints) {
        avgDist += distanceFn(vectors[i], vectors[j]);
      }
      avgDist /= otherPoints.length;
      b = Math.min(b, avgDist);
    }
    // No other populated cluster exists: the silhouette is undefined here.
    if (b === Infinity) continue;
    const maxAB = Math.max(a, b);
    totalScore += maxAB === 0 ? 0 : (b - a) / maxAB;
    scoredPoints++;
  }
  return scoredPoints > 0 ? totalScore / scoredPoints : 0;
}
20036
/**
 * Davies-Bouldin index of a clustering.
 *
 * For each populated cluster the worst (largest) ratio of combined
 * within-cluster scatter to centroid separation against any other populated
 * cluster is taken; the index is the mean of those worst ratios. Empty
 * clusters and pairs with coinciding centroids are ignored.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @returns {number} Davies-Bouldin index, or 0 when no cluster is populated
 */
function daviesBouldinIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterTotal = centroids.length;
  const sizes = new Array(clusterTotal).fill(0);
  const spread = new Array(clusterTotal).fill(0);

  // Accumulate each cluster's summed member-to-centroid distance and size.
  vectors.forEach((point, idx) => {
    const owner = assignments[idx];
    spread[owner] += distanceFn(point, centroids[owner]);
    sizes[owner]++;
  });

  // Turn the sums into mean scatter for populated clusters.
  for (let c = 0; c < clusterTotal; c++) {
    if (sizes[c] > 0) {
      spread[c] /= sizes[c];
    }
  }

  let ratioSum = 0;
  let populated = 0;
  for (let first = 0; first < clusterTotal; first++) {
    if (sizes[first] === 0) continue;
    let worstRatio = 0;
    for (let second = 0; second < clusterTotal; second++) {
      const comparable = first !== second && sizes[second] !== 0;
      if (!comparable) continue;
      const separation = distanceFn(centroids[first], centroids[second]);
      if (separation === 0) continue;
      worstRatio = Math.max(worstRatio, (spread[first] + spread[second]) / separation);
    }
    ratioSum += worstRatio;
    populated++;
  }

  if (populated > 0) {
    return ratioSum / populated;
  }
  return 0;
}
20067
/**
 * Calinski-Harabasz index (variance ratio criterion) of a clustering.
 *
 * Computes the between-cluster dispersion (size-weighted squared distance of
 * each centroid to the overall mean) divided by `k - 1`, over the
 * within-cluster dispersion (squared distance of each point to its centroid)
 * divided by `n - k`.
 *
 * The index is only defined for `1 < k < n`; outside that range, for empty
 * input, or when the within-cluster dispersion is 0, the function returns 0.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @returns {number} Calinski-Harabasz index, or 0 when it is undefined
 */
function calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const n = vectors.length;
  const k = centroids.length;
  // Guards both divisors (k - 1 and n - k) and the vectors[0] read below.
  if (n === 0 || k <= 1 || k >= n) return 0;
  const dimensions = vectors[0].length;
  const overallCentroid = new Array(dimensions).fill(0);
  vectors.forEach((vector) => {
    vector.forEach((val, dim) => {
      overallCentroid[dim] += val;
    });
  });
  overallCentroid.forEach((val, dim, arr) => {
    arr[dim] = val / n;
  });
  const clusterCounts = new Array(k).fill(0);
  vectors.forEach((vector, i) => {
    clusterCounts[assignments[i]]++;
  });
  // Between-group sum of squares.
  let bgss = 0;
  for (let i = 0; i < k; i++) {
    if (clusterCounts[i] === 0) continue;
    const dist = distanceFn(centroids[i], overallCentroid);
    bgss += clusterCounts[i] * dist * dist;
  }
  // Within-cluster sum of squares.
  let wcss = 0;
  vectors.forEach((vector, i) => {
    const dist = distanceFn(vector, centroids[assignments[i]]);
    wcss += dist * dist;
  });
  if (wcss === 0) return 0;
  return bgss / (k - 1) / (wcss / (n - k));
}
20100
/**
 * Gap statistic of a clustering.
 *
 * Compares the log within-cluster dispersion of the data with the mean log
 * dispersion of `nReferences` reference datasets. Each reference dataset is
 * drawn uniformly (via `Math.random`) from the per-dimension bounding box of
 * the data and clustered with `kmeans` using the same `k`.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @param {number} nReferences - Number of reference datasets
 * @returns {Promise<Object>} `{ gap, sk, expectedWk, actualWk }`
 */
async function gapStatistic(vectors, assignments, centroids, distanceFn = euclideanDistance, nReferences = 10) {
  const pointCount = vectors.length;
  const clusterCount = centroids.length;
  const dimensions = vectors[0].length;

  // Log of the observed within-cluster dispersion (epsilon avoids log(0)).
  let dispersion = 0;
  vectors.forEach((point, idx) => {
    const d = distanceFn(point, centroids[assignments[idx]]);
    dispersion += d * d;
  });
  const actualWk = Math.log(dispersion + 1e-10);

  // Per-dimension bounding box of the data.
  const lower = new Array(dimensions).fill(Infinity);
  const upper = new Array(dimensions).fill(-Infinity);
  vectors.forEach((point) => {
    point.forEach((value, dim) => {
      lower[dim] = Math.min(lower[dim], value);
      upper[dim] = Math.max(upper[dim], value);
    });
  });

  const referenceWks = [];
  for (let ref = 0; ref < nReferences; ref++) {
    // Uniform sample inside the bounding box, one coordinate at a time.
    const sample = [];
    for (let p = 0; p < pointCount; p++) {
      const coords = new Array(dimensions);
      for (let dim = 0; dim < dimensions; dim++) {
        coords[dim] = lower[dim] + Math.random() * (upper[dim] - lower[dim]);
      }
      sample.push(coords);
    }
    const fitted = kmeans(sample, clusterCount, { maxIterations: 50, distanceFn });
    let sampleDispersion = 0;
    sample.forEach((point, idx) => {
      const d = distanceFn(point, fitted.centroids[fitted.assignments[idx]]);
      sampleDispersion += d * d;
    });
    referenceWks.push(Math.log(sampleDispersion + 1e-10));
  }

  const expectedWk = referenceWks.reduce((acc, value) => acc + value, 0) / nReferences;
  const squaredDeviations = referenceWks.reduce(
    (acc, value) => acc + Math.pow(value - expectedWk, 2),
    0
  );
  const sdk = Math.sqrt(squaredDeviations / nReferences);
  return {
    gap: expectedWk - actualWk,
    sk: sdk * Math.sqrt(1 + 1 / nReferences),
    expectedWk,
    actualWk
  };
}
20144
/**
 * Measure how stable k-means is across repeated runs.
 *
 * Runs `kmeans` `nRuns` times (passing the run number as `seed`), then
 * reports the mean / standard deviation / coefficient of variation of the
 * inertia and the mean pairwise assignment similarity between runs.
 *
 * @param {number[][]} vectors - Data points
 * @param {number} k - Number of clusters
 * @param {Object} options - `nRuns` (default 10), `distanceFn`, plus options forwarded to `kmeans`
 * @returns {Object} `{ avgInertia, stdInertia, cvInertia, avgSimilarity, stability }`
 */
function clusteringStability(vectors, k, options = {}) {
  const { nRuns = 10, distanceFn = euclideanDistance, ...kmeansOptions } = options;

  const runs = [];
  for (let seed = 0; seed < nRuns; seed++) {
    // A different seed for each run.
    runs.push(kmeans(vectors, k, { ...kmeansOptions, distanceFn, seed }));
  }
  const inertias = runs.map((run) => run.inertia);
  const allAssignments = runs.map((run) => run.assignments);

  // Similarity of every unordered pair of runs.
  const similarities = [];
  for (let a = 0; a < nRuns - 1; a++) {
    for (let b = a + 1; b < nRuns; b++) {
      similarities.push(calculateAssignmentSimilarity(allAssignments[a], allAssignments[b]));
    }
  }

  const total = (values) => values.reduce((acc, value) => acc + value, 0);
  const avgInertia = total(inertias) / nRuns;
  const variance = inertias.reduce((acc, value) => acc + Math.pow(value - avgInertia, 2), 0) / nRuns;
  const stdInertia = Math.sqrt(variance);

  let avgSimilarity = 1;
  if (similarities.length > 0) {
    avgSimilarity = total(similarities) / similarities.length;
  }

  // Coefficient of variation; defined as 0 when the mean inertia is 0.
  let cvInertia = 0;
  if (avgInertia !== 0) {
    cvInertia = stdInertia / avgInertia;
  }

  return {
    avgInertia,
    stdInertia,
    cvInertia,
    avgSimilarity,
    // Higher is more stable
    stability: avgSimilarity
  };
}
20184
/**
 * Pairwise agreement between two cluster assignments.
 *
 * For every unordered pair of points, the two assignments agree when both
 * put the pair in the same cluster or both put it in different clusters.
 * Cluster labels themselves do not need to match.
 *
 * @param {number[]} assignments1 - First assignment
 * @param {number[]} assignments2 - Second assignment
 * @returns {number} Fraction of agreeing pairs; 1 when there are no pairs
 */
function calculateAssignmentSimilarity(assignments1, assignments2) {
  const size = assignments1.length;
  const pairCount = size * (size - 1) / 2;
  if (!(pairCount > 0)) {
    return 1;
  }
  let agreeing = 0;
  for (let first = 0; first < size - 1; first++) {
    for (let second = first + 1; second < size; second++) {
      const togetherInFirst = assignments1[first] === assignments1[second];
      const togetherInSecond = assignments2[first] === assignments2[second];
      if (togetherInFirst === togetherInSecond) {
        agreeing += 1;
      }
    }
  }
  return agreeing / pairCount;
}
20199
+
20200
// Frozen, prototype-less namespace object bundling the clustering quality metrics.
var metrics = /*#__PURE__*/Object.freeze({
  __proto__: null,
  calinskiHarabaszIndex,
  clusteringStability,
  daviesBouldinIndex,
  gapStatistic,
  silhouetteScore
});
20208
+
18939
20209
  exports.AVAILABLE_BEHAVIORS = AVAILABLE_BEHAVIORS;
18940
20210
  exports.AnalyticsNotEnabledError = AnalyticsNotEnabledError;
18941
20211
  exports.AuditPlugin = AuditPlugin;
@@ -18990,6 +20260,7 @@ exports.StreamError = StreamError;
18990
20260
  exports.UnknownError = UnknownError;
18991
20261
  exports.ValidationError = ValidationError;
18992
20262
  exports.Validator = Validator;
20263
+ exports.VectorPlugin = VectorPlugin;
18993
20264
  exports.behaviors = behaviors;
18994
20265
  exports.calculateAttributeNamesSize = calculateAttributeNamesSize;
18995
20266
  exports.calculateAttributeSizes = calculateAttributeSizes;
@@ -19002,10 +20273,12 @@ exports.clearUTF8Memo = clearUTF8Memo;
19002
20273
  exports.clearUTF8Memory = clearUTF8Memory;
19003
20274
  exports.decode = decode;
19004
20275
  exports.decodeDecimal = decodeDecimal;
20276
+ exports.decodeFixedPoint = decodeFixedPoint;
19005
20277
  exports.decrypt = decrypt;
19006
20278
  exports.default = S3db;
19007
20279
  exports.encode = encode;
19008
20280
  exports.encodeDecimal = encodeDecimal;
20281
+ exports.encodeFixedPoint = encodeFixedPoint;
19009
20282
  exports.encrypt = encrypt;
19010
20283
  exports.getBehavior = getBehavior;
19011
20284
  exports.getSizeBreakdown = getSizeBreakdown;