s3db.js 11.2.4 → 11.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/s3db.cjs.js CHANGED
@@ -81,6 +81,41 @@ const decodeDecimal = (s) => {
81
81
  const num = decPart ? Number(decodedInt + "." + decPart) : decodedInt;
82
82
  return negative ? -num : num;
83
83
  };
84
/**
 * Encode a number as a fixed-point string: the value is multiplied by
 * 10^precision, rounded to an integer, and written with the digits of the
 * surrounding module's `alphabet` / `base` tables, prefixed with "^".
 *
 * @param {number} n - Value to encode.
 * @param {number} [precision=6] - Number of decimal places kept.
 * @returns {string} "^0" when the scaled value rounds to zero, "^" plus an
 *   optional "-" plus digits otherwise, or the literal string "undefined"
 *   when `n` is not a finite number.
 */
const encodeFixedPoint = (n, precision = 6) => {
  // Number.isFinite is false for non-numbers, NaN and +/-Infinity alike.
  if (!Number.isFinite(n)) {
    return "undefined";
  }
  const scaled = Math.round(n * Math.pow(10, precision));
  // -0 === 0, so tiny negative values also land here.
  if (scaled === 0) {
    return "^0";
  }
  const sign = scaled < 0 ? "-" : "";
  let digits = "";
  // Peel off the least-significant digit each round and prepend it.
  for (let rest = Math.abs(scaled); rest > 0; rest = Math.floor(rest / base)) {
    digits = alphabet[rest % base] + digits;
  }
  return "^" + sign + digits;
};
99
/**
 * Decode a "^"-prefixed fixed-point string back into a number.
 *
 * The digits after the marker (and after an optional "-") are read with the
 * surrounding module's `charToValue` / `base` tables and the resulting
 * integer is divided by 10^precision.
 *
 * @param {string} s - Encoded value, e.g. "^0" or "^-1a".
 * @param {number} [precision=6] - Decimal places used when the value was encoded.
 * @returns {number} The decoded value, or NaN when `s` is not a string, lacks
 *   the "^" marker, has no digits, or contains a character missing from
 *   `charToValue`.
 */
const decodeFixedPoint = (s, precision = 6) => {
  if (typeof s !== "string" || !s.startsWith("^")) return NaN;
  let digits = s.slice(1);
  if (digits === "0") return 0;
  const negative = digits[0] === "-";
  if (negative) {
    digits = digits.slice(1);
  }
  // A bare "^" or "^-" carries no digits. Treat it as malformed instead of
  // silently decoding it to 0 / -0.
  if (digits === "") return NaN;
  let magnitude = 0;
  for (const ch of digits) {
    const idx = charToValue[ch];
    if (idx === void 0) return NaN;
    magnitude = magnitude * base + idx;
  }
  const scale = Math.pow(10, precision);
  return (negative ? -magnitude : magnitude) / scale;
};
84
119
 
85
120
  const utf8BytesMemory = /* @__PURE__ */ new Map();
86
121
  const UTF8_MEMORY_MAX_SIZE = 1e4;
@@ -11505,6 +11540,11 @@ class Validator extends FastestValidator {
11505
11540
  type: "any",
11506
11541
  custom: this.autoEncrypt ? jsonHandler : void 0
11507
11542
  });
11543
+ this.alias("embedding", {
11544
+ type: "array",
11545
+ items: "number",
11546
+ empty: false
11547
+ });
11508
11548
  }
11509
11549
  }
11510
11550
  const ValidatorManager = new Proxy(Validator, {
@@ -11753,6 +11793,59 @@ const SchemaActions = {
11753
11793
  }
11754
11794
  return NaN;
11755
11795
  });
11796
+ },
11797
+ fromArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
11798
+ if (value === null || value === void 0 || !Array.isArray(value)) {
11799
+ return value;
11800
+ }
11801
+ if (value.length === 0) {
11802
+ return "";
11803
+ }
11804
+ const encodedItems = value.map((item) => {
11805
+ if (typeof item === "number" && !isNaN(item)) {
11806
+ return encodeFixedPoint(item, precision);
11807
+ }
11808
+ const n = Number(item);
11809
+ return isNaN(n) ? "" : encodeFixedPoint(n, precision);
11810
+ });
11811
+ return encodedItems.join(separator);
11812
+ },
11813
// Parse a separator-joined string of fixed-point tokens back into an array
// of numbers (one decodeFixedPoint() call per token).
// - Arrays are mapped element-wise: numbers pass through, everything else is decoded.
// - null / undefined are returned as-is; "" yields [].
// - A backslash makes the following character literal, so it is never
//   treated as a separator.
// - Empty tokens decode to NaN.
toArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
  if (Array.isArray(value)) {
    return value.map((entry) => (typeof entry === "number" ? entry : decodeFixedPoint(entry, precision)));
  }
  if (value === null || value === void 0) {
    return value;
  }
  if (value === "") {
    return [];
  }
  const text = String(value);
  const tokens = [];
  let buffer = "";
  for (let pos = 0; pos < text.length; pos++) {
    const ch = text[pos];
    if (ch === "\\" && pos + 1 < text.length) {
      // Escaped character: copy it verbatim and step over it.
      buffer += text[pos + 1];
      pos++;
      continue;
    }
    if (ch === separator) {
      tokens.push(buffer);
      buffer = "";
      continue;
    }
    buffer += ch;
  }
  // The last token has no trailing separator.
  tokens.push(buffer);
  // Tokens are always strings here; decodeFixedPoint yields NaN for bad ones.
  return tokens.map((token) => (token === "" ? NaN : decodeFixedPoint(token, precision)));
}
11757
11850
  };
11758
11851
  class Schema {
@@ -11822,18 +11915,89 @@ class Schema {
11822
11915
  }
11823
11916
  return objectKeys;
11824
11917
  }
11918
+ _generateHooksFromOriginalAttributes(attributes, prefix = "") {
11919
+ for (const [key, value] of Object.entries(attributes)) {
11920
+ if (key.startsWith("$$")) continue;
11921
+ const fullKey = prefix ? `${prefix}.${key}` : key;
11922
+ if (typeof value === "object" && value !== null && !Array.isArray(value) && value.type) {
11923
+ if (value.type === "array" && value.items) {
11924
+ const itemsType = value.items;
11925
+ const arrayLength = typeof value.length === "number" ? value.length : null;
11926
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11927
+ this.addHook("beforeMap", fullKey, "fromArray");
11928
+ this.addHook("afterUnmap", fullKey, "toArray");
11929
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11930
+ const isIntegerArray = typeof itemsType === "string" && itemsType.includes("integer");
11931
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11932
+ if (isIntegerArray) {
11933
+ this.addHook("beforeMap", fullKey, "fromArrayOfNumbers");
11934
+ this.addHook("afterUnmap", fullKey, "toArrayOfNumbers");
11935
+ } else if (isEmbedding) {
11936
+ this.addHook("beforeMap", fullKey, "fromArrayOfEmbeddings");
11937
+ this.addHook("afterUnmap", fullKey, "toArrayOfEmbeddings");
11938
+ } else {
11939
+ this.addHook("beforeMap", fullKey, "fromArrayOfDecimals");
11940
+ this.addHook("afterUnmap", fullKey, "toArrayOfDecimals");
11941
+ }
11942
+ }
11943
+ }
11944
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value) && !value.type) {
11945
+ this._generateHooksFromOriginalAttributes(value, fullKey);
11946
+ }
11947
+ }
11948
+ }
11825
11949
  generateAutoHooks() {
11950
+ this._generateHooksFromOriginalAttributes(this.attributes);
11826
11951
  const schema = flat.flatten(lodashEs.cloneDeep(this.attributes), { safe: true });
11827
11952
  for (const [name, definition] of Object.entries(schema)) {
11828
- if (definition.includes("array")) {
11829
- if (definition.includes("items:string")) {
11953
+ if (name.includes("$$")) continue;
11954
+ if (this.options.hooks.beforeMap[name] || this.options.hooks.afterUnmap[name]) {
11955
+ continue;
11956
+ }
11957
+ const defStr = typeof definition === "string" ? definition : "";
11958
+ const defType = typeof definition === "object" && definition !== null ? definition.type : null;
11959
+ const isEmbeddingType = defStr.includes("embedding") || defType === "embedding";
11960
+ if (isEmbeddingType) {
11961
+ const lengthMatch = defStr.match(/embedding:(\d+)/);
11962
+ if (lengthMatch) {
11963
+ parseInt(lengthMatch[1], 10);
11964
+ } else if (defStr.includes("length:")) {
11965
+ const match = defStr.match(/length:(\d+)/);
11966
+ if (match) parseInt(match[1], 10);
11967
+ }
11968
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
11969
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11970
+ continue;
11971
+ }
11972
+ const isArray = defStr.includes("array") || defType === "array";
11973
+ if (isArray) {
11974
+ let itemsType = null;
11975
+ if (typeof definition === "object" && definition !== null && definition.items) {
11976
+ itemsType = definition.items;
11977
+ } else if (defStr.includes("items:string")) {
11978
+ itemsType = "string";
11979
+ } else if (defStr.includes("items:number")) {
11980
+ itemsType = "number";
11981
+ }
11982
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11830
11983
  this.addHook("beforeMap", name, "fromArray");
11831
11984
  this.addHook("afterUnmap", name, "toArray");
11832
- } else if (definition.includes("items:number")) {
11833
- const isIntegerArray = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
11985
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11986
+ const isIntegerArray = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer") || typeof itemsType === "string" && itemsType.includes("integer");
11987
+ let arrayLength = null;
11988
+ if (typeof definition === "object" && definition !== null && typeof definition.length === "number") {
11989
+ arrayLength = definition.length;
11990
+ } else if (defStr.includes("length:")) {
11991
+ const match = defStr.match(/length:(\d+)/);
11992
+ if (match) arrayLength = parseInt(match[1], 10);
11993
+ }
11994
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11834
11995
  if (isIntegerArray) {
11835
11996
  this.addHook("beforeMap", name, "fromArrayOfNumbers");
11836
11997
  this.addHook("afterUnmap", name, "toArrayOfNumbers");
11998
+ } else if (isEmbedding) {
11999
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
12000
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11837
12001
  } else {
11838
12002
  this.addHook("beforeMap", name, "fromArrayOfDecimals");
11839
12003
  this.addHook("afterUnmap", name, "toArrayOfDecimals");
@@ -11841,7 +12005,7 @@ class Schema {
11841
12005
  }
11842
12006
  continue;
11843
12007
  }
11844
- if (definition.includes("secret")) {
12008
+ if (defStr.includes("secret") || defType === "secret") {
11845
12009
  if (this.options.autoEncrypt) {
11846
12010
  this.addHook("beforeMap", name, "encrypt");
11847
12011
  }
@@ -11850,8 +12014,8 @@ class Schema {
11850
12014
  }
11851
12015
  continue;
11852
12016
  }
11853
- if (definition.includes("number")) {
11854
- const isInteger = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
12017
+ if (defStr.includes("number") || defType === "number") {
12018
+ const isInteger = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer");
11855
12019
  if (isInteger) {
11856
12020
  this.addHook("beforeMap", name, "toBase62");
11857
12021
  this.addHook("afterUnmap", name, "fromBase62");
@@ -11861,17 +12025,17 @@ class Schema {
11861
12025
  }
11862
12026
  continue;
11863
12027
  }
11864
- if (definition.includes("boolean")) {
12028
+ if (defStr.includes("boolean") || defType === "boolean") {
11865
12029
  this.addHook("beforeMap", name, "fromBool");
11866
12030
  this.addHook("afterUnmap", name, "toBool");
11867
12031
  continue;
11868
12032
  }
11869
- if (definition.includes("json")) {
12033
+ if (defStr.includes("json") || defType === "json") {
11870
12034
  this.addHook("beforeMap", name, "toJSON");
11871
12035
  this.addHook("afterUnmap", name, "fromJSON");
11872
12036
  continue;
11873
12037
  }
11874
- if (definition === "object" || definition.includes("object")) {
12038
+ if (definition === "object" || defStr.includes("object") || defType === "object") {
11875
12039
  this.addHook("beforeMap", name, "toJSON");
11876
12040
  this.addHook("afterUnmap", name, "fromJSON");
11877
12041
  continue;
@@ -12013,7 +12177,8 @@ class Schema {
12013
12177
  const originalKey = reversedMap && reversedMap[key] ? reversedMap[key] : key;
12014
12178
  let parsedValue = value;
12015
12179
  const attrDef = this.getAttributeDefinition(originalKey);
12016
- if (typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12180
+ const hasAfterUnmapHook = this.options.hooks?.afterUnmap?.[originalKey];
12181
+ if (!hasAfterUnmapHook && typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12017
12182
  if (typeof parsedValue === "string" && parsedValue !== "") {
12018
12183
  parsedValue = decode(parsedValue);
12019
12184
  } else if (typeof parsedValue === "number") ; else {
@@ -12078,18 +12243,38 @@ class Schema {
12078
12243
  preprocessAttributesForValidation(attributes) {
12079
12244
  const processed = {};
12080
12245
  for (const [key, value] of Object.entries(attributes)) {
12081
- if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12082
- const isExplicitRequired = value.$$type && value.$$type.includes("required");
12083
- const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12084
- const objectConfig = {
12085
- type: "object",
12086
- properties: this.preprocessAttributesForValidation(value),
12087
- strict: false
12088
- };
12089
- if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12090
- objectConfig.optional = true;
12246
+ if (typeof value === "string") {
12247
+ if (value.startsWith("embedding:")) {
12248
+ const lengthMatch = value.match(/embedding:(\d+)/);
12249
+ if (lengthMatch) {
12250
+ const length = lengthMatch[1];
12251
+ const rest = value.substring(`embedding:${length}`.length);
12252
+ processed[key] = `array|items:number|length:${length}|empty:false${rest}`;
12253
+ continue;
12254
+ }
12255
+ }
12256
+ if (value.startsWith("embedding|") || value === "embedding") {
12257
+ processed[key] = value.replace(/^embedding/, "array|items:number|empty:false");
12258
+ continue;
12259
+ }
12260
+ processed[key] = value;
12261
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12262
+ const hasValidatorType = value.type !== void 0 && key !== "$$type";
12263
+ if (hasValidatorType) {
12264
+ processed[key] = value;
12265
+ } else {
12266
+ const isExplicitRequired = value.$$type && value.$$type.includes("required");
12267
+ const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12268
+ const objectConfig = {
12269
+ type: "object",
12270
+ properties: this.preprocessAttributesForValidation(value),
12271
+ strict: false
12272
+ };
12273
+ if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12274
+ objectConfig.optional = true;
12275
+ }
12276
+ processed[key] = objectConfig;
12091
12277
  }
12092
- processed[key] = objectConfig;
12093
12278
  } else {
12094
12279
  processed[key] = value;
12095
12280
  }
@@ -12940,6 +13125,71 @@ ${errorDetails}`,
12940
13125
  }
12941
13126
  return true;
12942
13127
  }
13128
+ /**
13129
+ * Find orphaned partitions (partitions that reference non-existent fields)
13130
+ * @returns {Object} Object with orphaned partition names as keys and details as values
13131
+ * @example
13132
+ * const orphaned = resource.findOrphanedPartitions();
13133
+ * // Returns: { byRegion: { missingFields: ['region'], definition: {...} } }
13134
+ */
13135
+ findOrphanedPartitions() {
13136
+ const orphaned = {};
13137
+ if (!this.config.partitions) {
13138
+ return orphaned;
13139
+ }
13140
+ for (const [partitionName, partitionDef] of Object.entries(this.config.partitions)) {
13141
+ if (!partitionDef.fields) {
13142
+ continue;
13143
+ }
13144
+ const missingFields = [];
13145
+ for (const fieldName of Object.keys(partitionDef.fields)) {
13146
+ if (!this.fieldExistsInAttributes(fieldName)) {
13147
+ missingFields.push(fieldName);
13148
+ }
13149
+ }
13150
+ if (missingFields.length > 0) {
13151
+ orphaned[partitionName] = {
13152
+ missingFields,
13153
+ definition: partitionDef,
13154
+ allFields: Object.keys(partitionDef.fields)
13155
+ };
13156
+ }
13157
+ }
13158
+ return orphaned;
13159
+ }
13160
+ /**
13161
+ * Remove orphaned partitions (partitions that reference non-existent fields)
13162
+ * WARNING: This will modify the resource configuration and should be followed by uploadMetadataFile()
13163
+ * @param {Object} options - Options
13164
+ * @param {boolean} options.dryRun - If true, only returns what would be removed without modifying (default: false)
13165
+ * @returns {Object} Object with removed partition names and details
13166
+ * @example
13167
+ * // Dry run to see what would be removed
13168
+ * const toRemove = resource.removeOrphanedPartitions({ dryRun: true });
13169
+ * console.log('Would remove:', toRemove);
13170
+ *
13171
+ * // Actually remove orphaned partitions
13172
+ * const removed = resource.removeOrphanedPartitions();
13173
+ * await database.uploadMetadataFile(); // Save changes to S3
13174
+ */
13175
+ removeOrphanedPartitions({ dryRun = false } = {}) {
13176
+ const orphaned = this.findOrphanedPartitions();
13177
+ if (Object.keys(orphaned).length === 0) {
13178
+ return {};
13179
+ }
13180
+ if (dryRun) {
13181
+ return orphaned;
13182
+ }
13183
+ for (const partitionName of Object.keys(orphaned)) {
13184
+ delete this.config.partitions[partitionName];
13185
+ }
13186
+ this.emit("orphanedPartitionsRemoved", {
13187
+ resourceName: this.name,
13188
+ removed: Object.keys(orphaned),
13189
+ details: orphaned
13190
+ });
13191
+ return orphaned;
13192
+ }
12943
13193
  /**
12944
13194
  * Apply a single partition rule to a field value
12945
13195
  * @param {*} value - The field value
@@ -15033,7 +15283,7 @@ class Database extends EventEmitter {
15033
15283
  this.id = idGenerator(7);
15034
15284
  this.version = "1";
15035
15285
  this.s3dbVersion = (() => {
15036
- const [ok, err, version] = tryFn(() => true ? "11.2.4" : "latest");
15286
+ const [ok, err, version] = tryFn(() => true ? "11.2.6" : "latest");
15037
15287
  return ok ? version : "latest";
15038
15288
  })();
15039
15289
  this.resources = {};
@@ -18936,6 +19186,1091 @@ class StateMachinePlugin extends Plugin {
18936
19186
  }
18937
19187
  }
18938
19188
 
19189
/**
 * Cosine distance (1 - cosine similarity) between two vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, same length as `a`.
 * @returns {number} Value in [0, 2]: 0 for the same direction, 1 for
 *   orthogonal vectors, 2 for opposite directions. When the norm product is
 *   zero the result is 0 if both vectors are all zeros and 1 otherwise.
 * @throws {Error} If the vectors have different lengths.
 */
function cosineDistance(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return a.every((v) => v === 0) && b.every((v) => v === 0) ? 0 : 1;
  }
  // Rounding in the sums and square roots can push the ratio a hair outside
  // [-1, 1]; clamp so the distance never leaves [0, 2] (NaN still propagates).
  const similarity = Math.min(1, Math.max(-1, dot / denominator));
  return 1 - similarity;
}
19208
/**
 * Euclidean (L2) distance between two vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, same length as `a`.
 * @returns {number} Square root of the summed squared component differences.
 * @throws {Error} If the vectors have different lengths.
 */
function euclideanDistance(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let squaredTotal = 0;
  let idx = 0;
  while (idx < size) {
    const delta = a[idx] - b[idx];
    squaredTotal += delta * delta;
    idx += 1;
  }
  return Math.sqrt(squaredTotal);
}
19219
/**
 * Manhattan (L1) distance between two vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, same length as `a`.
 * @returns {number} Sum of the absolute component differences.
 * @throws {Error} If the vectors have different lengths.
 */
function manhattanDistance(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < size) {
    total += Math.abs(a[idx] - b[idx]);
    idx += 1;
  }
  return total;
}
19229
/**
 * Dot product of two vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, same length as `a`.
 * @returns {number} Sum of the pairwise component products.
 * @throws {Error} If the vectors have different lengths.
 */
function dotProduct(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < size) {
    total += a[idx] * b[idx];
    idx += 1;
  }
  return total;
}
19239
/**
 * Scale a vector to unit length.
 *
 * @param {number[]} vector - Input vector; it is not modified.
 * @returns {number[]} A new array: each component divided by the vector's
 *   Euclidean length, or a plain copy when that length is zero.
 */
function normalize(vector) {
  const sumOfSquares = vector.reduce((acc, component) => acc + component * component, 0);
  const length = Math.sqrt(sumOfSquares);
  return length === 0
    ? vector.slice()
    : vector.map((component) => component / length);
}
19248
+
19249
/**
 * Cluster vectors with Lloyd's k-means, seeded by k-means++ initialization.
 *
 * @param {number[][]} vectors - Vectors to cluster; all must share one length.
 * @param {number} k - Number of clusters (1 <= k <= vectors.length).
 * @param {Object} [options]
 * @param {number} [options.maxIterations=100] - Upper bound on assignment rounds.
 * @param {number} [options.tolerance=1e-4] - Stop when inertia changes by less than this between rounds.
 * @param {Function} [options.distanceFn=euclideanDistance] - (a, b) => number.
 * @param {number|null} [options.seed=null] - When set, every random choice
 *   (initial centroids and re-seeding of empty clusters) comes from a
 *   deterministic generator, so equal inputs give equal results.
 * @param {Function|null} [options.onIteration=null] - Called as
 *   (iterationNumber, inertia, converged) after each assignment round.
 * @returns {{centroids: number[][], assignments: number[], iterations: number, converged: boolean, inertia: number}}
 * @throws {Error} On empty input, k out of range, or mixed vector lengths.
 */
function kmeans(vectors, k, options = {}) {
  const {
    maxIterations = 100,
    tolerance = 1e-4,
    distanceFn = euclideanDistance,
    seed = null,
    onIteration = null
  } = options;
  if (vectors.length === 0) {
    throw new Error("Cannot cluster empty vector array");
  }
  if (k < 1) {
    throw new Error(`k must be at least 1, got ${k}`);
  }
  if (k > vectors.length) {
    throw new Error(`k (${k}) cannot be greater than number of vectors (${vectors.length})`);
  }
  const dimensions = vectors[0].length;
  for (let i = 1; i < vectors.length; i++) {
    if (vectors[i].length !== dimensions) {
      throw new Error(`All vectors must have same dimensions. Expected ${dimensions}, got ${vectors[i].length} at index ${i}`);
    }
  }
  // One random source for the whole run so a seed makes it reproducible.
  const random = createKMeansRandom(seed);
  const centroids = initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed, random);
  // Sum of squared distances from each vector to its assigned centroid.
  const computeInertia = (assigned) => {
    let total = 0;
    vectors.forEach((vector, i) => {
      const dist = distanceFn(vector, centroids[assigned[i]]);
      total += dist * dist;
    });
    return total;
  };
  let assignments = new Array(vectors.length);
  let iterations = 0;
  let converged = false;
  let previousInertia = Infinity;
  while (!converged && iterations < maxIterations) {
    // Assignment step: nearest centroid wins, lowest index on ties.
    const newAssignments = vectors.map((vector) => {
      let minDist = Infinity;
      let nearestCluster = 0;
      for (let i = 0; i < k; i++) {
        const dist = distanceFn(vector, centroids[i]);
        if (dist < minDist) {
          minDist = dist;
          nearestCluster = i;
        }
      }
      return nearestCluster;
    });
    const roundInertia = computeInertia(newAssignments);
    converged = Math.abs(previousInertia - roundInertia) < tolerance;
    assignments = newAssignments;
    previousInertia = roundInertia;
    if (onIteration) {
      onIteration(iterations + 1, roundInertia, converged);
    }
    if (!converged) {
      // Update step: move each centroid to the mean of its members.
      const clusterSums = Array(k).fill(null).map(() => new Array(dimensions).fill(0));
      const clusterCounts = new Array(k).fill(0);
      vectors.forEach((vector, i) => {
        const cluster = assignments[i];
        clusterCounts[cluster]++;
        vector.forEach((val, j) => {
          clusterSums[cluster][j] += val;
        });
      });
      for (let i = 0; i < k; i++) {
        if (clusterCounts[i] > 0) {
          centroids[i] = clusterSums[i].map((sum) => sum / clusterCounts[i]);
        } else {
          // Empty cluster: restart it from a randomly chosen input vector.
          const randomIdx = Math.floor(random() * vectors.length);
          centroids[i] = [...vectors[randomIdx]];
        }
      }
    }
    iterations++;
  }
  // Recomputed because the centroids may have moved after the last assignment.
  const inertia = computeInertia(assignments);
  return {
    centroids,
    assignments,
    iterations,
    converged,
    inertia
  };
}
/**
 * Pick k initial centroids with the k-means++ scheme: the first one is chosen
 * from the seed (or at random), each further one is drawn with probability
 * proportional to its squared distance to the nearest centroid chosen so far.
 *
 * @param {number[][]} vectors - Candidate vectors.
 * @param {number} k - Number of centroids to pick.
 * @param {Function} distanceFn - (a, b) => number.
 * @param {number|null} seed - null/undefined for non-deterministic picks.
 *   Otherwise the first centroid is vectors[seed mod n] (the seed is truncated
 *   to an integer and wrapped into range).
 * @param {Function} [random] - Source of numbers in [0, 1); defaults to a
 *   generator derived from `seed`.
 * @returns {number[][]} Copies of the chosen vectors.
 */
function initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed, random = createKMeansRandom(seed)) {
  const centroids = [];
  const n = vectors.length;
  let firstIndex;
  if (seed === null || seed === undefined) {
    firstIndex = Math.floor(random() * n);
  } else {
    // Wrap negative and fractional seeds into a valid index instead of
    // reading outside the array.
    const integerSeed = Math.trunc(Number(seed));
    firstIndex = Number.isFinite(integerSeed) ? ((integerSeed % n) + n) % n : 0;
  }
  centroids.push([...vectors[firstIndex]]);
  for (let i = 1; i < k; i++) {
    const squaredDistances = vectors.map((vector) => {
      const nearest = Math.min(...centroids.map((c) => distanceFn(vector, c)));
      return nearest * nearest;
    });
    const totalSquared = squaredDistances.reduce((a, b) => a + b, 0);
    let chosen = -1;
    if (totalSquared > 0) {
      const threshold = random() * totalSquared;
      let cumulativeSum = 0;
      for (let j = 0; j < n; j++) {
        cumulativeSum += squaredDistances[j];
        // Strict comparison: a vector at distance 0 (already a centroid)
        // never advances the sum past the threshold, so it is never re-picked.
        if (cumulativeSum > threshold) {
          chosen = j;
          break;
        }
      }
    }
    if (chosen === -1) {
      // All candidates coincide with a centroid, or the distances were not
      // usable numbers: fall back to a uniform pick so k centroids always exist.
      chosen = Math.floor(random() * n);
    }
    centroids.push([...vectors[chosen]]);
  }
  return centroids;
}
/**
 * Build the random source for k-means: Math.random when no seed is given,
 * otherwise a small deterministic 32-bit generator (mulberry32) whose state
 * starts from the truncated seed.
 *
 * @param {number|null|undefined} seed
 * @returns {Function} () => number in [0, 1)
 */
function createKMeansRandom(seed) {
  if (seed === null || seed === undefined) {
    return Math.random;
  }
  let state = Math.trunc(Number(seed)) >>> 0;
  return () => {
    state = (state + 0x6d2b79f5) >>> 0;
    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
}
19364
/**
 * Run k-means for every k in [minK, maxK], score each run with several
 * cluster-quality metrics, and report which k each metric prefers plus the k
 * preferred by the most metrics.
 *
 * @param {number[][]} vectors - Vectors to analyse.
 * @param {Object} [options]
 * @param {number} [options.minK=2] - Smallest k to try.
 * @param {number} [options.maxK] - Largest k to try; defaults to
 *   min(10, floor(sqrt(vectors.length / 2))).
 * @param {Function} [options.distanceFn=euclideanDistance] - (a, b) => number.
 * @param {number} [options.nReferences=10] - Forwarded to gapStatistic.
 * @param {number} [options.stabilityRuns=5] - Forwarded to clusteringStability as nRuns.
 *   Any other option is forwarded to kmeans().
 * @returns {Promise<{results: Object[], recommendations: Object, consensus: number, summary: Object}>}
 * @throws {Error} If the range minK..maxK contains no value of k.
 */
async function findOptimalK(vectors, options = {}) {
  const {
    minK = 2,
    maxK = Math.min(10, Math.floor(Math.sqrt(vectors.length / 2))),
    distanceFn = euclideanDistance,
    nReferences = 10,
    stabilityRuns = 5,
    ...kmeansOptions
  } = options;
  // Without at least one k to evaluate, the reductions below would fail with
  // an opaque "Reduce of empty array" TypeError. The default maxK drops below
  // the default minK for fewer than 8 vectors.
  if (!(minK <= maxK)) {
    throw new Error(
      `No k values to evaluate: minK (${minK}) is greater than maxK (${maxK}). Provide more vectors or set minK/maxK explicitly.`
    );
  }
  const metricsModule = await Promise.resolve().then(function () { return metrics; });
  const {
    silhouetteScore,
    daviesBouldinIndex,
    calinskiHarabaszIndex,
    gapStatistic,
    clusteringStability
  } = metricsModule;
  const results = [];
  for (let k = minK; k <= maxK; k++) {
    const kmeansResult = kmeans(vectors, k, { ...kmeansOptions, distanceFn });
    const { assignments, centroids } = kmeansResult;
    const silhouette = silhouetteScore(vectors, assignments, centroids, distanceFn);
    const daviesBouldin = daviesBouldinIndex(vectors, assignments, centroids, distanceFn);
    const calinskiHarabasz = calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn);
    const gap = await gapStatistic(vectors, assignments, centroids, distanceFn, nReferences);
    const stability = clusteringStability(
      vectors,
      k,
      { ...kmeansOptions, distanceFn, nRuns: stabilityRuns }
    );
    results.push({
      k,
      inertia: kmeansResult.inertia,
      silhouette,
      daviesBouldin,
      calinskiHarabasz,
      gap: gap.gap,
      gapSk: gap.sk,
      stability: stability.stability,
      cvInertia: stability.cvInertia,
      iterations: kmeansResult.iterations,
      converged: kmeansResult.converged
    });
  }
  // k of the best-scoring run for one metric; the earliest run wins ties.
  const bestKBy = (metric, isBetter) =>
    results.reduce((best, curr) => (isBetter(curr[metric], best[metric]) ? curr : best)).k;
  const higher = (candidate, incumbent) => candidate > incumbent;
  const lower = (candidate, incumbent) => candidate < incumbent;
  const elbowK = findElbowPoint(results.map((r) => r.inertia));
  const recommendations = {
    // findElbowPoint returns an index into results, which starts at minK.
    elbow: minK + elbowK,
    silhouette: bestKBy("silhouette", higher),
    daviesBouldin: bestKBy("daviesBouldin", lower),
    calinskiHarabasz: bestKBy("calinskiHarabasz", higher),
    gap: bestKBy("gap", higher),
    stability: bestKBy("stability", higher)
  };
  const votes = Object.values(recommendations);
  const consensus = votes.reduce((acc, k) => {
    acc[k] = (acc[k] || 0) + 1;
    return acc;
  }, {});
  const consensusK = Number.parseInt(
    Object.entries(consensus).reduce((a, b) => (b[1] > a[1] ? b : a))[0],
    10
  );
  return {
    results,
    recommendations,
    consensus: consensusK,
    summary: {
      analysisRange: `${minK}-${maxK}`,
      totalVectors: vectors.length,
      dimensions: vectors[0].length,
      recommendation: consensusK,
      confidence: consensus[consensusK] / votes.length
    }
  };
}
19468
/**
 * Locate the "elbow" of an inertia curve: the interior index with the largest
 * second difference (previous - 2 * current + next).
 *
 * @param {number[]} inertias - Inertia values, one per evaluated k.
 * @returns {number} Index of the elbow; 0 when there are fewer than three
 *   values or no interior point yields a comparable curvature.
 */
function findElbowPoint(inertias) {
  const count = inertias.length;
  if (count < 3) {
    return 0;
  }
  let bestIndex = 0;
  let bestCurvature = -Infinity;
  // Only interior points have both neighbours.
  for (let idx = 1; idx <= count - 2; idx += 1) {
    const secondDifference = inertias[idx - 1] - 2 * inertias[idx] + inertias[idx + 1];
    if (secondDifference > bestCurvature) {
      bestCurvature = secondDifference;
      bestIndex = idx;
    }
  }
  return bestIndex;
}
19482
+
19483
/**
 * Error type raised for vector-plugin failures.
 *
 * Forwards `message` to PluginError together with `details`, tagging the
 * error with pluginName "VectorPlugin". When `details.description` is falsy,
 * a built-in help text is used instead; it interpolates `details.operation`
 * (or "unknown") and lists common causes, the available distance metrics and
 * storage notes.
 *
 * @param {string} message - Error message.
 * @param {Object} [details={}] - Extra fields passed through to PluginError.
 */
+ class VectorError extends PluginError {
19484
+ constructor(message, details = {}) {
19485
// `...details` comes after pluginName, so a caller-supplied pluginName wins;
// `description` comes last, so the fallback text below applies whenever
// details.description is falsy.
+ super(message, {
19486
+ pluginName: "VectorPlugin",
19487
+ ...details,
19488
+ description: details.description || `
19489
+ Vector Plugin Error
19490
+
19491
+ Operation: ${details.operation || "unknown"}
19492
+
19493
+ Common causes:
19494
+ 1. Vector dimension mismatch between vectors
19495
+ 2. Invalid distance metric specified (must be: cosine, euclidean, manhattan)
19496
+ 3. Empty vector array provided for clustering
19497
+ 4. k value larger than number of available vectors
19498
+ 5. Vector field not found or invalid in resource
19499
+ 6. Large vectors without proper behavior (use 'body-overflow' or 'body-only')
19500
+
19501
+ Available distance metrics:
19502
+ - cosine: Best for normalized vectors, semantic similarity. Range: [0, 2]
19503
+ - euclidean: Standard L2 distance, geometric proximity. Range: [0, \u221E)
19504
+ - manhattan: L1 distance, faster computation. Range: [0, \u221E)
19505
+
19506
+ Storage considerations:
19507
+ - Vectors > 250 dimensions may exceed S3 metadata limit (2KB)
19508
+ - Use behavior: 'body-overflow' or 'body-only' for large vectors
19509
+ - OpenAI ada-002 (1536 dims): ~10KB, requires body storage
19510
+ - Sentence Transformers (384 dims): ~2.7KB, requires body storage
19511
+ `.trim()
19512
+ });
19513
+ }
19514
+ }
19515
+
19516
+ class VectorPlugin extends Plugin {
19517
+ constructor(options = {}) {
19518
+ super(options);
19519
+ this.config = {
19520
+ dimensions: 1536,
19521
+ // Default to OpenAI text-embedding-3-small/3-large
19522
+ distanceMetric: "cosine",
19523
+ // Default metric
19524
+ storageThreshold: 1500,
19525
+ // Bytes - warn if vectors exceed this
19526
+ autoFixBehavior: false,
19527
+ // Automatically set body-overflow
19528
+ autoDetectVectorField: true,
19529
+ // Auto-detect embedding:XXX fields
19530
+ emitEvents: true,
19531
+ // Emit events for monitoring
19532
+ verboseEvents: false,
19533
+ // Emit detailed progress events
19534
+ eventThrottle: 100,
19535
+ // Throttle progress events (ms)
19536
+ ...options
19537
+ };
19538
+ this.distanceFunctions = {
19539
+ cosine: cosineDistance,
19540
+ euclidean: euclideanDistance,
19541
+ manhattan: manhattanDistance
19542
+ };
19543
+ this._vectorFieldCache = /* @__PURE__ */ new Map();
19544
+ this._throttleState = /* @__PURE__ */ new Map();
19545
+ }
19546
+ async onInstall() {
19547
+ this.emit("installed", { plugin: "VectorPlugin" });
19548
+ this.validateVectorStorage();
19549
+ this.installResourceMethods();
19550
+ }
19551
+ async onStart() {
19552
+ this.emit("started", { plugin: "VectorPlugin" });
19553
+ }
19554
+ async onStop() {
19555
+ this.emit("stopped", { plugin: "VectorPlugin" });
19556
+ }
19557
+ async onUninstall(options) {
19558
+ for (const resource of Object.values(this.database.resources)) {
19559
+ delete resource.vectorSearch;
19560
+ delete resource.cluster;
19561
+ delete resource.vectorDistance;
19562
+ delete resource.similarTo;
19563
+ delete resource.findSimilar;
19564
+ delete resource.distance;
19565
+ }
19566
+ this.emit("uninstalled", { plugin: "VectorPlugin" });
19567
+ }
19568
+ /**
19569
+ * Validate vector storage configuration for all resources
19570
+ *
19571
+ * Detects large vector fields and warns if proper behavior is not set.
19572
+ * Can optionally auto-fix by setting body-overflow behavior.
19573
+ */
19574
+ validateVectorStorage() {
19575
+ for (const resource of Object.values(this.database.resources)) {
19576
+ const vectorFields = this.findVectorFields(resource.schema.attributes);
19577
+ if (vectorFields.length === 0) continue;
19578
+ const totalVectorSize = vectorFields.reduce((sum, f) => sum + f.estimatedBytes, 0);
19579
+ if (totalVectorSize > this.config.storageThreshold) {
19580
+ const hasCorrectBehavior = ["body-overflow", "body-only"].includes(resource.behavior);
19581
+ if (!hasCorrectBehavior) {
19582
+ const warning = {
19583
+ resource: resource.name,
19584
+ vectorFields: vectorFields.map((f) => ({
19585
+ field: f.name,
19586
+ dimensions: f.length,
19587
+ estimatedBytes: f.estimatedBytes
19588
+ })),
19589
+ totalEstimatedBytes: totalVectorSize,
19590
+ metadataLimit: 2047,
19591
+ currentBehavior: resource.behavior || "default",
19592
+ recommendation: "body-overflow"
19593
+ };
19594
+ this.emit("vector:storage-warning", warning);
19595
+ if (this.config.autoFixBehavior) {
19596
+ resource.behavior = "body-overflow";
19597
+ this.emit("vector:behavior-fixed", {
19598
+ resource: resource.name,
19599
+ newBehavior: "body-overflow"
19600
+ });
19601
+ } else {
19602
+ console.warn(`\u26A0\uFE0F VectorPlugin: Resource '${resource.name}' has large vector fields (${totalVectorSize} bytes estimated)`);
19603
+ console.warn(` Current behavior: '${resource.behavior || "default"}'`);
19604
+ console.warn(` Recommendation: Add behavior: 'body-overflow' or 'body-only' to resource configuration`);
19605
+ console.warn(` Large vectors will exceed S3 metadata limit (2047 bytes) and cause errors.`);
19606
+ }
19607
+ }
19608
+ }
19609
+ }
19610
+ }
19611
+ /**
19612
+ * Auto-detect vector field from resource schema
19613
+ *
19614
+ * Looks for fields with type 'embedding:XXX' pattern.
19615
+ * Caches result per resource for performance.
19616
+ *
19617
+ * @param {Resource} resource - Resource instance
19618
+ * @returns {string|null} Detected vector field name or null
19619
+ */
19620
+ detectVectorField(resource) {
19621
+ if (this._vectorFieldCache.has(resource.name)) {
19622
+ return this._vectorFieldCache.get(resource.name);
19623
+ }
19624
+ const vectorField = this._findEmbeddingField(resource.schema.attributes);
19625
+ this._vectorFieldCache.set(resource.name, vectorField);
19626
+ if (vectorField && this.config.emitEvents) {
19627
+ this.emit("vector:field-detected", {
19628
+ resource: resource.name,
19629
+ vectorField,
19630
+ timestamp: Date.now()
19631
+ });
19632
+ }
19633
+ return vectorField;
19634
+ }
19635
+ /**
19636
+ * Recursively find embedding:XXX field in attributes
19637
+ *
19638
+ * @param {Object} attributes - Resource attributes
19639
+ * @param {string} path - Current path (for nested objects)
19640
+ * @returns {string|null} Field path or null
19641
+ */
19642
+ _findEmbeddingField(attributes, path = "") {
19643
+ for (const [key, attr] of Object.entries(attributes)) {
19644
+ const fullPath = path ? `${path}.${key}` : key;
19645
+ if (typeof attr === "string" && attr.startsWith("embedding:")) {
19646
+ return fullPath;
19647
+ }
19648
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19649
+ return fullPath;
19650
+ }
19651
+ if (attr.type === "object" && attr.props) {
19652
+ const nested = this._findEmbeddingField(attr.props, fullPath);
19653
+ if (nested) return nested;
19654
+ }
19655
+ }
19656
+ return null;
19657
+ }
19658
+ /**
19659
+ * Emit event with throttling support
19660
+ *
19661
+ * @param {string} eventName - Event name
19662
+ * @param {Object} data - Event data
19663
+ * @param {string} throttleKey - Unique key for throttling (optional)
19664
+ */
19665
+ _emitEvent(eventName, data, throttleKey = null) {
19666
+ if (!this.config.emitEvents) return;
19667
+ if (throttleKey) {
19668
+ const now = Date.now();
19669
+ const lastEmit = this._throttleState.get(throttleKey);
19670
+ if (lastEmit && now - lastEmit < this.config.eventThrottle) {
19671
+ return;
19672
+ }
19673
+ this._throttleState.set(throttleKey, now);
19674
+ }
19675
+ this.emit(eventName, data);
19676
+ }
19677
+ /**
19678
+ * Find vector fields in resource attributes
19679
+ *
19680
+ * @param {Object} attributes - Resource attributes
19681
+ * @param {string} path - Current path (for nested objects)
19682
+ * @returns {Array} Array of vector field info
19683
+ */
19684
+ findVectorFields(attributes, path = "") {
19685
+ const vectors = [];
19686
+ for (const [key, attr] of Object.entries(attributes)) {
19687
+ const fullPath = path ? `${path}.${key}` : key;
19688
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19689
+ vectors.push({
19690
+ name: fullPath,
19691
+ length: attr.length,
19692
+ estimatedBytes: this.estimateVectorBytes(attr.length)
19693
+ });
19694
+ }
19695
+ if (attr.type === "object" && attr.props) {
19696
+ vectors.push(...this.findVectorFields(attr.props, fullPath));
19697
+ }
19698
+ }
19699
+ return vectors;
19700
+ }
19701
+ /**
19702
+ * Estimate bytes required to store a vector in JSON format
19703
+ *
19704
+ * Conservative estimate: ~7 bytes per number + array overhead
19705
+ *
19706
+ * @param {number} dimensions - Number of dimensions
19707
+ * @returns {number} Estimated bytes
19708
+ */
19709
+ estimateVectorBytes(dimensions) {
19710
+ return dimensions * 7 + 50;
19711
+ }
19712
+ /**
19713
+ * Install vector methods on all resources
19714
+ */
19715
+ installResourceMethods() {
19716
+ for (const resource of Object.values(this.database.resources)) {
19717
+ const searchMethod = this.createVectorSearchMethod(resource);
19718
+ const clusterMethod = this.createClusteringMethod(resource);
19719
+ const distanceMethod = this.createDistanceMethod();
19720
+ resource.vectorSearch = searchMethod;
19721
+ resource.cluster = clusterMethod;
19722
+ resource.vectorDistance = distanceMethod;
19723
+ resource.similarTo = searchMethod;
19724
+ resource.findSimilar = searchMethod;
19725
+ resource.distance = distanceMethod;
19726
+ }
19727
+ }
19728
+ /**
19729
+ * Create vector search method for a resource
19730
+ *
19731
+ * Performs K-nearest neighbors search to find similar vectors.
19732
+ *
19733
+ * @param {Resource} resource - Resource instance
19734
+ * @returns {Function} Vector search method
19735
+ */
19736
+ createVectorSearchMethod(resource) {
19737
+ return async (queryVector, options = {}) => {
19738
+ const startTime = Date.now();
19739
+ let vectorField = options.vectorField;
19740
+ if (!vectorField && this.config.autoDetectVectorField) {
19741
+ vectorField = this.detectVectorField(resource);
19742
+ if (!vectorField) {
19743
+ vectorField = "vector";
19744
+ }
19745
+ } else if (!vectorField) {
19746
+ vectorField = "vector";
19747
+ }
19748
+ const {
19749
+ limit = 10,
19750
+ distanceMetric = this.config.distanceMetric,
19751
+ threshold = null,
19752
+ partition = null
19753
+ } = options;
19754
+ const distanceFn = this.distanceFunctions[distanceMetric];
19755
+ if (!distanceFn) {
19756
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19757
+ operation: "vectorSearch",
19758
+ availableMetrics: Object.keys(this.distanceFunctions),
19759
+ providedMetric: distanceMetric
19760
+ });
19761
+ this._emitEvent("vector:search-error", {
19762
+ resource: resource.name,
19763
+ error: error.message,
19764
+ timestamp: Date.now()
19765
+ });
19766
+ throw error;
19767
+ }
19768
+ this._emitEvent("vector:search-start", {
19769
+ resource: resource.name,
19770
+ vectorField,
19771
+ limit,
19772
+ distanceMetric,
19773
+ partition,
19774
+ threshold,
19775
+ queryDimensions: queryVector.length,
19776
+ timestamp: startTime
19777
+ });
19778
+ try {
19779
+ let allRecords;
19780
+ if (partition) {
19781
+ this._emitEvent("vector:partition-filter", {
19782
+ resource: resource.name,
19783
+ partition,
19784
+ timestamp: Date.now()
19785
+ });
19786
+ allRecords = await resource.list({ partition, partitionValues: partition });
19787
+ } else {
19788
+ allRecords = await resource.getAll();
19789
+ }
19790
+ const totalRecords = allRecords.length;
19791
+ let processedRecords = 0;
19792
+ let dimensionMismatches = 0;
19793
+ const results = allRecords.filter((record) => record[vectorField] && Array.isArray(record[vectorField])).map((record, index) => {
19794
+ try {
19795
+ const distance = distanceFn(queryVector, record[vectorField]);
19796
+ processedRecords++;
19797
+ if (this.config.verboseEvents && processedRecords % 100 === 0) {
19798
+ this._emitEvent("vector:search-progress", {
19799
+ resource: resource.name,
19800
+ processed: processedRecords,
19801
+ total: totalRecords,
19802
+ progress: processedRecords / totalRecords * 100,
19803
+ timestamp: Date.now()
19804
+ }, `search-${resource.name}`);
19805
+ }
19806
+ return { record, distance };
19807
+ } catch (err) {
19808
+ dimensionMismatches++;
19809
+ if (this.config.verboseEvents) {
19810
+ this._emitEvent("vector:dimension-mismatch", {
19811
+ resource: resource.name,
19812
+ recordIndex: index,
19813
+ expected: queryVector.length,
19814
+ got: record[vectorField]?.length,
19815
+ timestamp: Date.now()
19816
+ });
19817
+ }
19818
+ return null;
19819
+ }
19820
+ }).filter((result) => result !== null).filter((result) => threshold === null || result.distance <= threshold).sort((a, b) => a.distance - b.distance).slice(0, limit);
19821
+ const duration = Date.now() - startTime;
19822
+ const throughput = totalRecords / (duration / 1e3);
19823
+ this._emitEvent("vector:search-complete", {
19824
+ resource: resource.name,
19825
+ vectorField,
19826
+ resultsCount: results.length,
19827
+ totalRecords,
19828
+ processedRecords,
19829
+ dimensionMismatches,
19830
+ duration,
19831
+ throughput: throughput.toFixed(2),
19832
+ timestamp: Date.now()
19833
+ });
19834
+ if (this.config.verboseEvents) {
19835
+ this._emitEvent("vector:performance", {
19836
+ operation: "search",
19837
+ resource: resource.name,
19838
+ duration,
19839
+ throughput: throughput.toFixed(2),
19840
+ recordsPerSecond: (processedRecords / (duration / 1e3)).toFixed(2),
19841
+ timestamp: Date.now()
19842
+ });
19843
+ }
19844
+ return results;
19845
+ } catch (error) {
19846
+ this._emitEvent("vector:search-error", {
19847
+ resource: resource.name,
19848
+ error: error.message,
19849
+ stack: error.stack,
19850
+ timestamp: Date.now()
19851
+ });
19852
+ throw error;
19853
+ }
19854
+ };
19855
+ }
19856
+ /**
19857
+ * Create clustering method for a resource
19858
+ *
19859
+ * Performs k-means clustering on resource vectors.
19860
+ *
19861
+ * @param {Resource} resource - Resource instance
19862
+ * @returns {Function} Clustering method
19863
+ */
19864
+ createClusteringMethod(resource) {
19865
+ return async (options = {}) => {
19866
+ const startTime = Date.now();
19867
+ let vectorField = options.vectorField;
19868
+ if (!vectorField && this.config.autoDetectVectorField) {
19869
+ vectorField = this.detectVectorField(resource);
19870
+ if (!vectorField) {
19871
+ vectorField = "vector";
19872
+ }
19873
+ } else if (!vectorField) {
19874
+ vectorField = "vector";
19875
+ }
19876
+ const {
19877
+ k = 5,
19878
+ distanceMetric = this.config.distanceMetric,
19879
+ partition = null,
19880
+ ...kmeansOptions
19881
+ } = options;
19882
+ const distanceFn = this.distanceFunctions[distanceMetric];
19883
+ if (!distanceFn) {
19884
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19885
+ operation: "cluster",
19886
+ availableMetrics: Object.keys(this.distanceFunctions),
19887
+ providedMetric: distanceMetric
19888
+ });
19889
+ this._emitEvent("vector:cluster-error", {
19890
+ resource: resource.name,
19891
+ error: error.message,
19892
+ timestamp: Date.now()
19893
+ });
19894
+ throw error;
19895
+ }
19896
+ this._emitEvent("vector:cluster-start", {
19897
+ resource: resource.name,
19898
+ vectorField,
19899
+ k,
19900
+ distanceMetric,
19901
+ partition,
19902
+ maxIterations: kmeansOptions.maxIterations || 100,
19903
+ timestamp: startTime
19904
+ });
19905
+ try {
19906
+ let allRecords;
19907
+ if (partition) {
19908
+ this._emitEvent("vector:partition-filter", {
19909
+ resource: resource.name,
19910
+ partition,
19911
+ timestamp: Date.now()
19912
+ });
19913
+ allRecords = await resource.list({ partition, partitionValues: partition });
19914
+ } else {
19915
+ allRecords = await resource.getAll();
19916
+ }
19917
+ const recordsWithVectors = allRecords.filter(
19918
+ (record) => record[vectorField] && Array.isArray(record[vectorField])
19919
+ );
19920
+ if (recordsWithVectors.length === 0) {
19921
+ const error = new VectorError("No vectors found in resource", {
19922
+ operation: "cluster",
19923
+ resourceName: resource.name,
19924
+ vectorField
19925
+ });
19926
+ this._emitEvent("vector:empty-dataset", {
19927
+ resource: resource.name,
19928
+ vectorField,
19929
+ totalRecords: allRecords.length,
19930
+ timestamp: Date.now()
19931
+ });
19932
+ throw error;
19933
+ }
19934
+ const vectors = recordsWithVectors.map((record) => record[vectorField]);
19935
+ const result = kmeans(vectors, k, {
19936
+ ...kmeansOptions,
19937
+ distanceFn,
19938
+ onIteration: this.config.verboseEvents ? (iteration, inertia, converged) => {
19939
+ this._emitEvent("vector:cluster-iteration", {
19940
+ resource: resource.name,
19941
+ k,
19942
+ iteration,
19943
+ inertia,
19944
+ converged,
19945
+ timestamp: Date.now()
19946
+ }, `cluster-${resource.name}`);
19947
+ } : void 0
19948
+ });
19949
+ if (result.converged) {
19950
+ this._emitEvent("vector:cluster-converged", {
19951
+ resource: resource.name,
19952
+ k,
19953
+ iterations: result.iterations,
19954
+ inertia: result.inertia,
19955
+ timestamp: Date.now()
19956
+ });
19957
+ }
19958
+ const clusters = Array(k).fill(null).map(() => []);
19959
+ recordsWithVectors.forEach((record, i) => {
19960
+ const clusterIndex = result.assignments[i];
19961
+ clusters[clusterIndex].push(record);
19962
+ });
19963
+ const duration = Date.now() - startTime;
19964
+ const clusterSizes = clusters.map((c) => c.length);
19965
+ this._emitEvent("vector:cluster-complete", {
19966
+ resource: resource.name,
19967
+ vectorField,
19968
+ k,
19969
+ vectorCount: vectors.length,
19970
+ iterations: result.iterations,
19971
+ converged: result.converged,
19972
+ inertia: result.inertia,
19973
+ clusterSizes,
19974
+ duration,
19975
+ timestamp: Date.now()
19976
+ });
19977
+ if (this.config.verboseEvents) {
19978
+ this._emitEvent("vector:performance", {
19979
+ operation: "clustering",
19980
+ resource: resource.name,
19981
+ k,
19982
+ duration,
19983
+ iterationsPerSecond: (result.iterations / (duration / 1e3)).toFixed(2),
19984
+ vectorsPerSecond: (vectors.length / (duration / 1e3)).toFixed(2),
19985
+ timestamp: Date.now()
19986
+ });
19987
+ }
19988
+ return {
19989
+ clusters,
19990
+ centroids: result.centroids,
19991
+ inertia: result.inertia,
19992
+ iterations: result.iterations,
19993
+ converged: result.converged
19994
+ };
19995
+ } catch (error) {
19996
+ this._emitEvent("vector:cluster-error", {
19997
+ resource: resource.name,
19998
+ error: error.message,
19999
+ stack: error.stack,
20000
+ timestamp: Date.now()
20001
+ });
20002
+ throw error;
20003
+ }
20004
+ };
20005
+ }
20006
+ /**
20007
+ * Create distance calculation method
20008
+ *
20009
+ * @returns {Function} Distance method
20010
+ */
20011
+ createDistanceMethod() {
20012
+ return (vector1, vector2, metric = this.config.distanceMetric) => {
20013
+ const distanceFn = this.distanceFunctions[metric];
20014
+ if (!distanceFn) {
20015
+ throw new VectorError(`Invalid distance metric: ${metric}`, {
20016
+ operation: "vectorDistance",
20017
+ availableMetrics: Object.keys(this.distanceFunctions),
20018
+ providedMetric: metric
20019
+ });
20020
+ }
20021
+ return distanceFn(vector1, vector2);
20022
+ };
20023
+ }
20024
+ /**
20025
+ * Static utility: Normalize vector
20026
+ *
20027
+ * @param {number[]} vector - Input vector
20028
+ * @returns {number[]} Normalized vector
20029
+ */
20030
+ static normalize(vector) {
20031
+ return normalize(vector);
20032
+ }
20033
+ /**
20034
+ * Static utility: Calculate dot product
20035
+ *
20036
+ * @param {number[]} vector1 - First vector
20037
+ * @param {number[]} vector2 - Second vector
20038
+ * @returns {number} Dot product
20039
+ */
20040
+ static dotProduct(vector1, vector2) {
20041
+ return dotProduct(vector1, vector2);
20042
+ }
20043
+ /**
20044
+ * Static utility: Find optimal K for clustering
20045
+ *
20046
+ * Analyzes clustering quality across a range of K values using
20047
+ * multiple evaluation metrics.
20048
+ *
20049
+ * @param {number[][]} vectors - Vectors to analyze
20050
+ * @param {Object} options - Configuration options
20051
+ * @returns {Promise<Object>} Analysis results with recommendations
20052
+ */
20053
+ static async findOptimalK(vectors, options) {
20054
+ return findOptimalK(vectors, options);
20055
+ }
20056
+ }
20057
+
20058
/**
 * Mean silhouette coefficient of a clustering
 *
 * For each point, `a` is its mean distance to the other members of its own
 * cluster and `b` the smallest mean distance to the members of any other
 * non-empty cluster; the point scores (b - a) / max(a, b). Points that are
 * alone in their cluster, or that have no other non-empty cluster to compare
 * against, are left out of the average.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Only the count is used (number of clusters)
 * @param {Function} distanceFn - Distance function, defaults to euclideanDistance
 * @returns {number} Average score, or 0 when no point could be scored
 */
function silhouetteScore(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const pointCount = vectors.length;

  // Index lists of the members of every cluster.
  const membersByCluster = Array.from({ length: clusterCount }, () => []);
  vectors.forEach((_, pointIdx) => {
    membersByCluster[assignments[pointIdx]].push(pointIdx);
  });

  if (!membersByCluster.some((members) => members.length > 1)) {
    return 0;
  }

  // Sum of distances from one point to every *other* point of a member list.
  const sumDistances = (pointIdx, members) => {
    let total = 0;
    for (const otherIdx of members) {
      if (otherIdx !== pointIdx) {
        total += distanceFn(vectors[pointIdx], vectors[otherIdx]);
      }
    }
    return total;
  };

  let scoreSum = 0;
  let scoredPoints = 0;
  for (let pointIdx = 0; pointIdx < pointCount; pointIdx++) {
    const ownIdx = assignments[pointIdx];
    const ownMembers = membersByCluster[ownIdx];
    if (ownMembers.length === 1) continue;

    const cohesion = sumDistances(pointIdx, ownMembers) / (ownMembers.length - 1);

    let separation = Infinity;
    membersByCluster.forEach((members, clusterIdx) => {
      if (clusterIdx === ownIdx || members.length === 0) return;
      const meanDistance = sumDistances(pointIdx, members) / members.length;
      separation = Math.min(separation, meanDistance);
    });
    if (separation === Infinity) continue;

    const larger = Math.max(cohesion, separation);
    scoreSum += larger === 0 ? 0 : (separation - cohesion) / larger;
    scoredPoints++;
  }
  return scoredPoints > 0 ? scoreSum / scoredPoints : 0;
}
20101
/**
 * Davies-Bouldin index of a clustering
 *
 * Each cluster's scatter is the mean distance of its points to its centroid.
 * For every non-empty cluster the worst (largest) ratio
 * (scatter_i + scatter_j) / distance(centroid_i, centroid_j) over the other
 * non-empty clusters is taken, skipping pairs with coinciding centroids, and
 * the index is the mean of those worst ratios.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function, defaults to euclideanDistance
 * @returns {number} Index value, or 0 when every cluster is empty
 */
function daviesBouldinIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const scatter = new Array(clusterCount).fill(0);
  const sizes = new Array(clusterCount).fill(0);

  vectors.forEach((vector, pointIdx) => {
    const owner = assignments[pointIdx];
    scatter[owner] += distanceFn(vector, centroids[owner]);
    sizes[owner]++;
  });
  sizes.forEach((size, clusterIdx) => {
    if (size > 0) {
      scatter[clusterIdx] /= size;
    }
  });

  let ratioSum = 0;
  let populated = 0;
  for (let first = 0; first < clusterCount; first++) {
    if (sizes[first] === 0) continue;
    let worstRatio = 0;
    for (let second = 0; second < clusterCount; second++) {
      const comparable = first !== second && sizes[second] !== 0;
      if (!comparable) continue;
      const gap = distanceFn(centroids[first], centroids[second]);
      if (gap === 0) continue;
      worstRatio = Math.max(worstRatio, (scatter[first] + scatter[second]) / gap);
    }
    ratioSum += worstRatio;
    populated++;
  }
  return populated > 0 ? ratioSum / populated : 0;
}
20132
/**
 * Calinski-Harabasz index (variance ratio criterion) of a clustering
 *
 * Computes [BGSS / (k - 1)] / [WCSS / (n - k)], where BGSS is the
 * size-weighted squared distance of each centroid to the overall mean and
 * WCSS the squared distance of each point to its own centroid.
 *
 * The ratio is only defined for 1 < k < n, so 0 is returned for empty input,
 * for k <= 1 and for k >= n (where the denominator's degrees of freedom
 * would be zero or negative), as well as when WCSS is 0.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function, defaults to euclideanDistance
 * @returns {number} Index value, or 0 when it is undefined for the input
 */
function calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const n = vectors.length;
  const k = centroids.length;
  // Guard first: vectors[0] below would throw on empty input.
  if (n === 0 || k <= 1 || k >= n) return 0;

  const dimensions = vectors[0].length;
  const overallCentroid = new Array(dimensions).fill(0);
  vectors.forEach((vector) => {
    vector.forEach((val, dim) => {
      overallCentroid[dim] += val;
    });
  });
  overallCentroid.forEach((val, dim, arr) => {
    arr[dim] = val / n;
  });

  const clusterCounts = new Array(k).fill(0);
  vectors.forEach((vector, i) => {
    clusterCounts[assignments[i]]++;
  });

  // Between-group sum of squares; empty clusters contribute nothing.
  let bgss = 0;
  for (let i = 0; i < k; i++) {
    if (clusterCounts[i] === 0) continue;
    const dist = distanceFn(centroids[i], overallCentroid);
    bgss += clusterCounts[i] * dist * dist;
  }

  // Within-cluster sum of squares.
  let wcss = 0;
  vectors.forEach((vector, i) => {
    const dist = distanceFn(vector, centroids[assignments[i]]);
    wcss += dist * dist;
  });
  if (wcss === 0) return 0;

  return bgss / (k - 1) / (wcss / (n - k));
}
20165
/**
 * Gap statistic of a clustering
 *
 * Compares the log of the within-cluster dispersion of the given clustering
 * with the average log dispersion of `nReferences` reference data sets.
 * Each reference set has as many points as the input, drawn with
 * Math.random() uniformly inside the per-dimension min/max box of the data,
 * and is clustered with kmeans() (same k, at most 50 iterations).
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function, defaults to euclideanDistance
 * @param {number} nReferences - Number of reference data sets (default 10)
 * @returns {Promise<Object>} { gap, sk, expectedWk, actualWk }
 */
async function gapStatistic(vectors, assignments, centroids, distanceFn = euclideanDistance, nReferences = 10) {
  const pointCount = vectors.length;
  const clusterCount = centroids.length;
  const dimensions = vectors[0].length;

  // Log of the summed squared point-to-centroid distances; the small epsilon
  // keeps the logarithm finite for a zero dispersion.
  const logDispersion = (points, pointCentroids, pointAssignments) => {
    let dispersion = 0;
    points.forEach((point, idx) => {
      const dist = distanceFn(point, pointCentroids[pointAssignments[idx]]);
      dispersion += dist * dist;
    });
    return Math.log(dispersion + 1e-10);
  };

  const actualWk = logDispersion(vectors, centroids, assignments);

  // Bounding box of the data, one [min, max] range per dimension.
  const lower = new Array(dimensions).fill(Infinity);
  const upper = new Array(dimensions).fill(-Infinity);
  for (const vector of vectors) {
    vector.forEach((val, dim) => {
      lower[dim] = Math.min(lower[dim], val);
      upper[dim] = Math.max(upper[dim], val);
    });
  }

  const referenceWks = [];
  for (let ref = 0; ref < nReferences; ref++) {
    const sample = [];
    for (let p = 0; p < pointCount; p++) {
      const point = new Array(dimensions);
      for (let dim = 0; dim < dimensions; dim++) {
        point[dim] = lower[dim] + Math.random() * (upper[dim] - lower[dim]);
      }
      sample.push(point);
    }
    const fitted = kmeans(sample, clusterCount, { maxIterations: 50, distanceFn });
    referenceWks.push(logDispersion(sample, fitted.centroids, fitted.assignments));
  }

  const expectedWk = referenceWks.reduce((a, b) => a + b, 0) / nReferences;
  const squaredDeviations = referenceWks.reduce(
    (sum, refWk) => sum + Math.pow(refWk - expectedWk, 2),
    0
  );
  const sdk = Math.sqrt(squaredDeviations / nReferences);
  const sk = sdk * Math.sqrt(1 + 1 / nReferences);
  return { gap: expectedWk - actualWk, sk, expectedWk, actualWk };
}
20209
/**
 * Measure how reproducible k-means is for a given k
 *
 * Runs kmeans() `nRuns` times, passing the run number as `seed`, then
 * reports the mean / standard deviation of the inertia and the average
 * pairwise agreement between the runs' assignments.
 *
 * @param {number[][]} vectors - Data points
 * @param {number} k - Number of clusters
 * @param {Object} options - `nRuns` (default 10), `distanceFn` (default
 *   euclideanDistance); every other option is forwarded to kmeans()
 * @returns {Object} { avgInertia, stdInertia, cvInertia, avgSimilarity, stability }
 */
function clusteringStability(vectors, k, options = {}) {
  const { nRuns = 10, distanceFn = euclideanDistance, ...kmeansOptions } = options;

  const runs = [];
  for (let seed = 0; seed < nRuns; seed++) {
    // Each run gets its own seed so the runs differ from one another.
    runs.push(kmeans(vectors, k, { ...kmeansOptions, distanceFn, seed }));
  }
  const inertias = runs.map((run) => run.inertia);
  const allAssignments = runs.map((run) => run.assignments);

  // Agreement of every unordered pair of runs.
  const similarities = [];
  allAssignments.forEach((first, i) => {
    allAssignments.slice(i + 1).forEach((second) => {
      similarities.push(calculateAssignmentSimilarity(first, second));
    });
  });

  const sum = (values) => values.reduce((a, b) => a + b, 0);
  const avgInertia = sum(inertias) / nRuns;
  const variance = inertias.reduce((acc, val) => acc + Math.pow(val - avgInertia, 2), 0) / nRuns;
  const stdInertia = Math.sqrt(variance);
  // A single run has nothing to be compared with and counts as fully stable.
  const avgSimilarity = similarities.length > 0 ? sum(similarities) / similarities.length : 1;

  return {
    avgInertia,
    stdInertia,
    // Coefficient of variation
    cvInertia: avgInertia !== 0 ? stdInertia / avgInertia : 0,
    avgSimilarity,
    // Higher is more stable
    stability: avgSimilarity
  };
}
20249
/**
 * Pairwise agreement between two cluster assignments
 *
 * Counts the point pairs on which both assignments agree (the pair shares a
 * cluster in both, or is split in both) and divides by the number of pairs.
 * Cluster labels themselves do not need to match.
 *
 * @param {number[]} assignments1 - First assignment (defines the point count)
 * @param {number[]} assignments2 - Second assignment
 * @returns {number} Fraction of agreeing pairs, or 1 when there are no pairs
 */
function calculateAssignmentSimilarity(assignments1, assignments2) {
  const pointCount = assignments1.length;
  const pairCount = pointCount * (pointCount - 1) / 2;
  let agreeing = 0;
  for (let first = 0; first < pointCount; first++) {
    for (let second = first + 1; second < pointCount; second++) {
      const togetherIn1 = assignments1[first] === assignments1[second];
      const togetherIn2 = assignments2[first] === assignments2[second];
      if (togetherIn1 === togetherIn2) {
        agreeing += 1;
      }
    }
  }
  if (pairCount > 0) {
    return agreeing / pairCount;
  }
  return 1;
}
20264
+
20265
// Frozen, prototype-less bundle of the clustering quality metric functions.
var metrics = /*#__PURE__*/Object.freeze({
  __proto__: null,
  calinskiHarabaszIndex,
  clusteringStability,
  daviesBouldinIndex,
  gapStatistic,
  silhouetteScore
});
20273
+
18939
20274
  exports.AVAILABLE_BEHAVIORS = AVAILABLE_BEHAVIORS;
18940
20275
  exports.AnalyticsNotEnabledError = AnalyticsNotEnabledError;
18941
20276
  exports.AuditPlugin = AuditPlugin;
@@ -18990,6 +20325,7 @@ exports.StreamError = StreamError;
18990
20325
  exports.UnknownError = UnknownError;
18991
20326
  exports.ValidationError = ValidationError;
18992
20327
  exports.Validator = Validator;
20328
+ exports.VectorPlugin = VectorPlugin;
18993
20329
  exports.behaviors = behaviors;
18994
20330
  exports.calculateAttributeNamesSize = calculateAttributeNamesSize;
18995
20331
  exports.calculateAttributeSizes = calculateAttributeSizes;
@@ -19002,10 +20338,12 @@ exports.clearUTF8Memo = clearUTF8Memo;
19002
20338
  exports.clearUTF8Memory = clearUTF8Memory;
19003
20339
  exports.decode = decode;
19004
20340
  exports.decodeDecimal = decodeDecimal;
20341
+ exports.decodeFixedPoint = decodeFixedPoint;
19005
20342
  exports.decrypt = decrypt;
19006
20343
  exports.default = S3db;
19007
20344
  exports.encode = encode;
19008
20345
  exports.encodeDecimal = encodeDecimal;
20346
+ exports.encodeFixedPoint = encodeFixedPoint;
19009
20347
  exports.encrypt = encrypt;
19010
20348
  exports.getBehavior = getBehavior;
19011
20349
  exports.getSizeBreakdown = getSizeBreakdown;