s3db.js 11.2.4 → 11.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/s3db.es.js CHANGED
@@ -77,6 +77,41 @@ const decodeDecimal = (s) => {
77
77
  const num = decPart ? Number(decodedInt + "." + decPart) : decodedInt;
78
78
  return negative ? -num : num;
79
79
  };
80
/**
 * Encode a number as a fixed-point string: the value is multiplied by
 * 10^precision, rounded to an integer and written positionally using the
 * file-level `alphabet` / `base`, prefixed with "^" (and "-" when negative).
 *
 * @param {number} n - Value to encode.
 * @param {number} [precision=6] - Number of decimal digits kept.
 * @returns {string} The encoded string, "^0" when the scaled value rounds to
 *   zero, or the literal string "undefined" when `n` cannot be encoded.
 */
const encodeFixedPoint = (n, precision = 6) => {
  if (typeof n !== "number" || !Number.isFinite(n)) return "undefined";
  const scale = Math.pow(10, precision);
  const scaled = Math.round(n * scale);
  // n * scale can overflow to Infinity (or be NaN when scale itself is not
  // finite). Without this guard the digit loop below never terminates for
  // Infinity, and NaN would produce a bare "^".
  if (!Number.isFinite(scaled)) return "undefined";
  if (scaled === 0) return "^0";
  const negative = scaled < 0;
  let num = Math.abs(scaled);
  let s = "";
  while (num > 0) {
    s = alphabet[num % base] + s;
    num = Math.floor(num / base);
  }
  return "^" + (negative ? "-" : "") + s;
};
95
/**
 * Decode a string produced by the fixed-point encoder ("^" prefix, optional
 * "-", then digits looked up in the file-level `charToValue` table) back
 * into a number by dividing the decoded integer by 10^precision.
 *
 * @param {string} s - Encoded value.
 * @param {number} [precision=6] - Number of decimal digits used when encoding.
 * @returns {number} The decoded number, or NaN when `s` is not a string,
 *   lacks the "^" prefix, has no digits, or contains an unknown digit.
 */
const decodeFixedPoint = (s, precision = 6) => {
  if (typeof s !== "string" || !s.startsWith("^")) return NaN;
  let digits = s.slice(1);
  if (digits === "0") return 0;
  const negative = digits[0] === "-";
  if (negative) {
    digits = digits.slice(1);
  }
  // A bare "^" or "^-" carries no digits; report it as malformed instead of
  // silently decoding it to 0 / -0.
  if (digits === "") return NaN;
  let r = 0;
  for (let i = 0; i < digits.length; i++) {
    const idx = charToValue[digits[i]];
    if (idx === void 0) return NaN;
    r = r * base + idx;
  }
  const scale = Math.pow(10, precision);
  const scaled = negative ? -r : r;
  return scaled / scale;
};
80
115
 
81
116
  const utf8BytesMemory = /* @__PURE__ */ new Map();
82
117
  const UTF8_MEMORY_MAX_SIZE = 1e4;
@@ -11501,6 +11536,11 @@ class Validator extends FastestValidator {
11501
11536
  type: "any",
11502
11537
  custom: this.autoEncrypt ? jsonHandler : void 0
11503
11538
  });
11539
+ this.alias("embedding", {
11540
+ type: "array",
11541
+ items: "number",
11542
+ empty: false
11543
+ });
11504
11544
  }
11505
11545
  }
11506
11546
  const ValidatorManager = new Proxy(Validator, {
@@ -11749,6 +11789,59 @@ const SchemaActions = {
11749
11789
  }
11750
11790
  return NaN;
11751
11791
  });
11792
+ },
11793
+ fromArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
11794
+ if (value === null || value === void 0 || !Array.isArray(value)) {
11795
+ return value;
11796
+ }
11797
+ if (value.length === 0) {
11798
+ return "";
11799
+ }
11800
+ const encodedItems = value.map((item) => {
11801
+ if (typeof item === "number" && !isNaN(item)) {
11802
+ return encodeFixedPoint(item, precision);
11803
+ }
11804
+ const n = Number(item);
11805
+ return isNaN(n) ? "" : encodeFixedPoint(n, precision);
11806
+ });
11807
+ return encodedItems.join(separator);
11808
+ },
11809
/**
 * Inverse of fromArrayOfEmbeddings: turn a separator-joined string back into
 * an array of numbers via decodeFixedPoint.
 *
 * - An array is mapped item by item (numbers are kept, the rest is decoded).
 * - null / undefined are returned untouched; "" becomes [].
 * - Anything else is stringified and split on `separator`; a backslash makes
 *   the following character literal. Empty tokens decode to NaN.
 */
toArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
  if (Array.isArray(value)) {
    return value.map((entry) => (typeof entry === "number" ? entry : decodeFixedPoint(entry, precision)));
  }
  if (value === null || value === void 0) return value;
  if (value === "") return [];

  const text = String(value);
  const tokens = [];
  let buffer = "";
  for (let pos = 0; pos < text.length; pos++) {
    const ch = text[pos];
    if (ch === "\\" && pos + 1 < text.length) {
      // Escaped character: take it verbatim and skip over it.
      buffer += text[pos + 1];
      pos++;
    } else if (ch === separator) {
      tokens.push(buffer);
      buffer = "";
    } else {
      buffer += ch;
    }
  }
  tokens.push(buffer);

  // Tokens are always strings here; decodeFixedPoint already yields NaN for
  // anything it cannot parse.
  return tokens.map((token) => (token === "" ? NaN : decodeFixedPoint(token, precision)));
}
11753
11846
  };
11754
11847
  class Schema {
@@ -11818,18 +11911,89 @@ class Schema {
11818
11911
  }
11819
11912
  return objectKeys;
11820
11913
  }
11914
+ _generateHooksFromOriginalAttributes(attributes, prefix = "") {
11915
+ for (const [key, value] of Object.entries(attributes)) {
11916
+ if (key.startsWith("$$")) continue;
11917
+ const fullKey = prefix ? `${prefix}.${key}` : key;
11918
+ if (typeof value === "object" && value !== null && !Array.isArray(value) && value.type) {
11919
+ if (value.type === "array" && value.items) {
11920
+ const itemsType = value.items;
11921
+ const arrayLength = typeof value.length === "number" ? value.length : null;
11922
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11923
+ this.addHook("beforeMap", fullKey, "fromArray");
11924
+ this.addHook("afterUnmap", fullKey, "toArray");
11925
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11926
+ const isIntegerArray = typeof itemsType === "string" && itemsType.includes("integer");
11927
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11928
+ if (isIntegerArray) {
11929
+ this.addHook("beforeMap", fullKey, "fromArrayOfNumbers");
11930
+ this.addHook("afterUnmap", fullKey, "toArrayOfNumbers");
11931
+ } else if (isEmbedding) {
11932
+ this.addHook("beforeMap", fullKey, "fromArrayOfEmbeddings");
11933
+ this.addHook("afterUnmap", fullKey, "toArrayOfEmbeddings");
11934
+ } else {
11935
+ this.addHook("beforeMap", fullKey, "fromArrayOfDecimals");
11936
+ this.addHook("afterUnmap", fullKey, "toArrayOfDecimals");
11937
+ }
11938
+ }
11939
+ }
11940
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value) && !value.type) {
11941
+ this._generateHooksFromOriginalAttributes(value, fullKey);
11942
+ }
11943
+ }
11944
+ }
11821
11945
  generateAutoHooks() {
11946
+ this._generateHooksFromOriginalAttributes(this.attributes);
11822
11947
  const schema = flatten(cloneDeep(this.attributes), { safe: true });
11823
11948
  for (const [name, definition] of Object.entries(schema)) {
11824
- if (definition.includes("array")) {
11825
- if (definition.includes("items:string")) {
11949
+ if (name.includes("$$")) continue;
11950
+ if (this.options.hooks.beforeMap[name] || this.options.hooks.afterUnmap[name]) {
11951
+ continue;
11952
+ }
11953
+ const defStr = typeof definition === "string" ? definition : "";
11954
+ const defType = typeof definition === "object" && definition !== null ? definition.type : null;
11955
+ const isEmbeddingType = defStr.includes("embedding") || defType === "embedding";
11956
+ if (isEmbeddingType) {
11957
+ const lengthMatch = defStr.match(/embedding:(\d+)/);
11958
+ if (lengthMatch) {
11959
+ parseInt(lengthMatch[1], 10);
11960
+ } else if (defStr.includes("length:")) {
11961
+ const match = defStr.match(/length:(\d+)/);
11962
+ if (match) parseInt(match[1], 10);
11963
+ }
11964
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
11965
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11966
+ continue;
11967
+ }
11968
+ const isArray = defStr.includes("array") || defType === "array";
11969
+ if (isArray) {
11970
+ let itemsType = null;
11971
+ if (typeof definition === "object" && definition !== null && definition.items) {
11972
+ itemsType = definition.items;
11973
+ } else if (defStr.includes("items:string")) {
11974
+ itemsType = "string";
11975
+ } else if (defStr.includes("items:number")) {
11976
+ itemsType = "number";
11977
+ }
11978
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11826
11979
  this.addHook("beforeMap", name, "fromArray");
11827
11980
  this.addHook("afterUnmap", name, "toArray");
11828
- } else if (definition.includes("items:number")) {
11829
- const isIntegerArray = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
11981
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11982
+ const isIntegerArray = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer") || typeof itemsType === "string" && itemsType.includes("integer");
11983
+ let arrayLength = null;
11984
+ if (typeof definition === "object" && definition !== null && typeof definition.length === "number") {
11985
+ arrayLength = definition.length;
11986
+ } else if (defStr.includes("length:")) {
11987
+ const match = defStr.match(/length:(\d+)/);
11988
+ if (match) arrayLength = parseInt(match[1], 10);
11989
+ }
11990
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11830
11991
  if (isIntegerArray) {
11831
11992
  this.addHook("beforeMap", name, "fromArrayOfNumbers");
11832
11993
  this.addHook("afterUnmap", name, "toArrayOfNumbers");
11994
+ } else if (isEmbedding) {
11995
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
11996
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11833
11997
  } else {
11834
11998
  this.addHook("beforeMap", name, "fromArrayOfDecimals");
11835
11999
  this.addHook("afterUnmap", name, "toArrayOfDecimals");
@@ -11837,7 +12001,7 @@ class Schema {
11837
12001
  }
11838
12002
  continue;
11839
12003
  }
11840
- if (definition.includes("secret")) {
12004
+ if (defStr.includes("secret") || defType === "secret") {
11841
12005
  if (this.options.autoEncrypt) {
11842
12006
  this.addHook("beforeMap", name, "encrypt");
11843
12007
  }
@@ -11846,8 +12010,8 @@ class Schema {
11846
12010
  }
11847
12011
  continue;
11848
12012
  }
11849
- if (definition.includes("number")) {
11850
- const isInteger = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
12013
+ if (defStr.includes("number") || defType === "number") {
12014
+ const isInteger = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer");
11851
12015
  if (isInteger) {
11852
12016
  this.addHook("beforeMap", name, "toBase62");
11853
12017
  this.addHook("afterUnmap", name, "fromBase62");
@@ -11857,17 +12021,17 @@ class Schema {
11857
12021
  }
11858
12022
  continue;
11859
12023
  }
11860
- if (definition.includes("boolean")) {
12024
+ if (defStr.includes("boolean") || defType === "boolean") {
11861
12025
  this.addHook("beforeMap", name, "fromBool");
11862
12026
  this.addHook("afterUnmap", name, "toBool");
11863
12027
  continue;
11864
12028
  }
11865
- if (definition.includes("json")) {
12029
+ if (defStr.includes("json") || defType === "json") {
11866
12030
  this.addHook("beforeMap", name, "toJSON");
11867
12031
  this.addHook("afterUnmap", name, "fromJSON");
11868
12032
  continue;
11869
12033
  }
11870
- if (definition === "object" || definition.includes("object")) {
12034
+ if (definition === "object" || defStr.includes("object") || defType === "object") {
11871
12035
  this.addHook("beforeMap", name, "toJSON");
11872
12036
  this.addHook("afterUnmap", name, "fromJSON");
11873
12037
  continue;
@@ -12009,7 +12173,8 @@ class Schema {
12009
12173
  const originalKey = reversedMap && reversedMap[key] ? reversedMap[key] : key;
12010
12174
  let parsedValue = value;
12011
12175
  const attrDef = this.getAttributeDefinition(originalKey);
12012
- if (typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12176
+ const hasAfterUnmapHook = this.options.hooks?.afterUnmap?.[originalKey];
12177
+ if (!hasAfterUnmapHook && typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12013
12178
  if (typeof parsedValue === "string" && parsedValue !== "") {
12014
12179
  parsedValue = decode(parsedValue);
12015
12180
  } else if (typeof parsedValue === "number") ; else {
@@ -12074,18 +12239,38 @@ class Schema {
12074
12239
  preprocessAttributesForValidation(attributes) {
12075
12240
  const processed = {};
12076
12241
  for (const [key, value] of Object.entries(attributes)) {
12077
- if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12078
- const isExplicitRequired = value.$$type && value.$$type.includes("required");
12079
- const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12080
- const objectConfig = {
12081
- type: "object",
12082
- properties: this.preprocessAttributesForValidation(value),
12083
- strict: false
12084
- };
12085
- if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12086
- objectConfig.optional = true;
12242
+ if (typeof value === "string") {
12243
+ if (value.startsWith("embedding:")) {
12244
+ const lengthMatch = value.match(/embedding:(\d+)/);
12245
+ if (lengthMatch) {
12246
+ const length = lengthMatch[1];
12247
+ const rest = value.substring(`embedding:${length}`.length);
12248
+ processed[key] = `array|items:number|length:${length}|empty:false${rest}`;
12249
+ continue;
12250
+ }
12251
+ }
12252
+ if (value.startsWith("embedding|") || value === "embedding") {
12253
+ processed[key] = value.replace(/^embedding/, "array|items:number|empty:false");
12254
+ continue;
12255
+ }
12256
+ processed[key] = value;
12257
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12258
+ const hasValidatorType = value.type !== void 0 && key !== "$$type";
12259
+ if (hasValidatorType) {
12260
+ processed[key] = value;
12261
+ } else {
12262
+ const isExplicitRequired = value.$$type && value.$$type.includes("required");
12263
+ const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12264
+ const objectConfig = {
12265
+ type: "object",
12266
+ properties: this.preprocessAttributesForValidation(value),
12267
+ strict: false
12268
+ };
12269
+ if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12270
+ objectConfig.optional = true;
12271
+ }
12272
+ processed[key] = objectConfig;
12087
12273
  }
12088
- processed[key] = objectConfig;
12089
12274
  } else {
12090
12275
  processed[key] = value;
12091
12276
  }
@@ -12936,6 +13121,71 @@ ${errorDetails}`,
12936
13121
  }
12937
13122
  return true;
12938
13123
  }
13124
+ /**
13125
+ * Find orphaned partitions (partitions that reference non-existent fields)
13126
+ * @returns {Object} Object with orphaned partition names as keys and details as values
13127
+ * @example
13128
+ * const orphaned = resource.findOrphanedPartitions();
13129
+ * // Returns: { byRegion: { missingFields: ['region'], definition: {...} } }
13130
+ */
13131
+ findOrphanedPartitions() {
13132
+ const orphaned = {};
13133
+ if (!this.config.partitions) {
13134
+ return orphaned;
13135
+ }
13136
+ for (const [partitionName, partitionDef] of Object.entries(this.config.partitions)) {
13137
+ if (!partitionDef.fields) {
13138
+ continue;
13139
+ }
13140
+ const missingFields = [];
13141
+ for (const fieldName of Object.keys(partitionDef.fields)) {
13142
+ if (!this.fieldExistsInAttributes(fieldName)) {
13143
+ missingFields.push(fieldName);
13144
+ }
13145
+ }
13146
+ if (missingFields.length > 0) {
13147
+ orphaned[partitionName] = {
13148
+ missingFields,
13149
+ definition: partitionDef,
13150
+ allFields: Object.keys(partitionDef.fields)
13151
+ };
13152
+ }
13153
+ }
13154
+ return orphaned;
13155
+ }
13156
+ /**
13157
+ * Remove orphaned partitions (partitions that reference non-existent fields)
13158
+ * WARNING: This will modify the resource configuration and should be followed by uploadMetadataFile()
13159
+ * @param {Object} options - Options
13160
+ * @param {boolean} options.dryRun - If true, only returns what would be removed without modifying (default: false)
13161
+ * @returns {Object} Object with removed partition names and details
13162
+ * @example
13163
+ * // Dry run to see what would be removed
13164
+ * const toRemove = resource.removeOrphanedPartitions({ dryRun: true });
13165
+ * console.log('Would remove:', toRemove);
13166
+ *
13167
+ * // Actually remove orphaned partitions
13168
+ * const removed = resource.removeOrphanedPartitions();
13169
+ * await database.uploadMetadataFile(); // Save changes to S3
13170
+ */
13171
+ removeOrphanedPartitions({ dryRun = false } = {}) {
13172
+ const orphaned = this.findOrphanedPartitions();
13173
+ if (Object.keys(orphaned).length === 0) {
13174
+ return {};
13175
+ }
13176
+ if (dryRun) {
13177
+ return orphaned;
13178
+ }
13179
+ for (const partitionName of Object.keys(orphaned)) {
13180
+ delete this.config.partitions[partitionName];
13181
+ }
13182
+ this.emit("orphanedPartitionsRemoved", {
13183
+ resourceName: this.name,
13184
+ removed: Object.keys(orphaned),
13185
+ details: orphaned
13186
+ });
13187
+ return orphaned;
13188
+ }
12939
13189
  /**
12940
13190
  * Apply a single partition rule to a field value
12941
13191
  * @param {*} value - The field value
@@ -15029,7 +15279,7 @@ class Database extends EventEmitter {
15029
15279
  this.id = idGenerator(7);
15030
15280
  this.version = "1";
15031
15281
  this.s3dbVersion = (() => {
15032
- const [ok, err, version] = tryFn(() => true ? "11.2.4" : "latest");
15282
+ const [ok, err, version] = tryFn(() => true ? "11.2.6" : "latest");
15033
15283
  return ok ? version : "latest";
15034
15284
  })();
15035
15285
  this.resources = {};
@@ -18932,5 +19182,1090 @@ class StateMachinePlugin extends Plugin {
18932
19182
  }
18933
19183
  }
18934
19184
 
18935
- export { AVAILABLE_BEHAVIORS, AnalyticsNotEnabledError, AuditPlugin, AuthenticationError, BackupPlugin, BaseError, BehaviorError, CachePlugin, Client, ConnectionString, ConnectionStringError, CostsPlugin, CryptoError, DEFAULT_BEHAVIOR, Database, DatabaseError, EncryptionError, ErrorMap, EventualConsistencyPlugin, FullTextPlugin, InvalidResourceItem, MetadataLimitError, MetricsPlugin, MissingMetadata, NoSuchBucket, NoSuchKey, NotFound, PartitionDriverError, PartitionError, PermissionError, Plugin, PluginError, PluginObject, PluginStorageError, QueueConsumerPlugin, ReplicatorPlugin, Resource, ResourceError, ResourceIdsPageReader, ResourceIdsReader, ResourceNotFound, ResourceReader, ResourceWriter, S3QueuePlugin, Database as S3db, S3dbError, SchedulerPlugin, Schema, SchemaError, StateMachinePlugin, StreamError, UnknownError, ValidationError, Validator, behaviors, calculateAttributeNamesSize, calculateAttributeSizes, calculateEffectiveLimit, calculateSystemOverhead, calculateTotalSize, calculateUTF8Bytes, clearUTF8Cache, clearUTF8Memo, clearUTF8Memory, decode, decodeDecimal, decrypt, S3db as default, encode, encodeDecimal, encrypt, getBehavior, getSizeBreakdown, idGenerator, mapAwsError, md5, passwordGenerator, sha256, streamToString, transformValue, tryFn, tryFnSync };
19185
/**
 * Cosine distance (1 - cosine similarity) between two vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector, same length as `a`.
 * @returns {number} Distance in [0, 2]. When the norm product is zero the
 *   result is 0 if both vectors are all zeros and 1 otherwise.
 * @throws {Error} When the vectors differ in length.
 */
function cosineDistance(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return a.every((v) => v === 0) && b.every((v) => v === 0) ? 0 : 1;
  }
  // Rounding in the square roots can push the ratio a few ULPs outside
  // [-1, 1], which would produce a negative distance (or one above 2) for
  // (anti-)parallel vectors. Clamp so the documented range always holds.
  const similarity = Math.min(1, Math.max(-1, dot / denominator));
  return 1 - similarity;
}
19204
/**
 * Euclidean (L2) distance between two vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Square root of the summed squared component differences.
 * @throws {Error} When the vectors differ in length.
 */
function euclideanDistance(a, b) {
  const dimensions = a.length;
  if (dimensions !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let squaredTotal = 0;
  let index = 0;
  while (index < dimensions) {
    const delta = a[index] - b[index];
    squaredTotal += delta * delta;
    index += 1;
  }
  return Math.sqrt(squaredTotal);
}
19215
/**
 * Manhattan (L1) distance between two vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the absolute component differences.
 * @throws {Error} When the vectors differ in length.
 */
function manhattanDistance(a, b) {
  const dimensions = a.length;
  if (dimensions !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let index = 0;
  while (index < dimensions) {
    total += Math.abs(a[index] - b[index]);
    index += 1;
  }
  return total;
}
19225
/**
 * Dot product of two vectors of equal length.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the pairwise component products.
 * @throws {Error} When the vectors differ in length.
 */
function dotProduct(a, b) {
  const dimensions = a.length;
  if (dimensions !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let index = 0;
  while (index < dimensions) {
    total += a[index] * b[index];
    index += 1;
  }
  return total;
}
19235
/**
 * Scale a vector to unit Euclidean length.
 *
 * @param {number[]} vector - Vector to normalize; it is not modified.
 * @returns {number[]} A new array: the scaled vector, or a plain copy when
 *   the input has zero magnitude.
 */
function normalize(vector) {
  const sumOfSquares = vector.reduce((acc, component) => acc + component * component, 0);
  const length = Math.sqrt(sumOfSquares);
  return length === 0
    ? vector.slice()
    : vector.map((component) => component / length);
}
19244
+
19245
/**
 * Cluster vectors into `k` groups with Lloyd's iteration, starting from
 * centroids chosen by initializeCentroidsKMeansPlusPlus.
 *
 * Each pass assigns every vector to its nearest centroid, measures the
 * inertia (sum of squared distances) and stops once the inertia changes by
 * less than `tolerance` or `maxIterations` passes have run. Clusters that
 * end up empty are re-seeded from a randomly chosen input vector.
 *
 * @param {number[][]} vectors - Vectors to cluster, all of the same length.
 * @param {number} k - Number of clusters, between 1 and vectors.length.
 * @param {Object} [options]
 * @param {number} [options.maxIterations=100] - Upper bound on passes.
 * @param {number} [options.tolerance=1e-4] - Inertia change treated as convergence.
 * @param {Function} [options.distanceFn=euclideanDistance] - (a, b) => number.
 * @param {number|null} [options.seed=null] - Forwarded to centroid initialization.
 * @param {Function|null} [options.onIteration=null] - Called as
 *   (iterationNumber, inertia, converged) after each assignment pass.
 * @returns {{centroids: number[][], assignments: number[], iterations: number, converged: boolean, inertia: number}}
 * @throws {Error} On empty input, k out of range, or mixed vector lengths.
 */
function kmeans(vectors, k, options = {}) {
  const {
    maxIterations = 100,
    tolerance = 1e-4,
    distanceFn = euclideanDistance,
    seed = null,
    onIteration = null
  } = options;

  if (vectors.length === 0) {
    throw new Error("Cannot cluster empty vector array");
  }
  if (k < 1) {
    throw new Error(`k must be at least 1, got ${k}`);
  }
  if (k > vectors.length) {
    throw new Error(`k (${k}) cannot be greater than number of vectors (${vectors.length})`);
  }
  const dimensions = vectors[0].length;
  for (let i = 1; i < vectors.length; i++) {
    if (vectors[i].length !== dimensions) {
      throw new Error(`All vectors must have same dimensions. Expected ${dimensions}, got ${vectors[i].length} at index ${i}`);
    }
  }

  const centroids = initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed);

  // Index of the centroid closest to `vector`; ties keep the lowest index.
  const nearestCentroid = (vector) => {
    let bestDistance = Infinity;
    let bestIndex = 0;
    for (let c = 0; c < k; c++) {
      const candidate = distanceFn(vector, centroids[c]);
      if (candidate < bestDistance) {
        bestDistance = candidate;
        bestIndex = c;
      }
    }
    return bestIndex;
  };

  // Sum of squared distances from each vector to its assigned centroid.
  const measureInertia = (labels) => {
    let total = 0;
    vectors.forEach((vector, i) => {
      const d = distanceFn(vector, centroids[labels[i]]);
      total += d * d;
    });
    return total;
  };

  // Move each centroid to the mean of its members.
  const recomputeCentroids = (labels) => {
    const sums = Array(k).fill(null).map(() => new Array(dimensions).fill(0));
    const counts = new Array(k).fill(0);
    vectors.forEach((vector, i) => {
      const cluster = labels[i];
      counts[cluster]++;
      vector.forEach((component, j) => {
        sums[cluster][j] += component;
      });
    });
    for (let c = 0; c < k; c++) {
      if (counts[c] > 0) {
        centroids[c] = sums[c].map((sum) => sum / counts[c]);
      } else {
        // Empty cluster: restart it from a random input vector.
        const randomIdx = Math.floor(Math.random() * vectors.length);
        centroids[c] = [...vectors[randomIdx]];
      }
    }
  };

  let assignments = new Array(vectors.length);
  let iterations = 0;
  let converged = false;
  let previousInertia = Infinity;

  while (!converged && iterations < maxIterations) {
    const nextAssignments = vectors.map((vector) => nearestCentroid(vector));
    const currentInertia = measureInertia(nextAssignments);
    converged = Math.abs(previousInertia - currentInertia) < tolerance;
    assignments = nextAssignments;
    previousInertia = currentInertia;
    if (onIteration) {
      onIteration(iterations + 1, currentInertia, converged);
    }
    if (!converged) {
      recomputeCentroids(assignments);
    }
    iterations++;
  }

  // Centroids may have moved after the last assignment pass, so measure the
  // final inertia against the centroids that are actually returned.
  const inertia = measureInertia(assignments);
  return {
    centroids,
    assignments,
    iterations,
    converged,
    inertia
  };
}
19332
/**
 * Choose `k` starting centroids with the k-means++ scheme: the first one is
 * picked directly, every following one is drawn with probability
 * proportional to its squared distance from the nearest centroid so far.
 *
 * When `seed` is given, the first centroid is `vectors[seed mod n]` (as
 * before for non-negative integer seeds) and every later random draw comes
 * from a generator seeded with it, so the same seed always yields the same
 * centroids. Without a seed, Math.random is used throughout.
 *
 * @param {number[][]} vectors - Candidate vectors.
 * @param {number} k - Number of centroids to return.
 * @param {Function} distanceFn - (a, b) => number.
 * @param {number|null} [seed] - Optional seed; null/undefined means unseeded.
 * @returns {number[][]} `k` centroids, each a copy of an input vector.
 * @throws {TypeError} When `seed` is provided but is not a finite number.
 */
function initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed) {
  const n = vectors.length;
  const hasSeed = seed !== null && seed !== undefined;
  const numericSeed = hasSeed ? Math.trunc(Number(seed)) : 0;
  if (hasSeed && !Number.isFinite(numericSeed)) {
    throw new TypeError(`seed must be a finite number, got ${String(seed)}`);
  }

  // mulberry32: small deterministic generator returning values in [0, 1).
  let state = numericSeed >>> 0;
  const seededRandom = () => {
    state = (state + 0x6d2b79f5) >>> 0;
    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
  const random = hasSeed ? seededRandom : () => Math.random();

  // ((x % n) + n) % n keeps negative seeds inside [0, n); the previous
  // `seed % n` produced a negative or fractional index and crashed.
  const firstIndex = hasSeed
    ? ((numericSeed % n) + n) % n
    : Math.floor(random() * n);

  const centroids = [[...vectors[firstIndex]]];
  for (let i = 1; i < k; i++) {
    const squaredDistances = vectors.map((vector) => {
      const nearest = Math.min(...centroids.map((c) => distanceFn(vector, c)));
      return nearest * nearest;
    });
    const totalSquared = squaredDistances.reduce((a, b) => a + b, 0);
    if (totalSquared === 0) {
      // Every vector coincides with an existing centroid; any pick will do.
      centroids.push([...vectors[Math.floor(random() * n)]]);
      continue;
    }
    const threshold = random() * totalSquared;
    // Default to the last vector so a centroid is always added, even when
    // the cumulative comparison never succeeds (e.g. a NaN distance).
    let chosen = n - 1;
    let cumulativeSum = 0;
    for (let j = 0; j < n; j++) {
      cumulativeSum += squaredDistances[j];
      if (cumulativeSum >= threshold) {
        chosen = j;
        break;
      }
    }
    centroids.push([...vectors[chosen]]);
  }
  return centroids;
}
19360
/**
 * Run k-means for every k in [minK, maxK], score each clustering with
 * several quality metrics and report which k each metric prefers, plus a
 * majority-vote consensus.
 *
 * @param {number[][]} vectors - Vectors to analyse.
 * @param {Object} [options]
 * @param {number} [options.minK=2] - Smallest k to evaluate.
 * @param {number} [options.maxK] - Largest k to evaluate; defaults to
 *   min(10, floor(sqrt(vectors.length / 2))).
 * @param {Function} [options.distanceFn=euclideanDistance] - (a, b) => number.
 * @param {number} [options.nReferences=10] - Passed to gapStatistic.
 * @param {number} [options.stabilityRuns=5] - Passed to clusteringStability as nRuns.
 *   Any other option is forwarded to kmeans.
 * @returns {Promise<{results: Object[], recommendations: Object, consensus: number, summary: Object}>}
 * @throws {Error} When the k range is empty (minK greater than maxK), which
 *   is what the defaults produce for fewer than 8 vectors.
 */
async function findOptimalK(vectors, options = {}) {
  const {
    minK = 2,
    maxK = Math.min(10, Math.floor(Math.sqrt(vectors.length / 2))),
    distanceFn = euclideanDistance,
    nReferences = 10,
    stabilityRuns = 5,
    ...kmeansOptions
  } = options;

  // An empty range would leave `results` empty and make the reduce() calls
  // below fail with an opaque "Reduce of empty array" TypeError.
  if (!(minK <= maxK)) {
    throw new Error(
      `Invalid k range: minK (${minK}) must be <= maxK (${maxK}). Provide more vectors (got ${vectors.length}) or pass minK/maxK explicitly.`
    );
  }

  const metricsModule = await Promise.resolve().then(function () { return metrics; });
  const {
    silhouetteScore,
    daviesBouldinIndex,
    calinskiHarabaszIndex,
    gapStatistic,
    clusteringStability
  } = metricsModule;

  const results = [];
  for (let k = minK; k <= maxK; k++) {
    const kmeansResult = kmeans(vectors, k, { ...kmeansOptions, distanceFn });
    const silhouette = silhouetteScore(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );
    const daviesBouldin = daviesBouldinIndex(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );
    const calinskiHarabasz = calinskiHarabaszIndex(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );
    const gap = await gapStatistic(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn,
      nReferences
    );
    const stability = clusteringStability(
      vectors,
      k,
      { ...kmeansOptions, distanceFn, nRuns: stabilityRuns }
    );
    results.push({
      k,
      inertia: kmeansResult.inertia,
      silhouette,
      daviesBouldin,
      calinskiHarabasz,
      gap: gap.gap,
      gapSk: gap.sk,
      stability: stability.stability,
      cvInertia: stability.cvInertia,
      iterations: kmeansResult.iterations,
      converged: kmeansResult.converged
    });
  }

  // findElbowPoint returns an index into `results`; offset it to get a k.
  const elbowK = findElbowPoint(results.map((r) => r.inertia));
  const recommendations = {
    elbow: minK + elbowK,
    silhouette: results.reduce(
      (best, curr) => curr.silhouette > best.silhouette ? curr : best
    ).k,
    daviesBouldin: results.reduce(
      (best, curr) => curr.daviesBouldin < best.daviesBouldin ? curr : best
    ).k,
    calinskiHarabasz: results.reduce(
      (best, curr) => curr.calinskiHarabasz > best.calinskiHarabasz ? curr : best
    ).k,
    gap: results.reduce(
      (best, curr) => curr.gap > best.gap ? curr : best
    ).k,
    stability: results.reduce(
      (best, curr) => curr.stability > best.stability ? curr : best
    ).k
  };

  // Majority vote across the metrics; the vote tally is keyed by k.
  const votes = Object.values(recommendations);
  const consensus = votes.reduce((acc, k) => {
    acc[k] = (acc[k] || 0) + 1;
    return acc;
  }, {});
  const consensusK = Number.parseInt(
    Object.entries(consensus).reduce((a, b) => b[1] > a[1] ? b : a)[0],
    10
  );

  return {
    results,
    recommendations,
    consensus: consensusK,
    summary: {
      analysisRange: `${minK}-${maxK}`,
      totalVectors: vectors.length,
      dimensions: vectors[0].length,
      recommendation: consensusK,
      confidence: consensus[consensusK] / votes.length
    }
  };
}
19464
/**
 * Locate the "elbow" of an inertia curve: the interior index with the
 * largest discrete second difference
 * (inertias[i - 1] - 2 * inertias[i] + inertias[i + 1]).
 *
 * @param {number[]} inertias - Inertia per evaluated k, in increasing k order.
 * @returns {number} Index of the elbow, or 0 when fewer than three points
 *   are available.
 */
function findElbowPoint(inertias) {
  const count = inertias.length;
  if (count < 3) return 0;

  let best = { index: 0, curvature: -Infinity };
  for (let i = 1; i < count - 1; i++) {
    const curvature = inertias[i - 1] - 2 * inertias[i] + inertias[i + 1];
    // Strict comparison keeps the earliest index on ties.
    if (curvature > best.curvature) {
      best = { index: i, curvature };
    }
  }
  return best.index;
}
19478
+
19479
/**
 * Error raised by the vector plugin. Forwards `details` to PluginError with
 * `pluginName` set to "VectorPlugin" and, unless the caller supplies its own
 * `description`, a built-in troubleshooting text.
 */
class VectorError extends PluginError {
  /**
   * @param {string} message - Error message.
   * @param {Object} [details] - Extra fields; `operation` is quoted in the
   *   default description and `description` replaces it entirely.
   */
  constructor(message, details = {}) {
    const description = details.description || `
Vector Plugin Error

Operation: ${details.operation || "unknown"}

Common causes:
1. Vector dimension mismatch between vectors
2. Invalid distance metric specified (must be: cosine, euclidean, manhattan)
3. Empty vector array provided for clustering
4. k value larger than number of available vectors
5. Vector field not found or invalid in resource
6. Large vectors without proper behavior (use 'body-overflow' or 'body-only')

Available distance metrics:
- cosine: Best for normalized vectors, semantic similarity. Range: [0, 2]
- euclidean: Standard L2 distance, geometric proximity. Range: [0, \u221E)
- manhattan: L1 distance, faster computation. Range: [0, \u221E)

Storage considerations:
- Vectors > 250 dimensions may exceed S3 metadata limit (2KB)
- Use behavior: 'body-overflow' or 'body-only' for large vectors
- OpenAI ada-002 (1536 dims): ~10KB, requires body storage
- Sentence Transformers (384 dims): ~2.7KB, requires body storage
`.trim();
    super(message, {
      pluginName: "VectorPlugin",
      ...details,
      description
    });
  }
}
19511
+
19512
+ class VectorPlugin extends Plugin {
19513
+ constructor(options = {}) {
19514
+ super(options);
19515
+ this.config = {
19516
+ dimensions: 1536,
19517
+ // Default to OpenAI text-embedding-3-small/3-large
19518
+ distanceMetric: "cosine",
19519
+ // Default metric
19520
+ storageThreshold: 1500,
19521
+ // Bytes - warn if vectors exceed this
19522
+ autoFixBehavior: false,
19523
+ // Automatically set body-overflow
19524
+ autoDetectVectorField: true,
19525
+ // Auto-detect embedding:XXX fields
19526
+ emitEvents: true,
19527
+ // Emit events for monitoring
19528
+ verboseEvents: false,
19529
+ // Emit detailed progress events
19530
+ eventThrottle: 100,
19531
+ // Throttle progress events (ms)
19532
+ ...options
19533
+ };
19534
+ this.distanceFunctions = {
19535
+ cosine: cosineDistance,
19536
+ euclidean: euclideanDistance,
19537
+ manhattan: manhattanDistance
19538
+ };
19539
+ this._vectorFieldCache = /* @__PURE__ */ new Map();
19540
+ this._throttleState = /* @__PURE__ */ new Map();
19541
+ }
19542
+ async onInstall() {
19543
+ this.emit("installed", { plugin: "VectorPlugin" });
19544
+ this.validateVectorStorage();
19545
+ this.installResourceMethods();
19546
+ }
19547
+ async onStart() {
19548
+ this.emit("started", { plugin: "VectorPlugin" });
19549
+ }
19550
+ async onStop() {
19551
+ this.emit("stopped", { plugin: "VectorPlugin" });
19552
+ }
19553
+ async onUninstall(options) {
19554
+ for (const resource of Object.values(this.database.resources)) {
19555
+ delete resource.vectorSearch;
19556
+ delete resource.cluster;
19557
+ delete resource.vectorDistance;
19558
+ delete resource.similarTo;
19559
+ delete resource.findSimilar;
19560
+ delete resource.distance;
19561
+ }
19562
+ this.emit("uninstalled", { plugin: "VectorPlugin" });
19563
+ }
19564
+ /**
19565
+ * Validate vector storage configuration for all resources
19566
+ *
19567
+ * Detects large vector fields and warns if proper behavior is not set.
19568
+ * Can optionally auto-fix by setting body-overflow behavior.
19569
+ */
19570
+ validateVectorStorage() {
19571
+ for (const resource of Object.values(this.database.resources)) {
19572
+ const vectorFields = this.findVectorFields(resource.schema.attributes);
19573
+ if (vectorFields.length === 0) continue;
19574
+ const totalVectorSize = vectorFields.reduce((sum, f) => sum + f.estimatedBytes, 0);
19575
+ if (totalVectorSize > this.config.storageThreshold) {
19576
+ const hasCorrectBehavior = ["body-overflow", "body-only"].includes(resource.behavior);
19577
+ if (!hasCorrectBehavior) {
19578
+ const warning = {
19579
+ resource: resource.name,
19580
+ vectorFields: vectorFields.map((f) => ({
19581
+ field: f.name,
19582
+ dimensions: f.length,
19583
+ estimatedBytes: f.estimatedBytes
19584
+ })),
19585
+ totalEstimatedBytes: totalVectorSize,
19586
+ metadataLimit: 2047,
19587
+ currentBehavior: resource.behavior || "default",
19588
+ recommendation: "body-overflow"
19589
+ };
19590
+ this.emit("vector:storage-warning", warning);
19591
+ if (this.config.autoFixBehavior) {
19592
+ resource.behavior = "body-overflow";
19593
+ this.emit("vector:behavior-fixed", {
19594
+ resource: resource.name,
19595
+ newBehavior: "body-overflow"
19596
+ });
19597
+ } else {
19598
+ console.warn(`\u26A0\uFE0F VectorPlugin: Resource '${resource.name}' has large vector fields (${totalVectorSize} bytes estimated)`);
19599
+ console.warn(` Current behavior: '${resource.behavior || "default"}'`);
19600
+ console.warn(` Recommendation: Add behavior: 'body-overflow' or 'body-only' to resource configuration`);
19601
+ console.warn(` Large vectors will exceed S3 metadata limit (2047 bytes) and cause errors.`);
19602
+ }
19603
+ }
19604
+ }
19605
+ }
19606
+ }
19607
+ /**
19608
+ * Auto-detect vector field from resource schema
19609
+ *
19610
+ * Looks for the first field declared as 'embedding:XXX' or as a fixed-length array of numbers.
19611
+ * Caches result per resource for performance.
19612
+ *
19613
+ * @param {Resource} resource - Resource instance
19614
+ * @returns {string|null} Detected vector field name or null
19615
+ */
19616
+ detectVectorField(resource) {
19617
+ if (this._vectorFieldCache.has(resource.name)) {
19618
+ return this._vectorFieldCache.get(resource.name);
19619
+ }
19620
+ const vectorField = this._findEmbeddingField(resource.schema.attributes);
19621
+ this._vectorFieldCache.set(resource.name, vectorField);
19622
+ if (vectorField && this.config.emitEvents) {
19623
+ this.emit("vector:field-detected", {
19624
+ resource: resource.name,
19625
+ vectorField,
19626
+ timestamp: Date.now()
19627
+ });
19628
+ }
19629
+ return vectorField;
19630
+ }
19631
+ /**
19632
+ * Recursively find the first vector field (embedding:XXX shorthand or fixed-length number array) in attributes
19633
+ *
19634
+ * @param {Object} attributes - Resource attributes
19635
+ * @param {string} path - Current path (for nested objects)
19636
+ * @returns {string|null} Field path or null
19637
+ */
19638
+ _findEmbeddingField(attributes, path = "") {
19639
+ for (const [key, attr] of Object.entries(attributes)) {
19640
+ const fullPath = path ? `${path}.${key}` : key;
19641
+ if (typeof attr === "string" && attr.startsWith("embedding:")) {
19642
+ return fullPath;
19643
+ }
19644
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19645
+ return fullPath;
19646
+ }
19647
+ if (attr.type === "object" && attr.props) {
19648
+ const nested = this._findEmbeddingField(attr.props, fullPath);
19649
+ if (nested) return nested;
19650
+ }
19651
+ }
19652
+ return null;
19653
+ }
19654
+ /**
19655
+ * Emit event with throttling support
19656
+ *
19657
+ * @param {string} eventName - Event name
19658
+ * @param {Object} data - Event data
19659
+ * @param {string} throttleKey - Unique key for throttling (optional)
19660
+ */
19661
+ _emitEvent(eventName, data, throttleKey = null) {
19662
+ if (!this.config.emitEvents) return;
19663
+ if (throttleKey) {
19664
+ const now = Date.now();
19665
+ const lastEmit = this._throttleState.get(throttleKey);
19666
+ if (lastEmit && now - lastEmit < this.config.eventThrottle) {
19667
+ return;
19668
+ }
19669
+ this._throttleState.set(throttleKey, now);
19670
+ }
19671
+ this.emit(eventName, data);
19672
+ }
19673
+ /**
19674
+ * Find vector fields in resource attributes
19675
+ *
19676
+ * @param {Object} attributes - Resource attributes
19677
+ * @param {string} path - Current path (for nested objects)
19678
+ * @returns {Array} Array of vector field info
19679
+ */
19680
+ findVectorFields(attributes, path = "") {
19681
+ const vectors = [];
19682
+ for (const [key, attr] of Object.entries(attributes)) {
19683
+ const fullPath = path ? `${path}.${key}` : key;
19684
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19685
+ vectors.push({
19686
+ name: fullPath,
19687
+ length: attr.length,
19688
+ estimatedBytes: this.estimateVectorBytes(attr.length)
19689
+ });
19690
+ }
19691
+ if (attr.type === "object" && attr.props) {
19692
+ vectors.push(...this.findVectorFields(attr.props, fullPath));
19693
+ }
19694
+ }
19695
+ return vectors;
19696
+ }
19697
+ /**
19698
+ * Estimate bytes required to store a vector in JSON format
19699
+ *
19700
+ * Conservative estimate: ~7 bytes per number + array overhead
19701
+ *
19702
+ * @param {number} dimensions - Number of dimensions
19703
+ * @returns {number} Estimated bytes
19704
+ */
19705
+ estimateVectorBytes(dimensions) {
19706
+ return dimensions * 7 + 50;
19707
+ }
19708
+ /**
19709
+ * Install vector methods on all resources
19710
+ */
19711
+ installResourceMethods() {
19712
+ for (const resource of Object.values(this.database.resources)) {
19713
+ const searchMethod = this.createVectorSearchMethod(resource);
19714
+ const clusterMethod = this.createClusteringMethod(resource);
19715
+ const distanceMethod = this.createDistanceMethod();
19716
+ resource.vectorSearch = searchMethod;
19717
+ resource.cluster = clusterMethod;
19718
+ resource.vectorDistance = distanceMethod;
19719
+ resource.similarTo = searchMethod;
19720
+ resource.findSimilar = searchMethod;
19721
+ resource.distance = distanceMethod;
19722
+ }
19723
+ }
19724
+ /**
19725
+ * Create vector search method for a resource
19726
+ *
19727
+ * Performs K-nearest neighbors search to find similar vectors.
19728
+ *
19729
+ * @param {Resource} resource - Resource instance
19730
+ * @returns {Function} Vector search method
19731
+ */
19732
+ createVectorSearchMethod(resource) {
19733
+ return async (queryVector, options = {}) => {
19734
+ const startTime = Date.now();
19735
+ let vectorField = options.vectorField;
19736
+ if (!vectorField && this.config.autoDetectVectorField) {
19737
+ vectorField = this.detectVectorField(resource);
19738
+ if (!vectorField) {
19739
+ vectorField = "vector";
19740
+ }
19741
+ } else if (!vectorField) {
19742
+ vectorField = "vector";
19743
+ }
19744
+ const {
19745
+ limit = 10,
19746
+ distanceMetric = this.config.distanceMetric,
19747
+ threshold = null,
19748
+ partition = null
19749
+ } = options;
19750
+ const distanceFn = this.distanceFunctions[distanceMetric];
19751
+ if (!distanceFn) {
19752
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19753
+ operation: "vectorSearch",
19754
+ availableMetrics: Object.keys(this.distanceFunctions),
19755
+ providedMetric: distanceMetric
19756
+ });
19757
+ this._emitEvent("vector:search-error", {
19758
+ resource: resource.name,
19759
+ error: error.message,
19760
+ timestamp: Date.now()
19761
+ });
19762
+ throw error;
19763
+ }
19764
+ this._emitEvent("vector:search-start", {
19765
+ resource: resource.name,
19766
+ vectorField,
19767
+ limit,
19768
+ distanceMetric,
19769
+ partition,
19770
+ threshold,
19771
+ queryDimensions: queryVector.length,
19772
+ timestamp: startTime
19773
+ });
19774
+ try {
19775
+ let allRecords;
19776
+ if (partition) {
19777
+ this._emitEvent("vector:partition-filter", {
19778
+ resource: resource.name,
19779
+ partition,
19780
+ timestamp: Date.now()
19781
+ });
19782
+ allRecords = await resource.list({ partition, partitionValues: partition });
19783
+ } else {
19784
+ allRecords = await resource.getAll();
19785
+ }
19786
+ const totalRecords = allRecords.length;
19787
+ let processedRecords = 0;
19788
+ let dimensionMismatches = 0;
19789
+ const results = allRecords.filter((record) => record[vectorField] && Array.isArray(record[vectorField])).map((record, index) => {
19790
+ try {
19791
+ const distance = distanceFn(queryVector, record[vectorField]);
19792
+ processedRecords++;
19793
+ if (this.config.verboseEvents && processedRecords % 100 === 0) {
19794
+ this._emitEvent("vector:search-progress", {
19795
+ resource: resource.name,
19796
+ processed: processedRecords,
19797
+ total: totalRecords,
19798
+ progress: processedRecords / totalRecords * 100,
19799
+ timestamp: Date.now()
19800
+ }, `search-${resource.name}`);
19801
+ }
19802
+ return { record, distance };
19803
+ } catch (err) {
19804
+ dimensionMismatches++;
19805
+ if (this.config.verboseEvents) {
19806
+ this._emitEvent("vector:dimension-mismatch", {
19807
+ resource: resource.name,
19808
+ recordIndex: index,
19809
+ expected: queryVector.length,
19810
+ got: record[vectorField]?.length,
19811
+ timestamp: Date.now()
19812
+ });
19813
+ }
19814
+ return null;
19815
+ }
19816
+ }).filter((result) => result !== null).filter((result) => threshold === null || result.distance <= threshold).sort((a, b) => a.distance - b.distance).slice(0, limit);
19817
+ const duration = Date.now() - startTime;
19818
+ const throughput = totalRecords / (duration / 1e3);
19819
+ this._emitEvent("vector:search-complete", {
19820
+ resource: resource.name,
19821
+ vectorField,
19822
+ resultsCount: results.length,
19823
+ totalRecords,
19824
+ processedRecords,
19825
+ dimensionMismatches,
19826
+ duration,
19827
+ throughput: throughput.toFixed(2),
19828
+ timestamp: Date.now()
19829
+ });
19830
+ if (this.config.verboseEvents) {
19831
+ this._emitEvent("vector:performance", {
19832
+ operation: "search",
19833
+ resource: resource.name,
19834
+ duration,
19835
+ throughput: throughput.toFixed(2),
19836
+ recordsPerSecond: (processedRecords / (duration / 1e3)).toFixed(2),
19837
+ timestamp: Date.now()
19838
+ });
19839
+ }
19840
+ return results;
19841
+ } catch (error) {
19842
+ this._emitEvent("vector:search-error", {
19843
+ resource: resource.name,
19844
+ error: error.message,
19845
+ stack: error.stack,
19846
+ timestamp: Date.now()
19847
+ });
19848
+ throw error;
19849
+ }
19850
+ };
19851
+ }
19852
+ /**
19853
+ * Create clustering method for a resource
19854
+ *
19855
+ * Performs k-means clustering on resource vectors.
19856
+ *
19857
+ * @param {Resource} resource - Resource instance
19858
+ * @returns {Function} Clustering method
19859
+ */
19860
+ createClusteringMethod(resource) {
19861
+ return async (options = {}) => {
19862
+ const startTime = Date.now();
19863
+ let vectorField = options.vectorField;
19864
+ if (!vectorField && this.config.autoDetectVectorField) {
19865
+ vectorField = this.detectVectorField(resource);
19866
+ if (!vectorField) {
19867
+ vectorField = "vector";
19868
+ }
19869
+ } else if (!vectorField) {
19870
+ vectorField = "vector";
19871
+ }
19872
+ const {
19873
+ k = 5,
19874
+ distanceMetric = this.config.distanceMetric,
19875
+ partition = null,
19876
+ ...kmeansOptions
19877
+ } = options;
19878
+ const distanceFn = this.distanceFunctions[distanceMetric];
19879
+ if (!distanceFn) {
19880
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19881
+ operation: "cluster",
19882
+ availableMetrics: Object.keys(this.distanceFunctions),
19883
+ providedMetric: distanceMetric
19884
+ });
19885
+ this._emitEvent("vector:cluster-error", {
19886
+ resource: resource.name,
19887
+ error: error.message,
19888
+ timestamp: Date.now()
19889
+ });
19890
+ throw error;
19891
+ }
19892
+ this._emitEvent("vector:cluster-start", {
19893
+ resource: resource.name,
19894
+ vectorField,
19895
+ k,
19896
+ distanceMetric,
19897
+ partition,
19898
+ maxIterations: kmeansOptions.maxIterations || 100,
19899
+ timestamp: startTime
19900
+ });
19901
+ try {
19902
+ let allRecords;
19903
+ if (partition) {
19904
+ this._emitEvent("vector:partition-filter", {
19905
+ resource: resource.name,
19906
+ partition,
19907
+ timestamp: Date.now()
19908
+ });
19909
+ allRecords = await resource.list({ partition, partitionValues: partition });
19910
+ } else {
19911
+ allRecords = await resource.getAll();
19912
+ }
19913
+ const recordsWithVectors = allRecords.filter(
19914
+ (record) => record[vectorField] && Array.isArray(record[vectorField])
19915
+ );
19916
+ if (recordsWithVectors.length === 0) {
19917
+ const error = new VectorError("No vectors found in resource", {
19918
+ operation: "cluster",
19919
+ resourceName: resource.name,
19920
+ vectorField
19921
+ });
19922
+ this._emitEvent("vector:empty-dataset", {
19923
+ resource: resource.name,
19924
+ vectorField,
19925
+ totalRecords: allRecords.length,
19926
+ timestamp: Date.now()
19927
+ });
19928
+ throw error;
19929
+ }
19930
+ const vectors = recordsWithVectors.map((record) => record[vectorField]);
19931
+ const result = kmeans(vectors, k, {
19932
+ ...kmeansOptions,
19933
+ distanceFn,
19934
+ onIteration: this.config.verboseEvents ? (iteration, inertia, converged) => {
19935
+ this._emitEvent("vector:cluster-iteration", {
19936
+ resource: resource.name,
19937
+ k,
19938
+ iteration,
19939
+ inertia,
19940
+ converged,
19941
+ timestamp: Date.now()
19942
+ }, `cluster-${resource.name}`);
19943
+ } : void 0
19944
+ });
19945
+ if (result.converged) {
19946
+ this._emitEvent("vector:cluster-converged", {
19947
+ resource: resource.name,
19948
+ k,
19949
+ iterations: result.iterations,
19950
+ inertia: result.inertia,
19951
+ timestamp: Date.now()
19952
+ });
19953
+ }
19954
+ const clusters = Array(k).fill(null).map(() => []);
19955
+ recordsWithVectors.forEach((record, i) => {
19956
+ const clusterIndex = result.assignments[i];
19957
+ clusters[clusterIndex].push(record);
19958
+ });
19959
+ const duration = Date.now() - startTime;
19960
+ const clusterSizes = clusters.map((c) => c.length);
19961
+ this._emitEvent("vector:cluster-complete", {
19962
+ resource: resource.name,
19963
+ vectorField,
19964
+ k,
19965
+ vectorCount: vectors.length,
19966
+ iterations: result.iterations,
19967
+ converged: result.converged,
19968
+ inertia: result.inertia,
19969
+ clusterSizes,
19970
+ duration,
19971
+ timestamp: Date.now()
19972
+ });
19973
+ if (this.config.verboseEvents) {
19974
+ this._emitEvent("vector:performance", {
19975
+ operation: "clustering",
19976
+ resource: resource.name,
19977
+ k,
19978
+ duration,
19979
+ iterationsPerSecond: (result.iterations / (duration / 1e3)).toFixed(2),
19980
+ vectorsPerSecond: (vectors.length / (duration / 1e3)).toFixed(2),
19981
+ timestamp: Date.now()
19982
+ });
19983
+ }
19984
+ return {
19985
+ clusters,
19986
+ centroids: result.centroids,
19987
+ inertia: result.inertia,
19988
+ iterations: result.iterations,
19989
+ converged: result.converged
19990
+ };
19991
+ } catch (error) {
19992
+ this._emitEvent("vector:cluster-error", {
19993
+ resource: resource.name,
19994
+ error: error.message,
19995
+ stack: error.stack,
19996
+ timestamp: Date.now()
19997
+ });
19998
+ throw error;
19999
+ }
20000
+ };
20001
+ }
20002
+ /**
20003
+ * Create distance calculation method
20004
+ *
20005
+ * @returns {Function} Distance method
20006
+ */
20007
+ createDistanceMethod() {
20008
+ return (vector1, vector2, metric = this.config.distanceMetric) => {
20009
+ const distanceFn = this.distanceFunctions[metric];
20010
+ if (!distanceFn) {
20011
+ throw new VectorError(`Invalid distance metric: ${metric}`, {
20012
+ operation: "vectorDistance",
20013
+ availableMetrics: Object.keys(this.distanceFunctions),
20014
+ providedMetric: metric
20015
+ });
20016
+ }
20017
+ return distanceFn(vector1, vector2);
20018
+ };
20019
+ }
20020
+ /**
20021
+ * Static utility: Normalize vector
20022
+ *
20023
+ * @param {number[]} vector - Input vector
20024
+ * @returns {number[]} Normalized vector
20025
+ */
20026
+ static normalize(vector) {
20027
+ return normalize(vector);
20028
+ }
20029
+ /**
20030
+ * Static utility: Calculate dot product
20031
+ *
20032
+ * @param {number[]} vector1 - First vector
20033
+ * @param {number[]} vector2 - Second vector
20034
+ * @returns {number} Dot product
20035
+ */
20036
+ static dotProduct(vector1, vector2) {
20037
+ return dotProduct(vector1, vector2);
20038
+ }
20039
+ /**
20040
+ * Static utility: Find optimal K for clustering
20041
+ *
20042
+ * Analyzes clustering quality across a range of K values using
20043
+ * multiple evaluation metrics.
20044
+ *
20045
+ * @param {number[][]} vectors - Vectors to analyze
20046
+ * @param {Object} options - Configuration options
20047
+ * @returns {Promise<Object>} Analysis results with recommendations
20048
+ */
20049
+ static async findOptimalK(vectors, options) {
20050
+ return findOptimalK(vectors, options);
20051
+ }
20052
+ }
20053
+
20054
/**
 * Mean silhouette coefficient of a clustering.
 *
 * For each point in a cluster of two or more members, `a` is its mean distance
 * to the other members of its cluster and `b` is the smallest mean distance to
 * the members of any other non-empty cluster; the point scores
 * (b - a) / max(a, b). Points that are alone in their cluster, or that have no
 * other non-empty cluster to compare with, are left out of the average.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Only its length (the cluster count) is used
 * @param {Function} [distanceFn=euclideanDistance] - Distance between two vectors
 * @returns {number} Average score, or 0 when no point could be scored
 */
function silhouetteScore(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const pointCount = vectors.length;

  // members[c] lists the indices of the points assigned to cluster c
  const members = Array.from({ length: clusterCount }, () => []);
  vectors.forEach((_, idx) => {
    members[assignments[idx]].push(idx);
  });

  // Nothing to score unless at least one cluster has two or more points
  if (!members.some((group) => group.length > 1)) {
    return 0;
  }

  let scoreSum = 0;
  let scoredPoints = 0;
  for (let i = 0; i < pointCount; i++) {
    const ownCluster = assignments[i];
    const peers = members[ownCluster];
    if (peers.length === 1) continue;

    const point = vectors[i];

    // a: mean distance to the other members of the same cluster
    const intraTotal = peers.reduce(
      (sum, j) => (j === i ? sum : sum + distanceFn(point, vectors[j])),
      0
    );
    const intra = intraTotal / (peers.length - 1);

    // b: smallest mean distance to any other non-empty cluster
    let nearest = Infinity;
    members.forEach((others, clusterIdx) => {
      if (clusterIdx === ownCluster || others.length === 0) return;
      const total = others.reduce((sum, j) => sum + distanceFn(point, vectors[j]), 0);
      nearest = Math.min(nearest, total / others.length);
    });
    if (nearest === Infinity) continue;

    const larger = Math.max(intra, nearest);
    scoreSum += larger === 0 ? 0 : (nearest - intra) / larger;
    scoredPoints++;
  }

  return scoredPoints > 0 ? scoreSum / scoredPoints : 0;
}
20097
/**
 * Davies-Bouldin index of a clustering.
 *
 * Each non-empty cluster contributes the largest value of
 * (scatter_i + scatter_j) / distance(centroid_i, centroid_j) over the other
 * non-empty clusters j, where scatter is the mean distance of a cluster's
 * points to its centroid. Pairs whose centroids coincide are skipped.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - One centroid per cluster
 * @param {Function} [distanceFn=euclideanDistance] - Distance between two vectors
 * @returns {number} Mean of the per-cluster maxima, or 0 when every cluster is empty
 */
function daviesBouldinIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const sizes = new Array(clusterCount).fill(0);
  const distanceTotals = new Array(clusterCount).fill(0);

  vectors.forEach((vector, idx) => {
    const c = assignments[idx];
    distanceTotals[c] += distanceFn(vector, centroids[c]);
    sizes[c]++;
  });

  // Mean point-to-centroid distance; empty clusters keep their initial 0
  const scatter = distanceTotals.map((total, c) => (sizes[c] > 0 ? total / sizes[c] : total));

  // Indices of the clusters that actually received points
  const populated = [];
  sizes.forEach((size, c) => {
    if (size !== 0) populated.push(c);
  });

  let indexSum = 0;
  for (const i of populated) {
    let worstRatio = 0;
    for (const j of populated) {
      if (j === i) continue;
      const separation = distanceFn(centroids[i], centroids[j]);
      if (separation === 0) continue;
      worstRatio = Math.max(worstRatio, (scatter[i] + scatter[j]) / separation);
    }
    indexSum += worstRatio;
  }

  return populated.length > 0 ? indexSum / populated.length : 0;
}
20128
/**
 * Calinski-Harabasz index (variance ratio criterion) of a clustering:
 * (between-cluster dispersion / (k - 1)) / (within-cluster dispersion / (n - k)),
 * with dispersions built from squared `distanceFn` values.
 *
 * @param {number[][]} vectors - Data points (n of them)
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - One centroid per cluster (k of them)
 * @param {Function} [distanceFn=euclideanDistance] - Distance between two vectors
 * @returns {number} The index, or 0 when it is undefined (no points, fewer
 *   than two clusters, at least as many clusters as points, or zero
 *   within-cluster dispersion)
 */
function calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const n = vectors.length;
  const k = centroids.length;
  // The ratio divides by (k - 1) and (n - k), so it only makes sense for
  // 1 < k < n. This also keeps empty input from reaching vectors[0] below.
  if (n === 0 || k <= 1 || k >= n) return 0;

  // Mean of all points, accumulated per dimension
  const dimensions = vectors[0].length;
  const overallCentroid = new Array(dimensions).fill(0);
  for (const vector of vectors) {
    vector.forEach((val, dim) => {
      overallCentroid[dim] += val;
    });
  }
  for (let dim = 0; dim < dimensions; dim++) {
    overallCentroid[dim] /= n;
  }

  const clusterCounts = new Array(k).fill(0);
  // Within-cluster sum of squared distances to the assigned centroid
  let wcss = 0;
  vectors.forEach((vector, i) => {
    const cluster = assignments[i];
    clusterCounts[cluster]++;
    const dist = distanceFn(vector, centroids[cluster]);
    wcss += dist * dist;
  });
  if (wcss === 0) return 0;

  // Between-cluster sum of squares, weighted by cluster size
  let bgss = 0;
  for (let i = 0; i < k; i++) {
    if (clusterCounts[i] === 0) continue;
    const dist = distanceFn(centroids[i], overallCentroid);
    bgss += clusterCounts[i] * dist * dist;
  }

  return bgss / (k - 1) / (wcss / (n - k));
}
20161
/**
 * Gap statistic for an existing clustering.
 *
 * Compares log(within-cluster dispersion) of the supplied clustering with the
 * average of the same quantity over `nReferences` random data sets, each drawn
 * uniformly (via Math.random) inside the per-dimension bounding box of
 * `vectors` and clustered with kmeans() into the same number of clusters.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - One centroid per cluster
 * @param {Function} [distanceFn=euclideanDistance] - Distance between two vectors
 * @param {number} [nReferences=10] - Number of random reference data sets
 * @returns {Promise<{gap: number, sk: number, expectedWk: number, actualWk: number}>}
 */
async function gapStatistic(vectors, assignments, centroids, distanceFn = euclideanDistance, nReferences = 10) {
  const pointCount = vectors.length;
  const clusterCount = centroids.length;
  const dimensions = vectors[0].length;

  // Sum of squared distances from each point to its assigned centre
  const dispersion = (points, labels, centres) => {
    let total = 0;
    points.forEach((point, idx) => {
      const dist = distanceFn(point, centres[labels[idx]]);
      total += dist * dist;
    });
    return total;
  };
  // The small epsilon keeps log() finite when the dispersion is exactly 0
  const logDispersion = (value) => Math.log(value + 1e-10);

  const actualWk = logDispersion(dispersion(vectors, assignments, centroids));

  // Per-dimension bounding box of the data
  const mins = new Array(dimensions).fill(Infinity);
  const maxs = new Array(dimensions).fill(-Infinity);
  for (const vector of vectors) {
    vector.forEach((val, dim) => {
      mins[dim] = Math.min(mins[dim], val);
      maxs[dim] = Math.max(maxs[dim], val);
    });
  }

  const referenceWks = [];
  for (let ref = 0; ref < nReferences; ref++) {
    // Same number of points as the data, uniform inside the bounding box
    const refVectors = Array.from({ length: pointCount }, () =>
      Array.from({ length: dimensions }, (_, dim) => mins[dim] + Math.random() * (maxs[dim] - mins[dim]))
    );
    const refResult = kmeans(refVectors, clusterCount, { maxIterations: 50, distanceFn });
    referenceWks.push(logDispersion(dispersion(refVectors, refResult.assignments, refResult.centroids)));
  }

  const expectedWk = referenceWks.reduce((a, b) => a + b, 0) / nReferences;
  const gap = expectedWk - actualWk;

  // Spread of the reference values (divides by nReferences, not nReferences - 1)
  const squaredDeviations = referenceWks.reduce(
    (sum, refWk) => sum + Math.pow(refWk - expectedWk, 2),
    0
  );
  const sdk = Math.sqrt(squaredDeviations / nReferences);
  const sk = sdk * Math.sqrt(1 + 1 / nReferences);

  return { gap, sk, expectedWk, actualWk };
}
20205
/**
 * Measure how repeatable k-means is on `vectors` by running it several times
 * and comparing the outcomes.
 *
 * @param {number[][]} vectors - Data points
 * @param {number} k - Number of clusters
 * @param {Object} [options]
 * @param {number} [options.nRuns=10] - How many times kmeans() is run
 * @param {Function} [options.distanceFn=euclideanDistance] - Distance between two vectors
 * @param {...*} [options.rest] - Every other option is passed through to kmeans()
 * @returns {{avgInertia: number, stdInertia: number, cvInertia: number, avgSimilarity: number, stability: number}}
 *   Inertia statistics plus the mean pairwise assignment similarity
 *   (`stability` is the same value as `avgSimilarity`; higher is more stable)
 */
function clusteringStability(vectors, k, options = {}) {
  const { nRuns = 10, distanceFn = euclideanDistance, ...kmeansOptions } = options;

  // One kmeans() run per index; the run index is handed over as `seed`
  const runs = [];
  for (let run = 0; run < nRuns; run++) {
    runs.push(kmeans(vectors, k, { ...kmeansOptions, distanceFn, seed: run }));
  }
  const inertias = runs.map((result) => result.inertia);
  const allAssignments = runs.map((result) => result.assignments);

  // Similarity of every unordered pair of runs
  const pairSimilarities = [];
  allAssignments.forEach((first, i) => {
    for (let j = i + 1; j < nRuns; j++) {
      pairSimilarities.push(calculateAssignmentSimilarity(first, allAssignments[j]));
    }
  });

  const sum = (values) => values.reduce((a, b) => a + b, 0);

  const avgInertia = sum(inertias) / nRuns;
  const inertiaVariance =
    inertias.reduce((acc, val) => acc + Math.pow(val - avgInertia, 2), 0) / nRuns;
  const stdInertia = Math.sqrt(inertiaVariance);

  // With fewer than two runs there are no pairs; that counts as fully stable
  const avgSimilarity =
    pairSimilarities.length > 0 ? sum(pairSimilarities) / pairSimilarities.length : 1;

  return {
    avgInertia,
    stdInertia,
    // Coefficient of variation
    cvInertia: avgInertia !== 0 ? stdInertia / avgInertia : 0,
    avgSimilarity,
    // Higher is more stable
    stability: avgSimilarity
  };
}
20245
/**
 * Fraction of point pairs on which two clusterings agree: a pair counts as a
 * match when both clusterings put the two points together, or both keep them
 * apart. Cluster labels themselves do not need to line up.
 *
 * @param {Array} assignments1 - Cluster label of each point in the first clustering
 * @param {Array} assignments2 - Cluster label of each point in the second clustering
 * @returns {number} Share of agreeing pairs in [0, 1]; 1 when there are no pairs
 */
function calculateAssignmentSimilarity(assignments1, assignments2) {
  const pointCount = assignments1.length;
  const totalPairs = pointCount * (pointCount - 1) / 2;
  // Fewer than two points (or no usable length): nothing can disagree
  if (!(totalPairs > 0)) return 1;

  let agreeing = 0;
  for (let first = 0; first < pointCount - 1; first++) {
    for (let second = first + 1; second < pointCount; second++) {
      const togetherIn1 = assignments1[first] === assignments1[second];
      const togetherIn2 = assignments2[first] === assignments2[second];
      agreeing += togetherIn1 === togetherIn2 ? 1 : 0;
    }
  }
  return agreeing / totalPairs;
}
20260
+
20261
// Frozen, prototype-less namespace object grouping the clustering metric functions.
var metrics = /*#__PURE__*/Object.freeze({
  __proto__: null,
  calinskiHarabaszIndex,
  clusteringStability,
  daviesBouldinIndex,
  gapStatistic,
  silhouetteScore
});
20269
+
20270
+ export { AVAILABLE_BEHAVIORS, AnalyticsNotEnabledError, AuditPlugin, AuthenticationError, BackupPlugin, BaseError, BehaviorError, CachePlugin, Client, ConnectionString, ConnectionStringError, CostsPlugin, CryptoError, DEFAULT_BEHAVIOR, Database, DatabaseError, EncryptionError, ErrorMap, EventualConsistencyPlugin, FullTextPlugin, InvalidResourceItem, MetadataLimitError, MetricsPlugin, MissingMetadata, NoSuchBucket, NoSuchKey, NotFound, PartitionDriverError, PartitionError, PermissionError, Plugin, PluginError, PluginObject, PluginStorageError, QueueConsumerPlugin, ReplicatorPlugin, Resource, ResourceError, ResourceIdsPageReader, ResourceIdsReader, ResourceNotFound, ResourceReader, ResourceWriter, S3QueuePlugin, Database as S3db, S3dbError, SchedulerPlugin, Schema, SchemaError, StateMachinePlugin, StreamError, UnknownError, ValidationError, Validator, VectorPlugin, behaviors, calculateAttributeNamesSize, calculateAttributeSizes, calculateEffectiveLimit, calculateSystemOverhead, calculateTotalSize, calculateUTF8Bytes, clearUTF8Cache, clearUTF8Memo, clearUTF8Memory, decode, decodeDecimal, decodeFixedPoint, decrypt, S3db as default, encode, encodeDecimal, encodeFixedPoint, encrypt, getBehavior, getSizeBreakdown, idGenerator, mapAwsError, md5, passwordGenerator, sha256, streamToString, transformValue, tryFn, tryFnSync };
18936
20271
  //# sourceMappingURL=s3db.es.js.map