s3db.js 11.2.4 → 11.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/s3db.es.js CHANGED
@@ -77,6 +77,41 @@ const decodeDecimal = (s) => {
77
77
  const num = decPart ? Number(decodedInt + "." + decPart) : decodedInt;
78
78
  return negative ? -num : num;
79
79
  };
80
/**
 * Encode a number as a fixed-point integer string.
 *
 * The value is multiplied by `10 ** precision`, rounded to an integer and
 * written with the file-level `alphabet` / `base` digit table, prefixed
 * with "^" (and "-" for negative values).
 *
 * @param {number} n - Value to encode.
 * @param {number} [precision=6] - Number of decimal places preserved.
 * @returns {string} The encoded string, "^0" for values that round to zero,
 *   or the literal string "undefined" when `n` cannot be encoded.
 */
const encodeFixedPoint = (n, precision = 6) => {
  if (typeof n !== "number" || !Number.isFinite(n)) return "undefined";
  const scaled = Math.round(n * Math.pow(10, precision));
  // A finite input can still overflow once scaled (e.g. 1e305 * 1e6). An
  // infinite magnitude would keep the digit loop below running forever, so
  // it is reported with the same sentinel as any other unencodable value.
  if (!Number.isFinite(scaled)) return "undefined";
  if (scaled === 0) return "^0";
  const sign = scaled < 0 ? "-" : "";
  let remaining = Math.abs(scaled);
  let digits = "";
  while (remaining > 0) {
    digits = alphabet[remaining % base] + digits;
    remaining = Math.floor(remaining / base);
  }
  return "^" + sign + digits;
};
95
/**
 * Decode a string produced by the fixed-point encoder back into a number.
 *
 * Expected shape: "^" + optional "-" + one or more digits from the
 * file-level `charToValue` table (interpreted in `base`). The decoded
 * integer is divided by `10 ** precision`.
 *
 * @param {string} s - Encoded value.
 * @param {number} [precision=6] - Decimal places used when encoding.
 * @returns {number} The decoded number, or NaN when `s` is not a string,
 *   lacks the "^" prefix, has no digits, or contains an unknown digit.
 */
const decodeFixedPoint = (s, precision = 6) => {
  if (typeof s !== "string" || !s.startsWith("^")) return NaN;
  let digits = s.slice(1);
  if (digits === "0") return 0;
  const negative = digits[0] === "-";
  if (negative) {
    digits = digits.slice(1);
  }
  // A bare "^" or "^-" carries no magnitude; treat it as malformed instead
  // of silently decoding it to 0 / -0.
  if (digits === "") return NaN;
  let magnitude = 0;
  for (const ch of digits) {
    const digitValue = charToValue[ch];
    if (digitValue === undefined) return NaN;
    magnitude = magnitude * base + digitValue;
  }
  const scale = Math.pow(10, precision);
  return (negative ? -magnitude : magnitude) / scale;
};
80
115
 
81
116
  const utf8BytesMemory = /* @__PURE__ */ new Map();
82
117
  const UTF8_MEMORY_MAX_SIZE = 1e4;
@@ -11501,6 +11536,11 @@ class Validator extends FastestValidator {
11501
11536
  type: "any",
11502
11537
  custom: this.autoEncrypt ? jsonHandler : void 0
11503
11538
  });
11539
+ this.alias("embedding", {
11540
+ type: "array",
11541
+ items: "number",
11542
+ empty: false
11543
+ });
11504
11544
  }
11505
11545
  }
11506
11546
  const ValidatorManager = new Proxy(Validator, {
@@ -11749,6 +11789,59 @@ const SchemaActions = {
11749
11789
  }
11750
11790
  return NaN;
11751
11791
  });
11792
+ },
11793
+ fromArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
11794
+ if (value === null || value === void 0 || !Array.isArray(value)) {
11795
+ return value;
11796
+ }
11797
+ if (value.length === 0) {
11798
+ return "";
11799
+ }
11800
+ const encodedItems = value.map((item) => {
11801
+ if (typeof item === "number" && !isNaN(item)) {
11802
+ return encodeFixedPoint(item, precision);
11803
+ }
11804
+ const n = Number(item);
11805
+ return isNaN(n) ? "" : encodeFixedPoint(n, precision);
11806
+ });
11807
+ return encodedItems.join(separator);
11808
+ },
11809
/**
 * Parse a separator-joined string of fixed-point encoded items back into
 * an array of numbers.
 *
 * - Arrays are mapped item by item (numbers kept, everything else decoded).
 * - null/undefined are returned as-is; "" yields an empty array.
 * - Any other value is stringified and split on `separator`; a backslash
 *   makes the following character literal.
 * - Empty or undecodable items become NaN.
 *
 * @param {*} value - Stored representation.
 * @param {{separator: string, precision?: number}} options - Item
 *   separator and decimal precision handed to `decodeFixedPoint`.
 * @returns {number[]|null|undefined} Decoded numbers.
 */
toArrayOfEmbeddings: (value, { separator, precision = 6 }) => {
  if (Array.isArray(value)) {
    return value.map((entry) => {
      if (typeof entry === "number") return entry;
      return decodeFixedPoint(entry, precision);
    });
  }
  if (value === null || value === undefined) return value;
  if (value === "") return [];

  const text = String(value);
  const tokens = [];
  let token = "";
  for (let pos = 0; pos < text.length; pos++) {
    const ch = text[pos];
    if (ch === "\\" && pos + 1 < text.length) {
      // Escaped character: take the next one verbatim and skip over it.
      pos += 1;
      token += text[pos];
    } else if (ch === separator) {
      tokens.push(token);
      token = "";
    } else {
      token += ch;
    }
  }
  tokens.push(token);

  // Tokens are always strings here; decodeFixedPoint itself yields NaN for
  // anything it cannot parse.
  return tokens.map((piece) => (piece === "" ? NaN : decodeFixedPoint(piece, precision)));
}
11753
11846
  };
11754
11847
  class Schema {
@@ -11818,18 +11911,89 @@ class Schema {
11818
11911
  }
11819
11912
  return objectKeys;
11820
11913
  }
11914
+ _generateHooksFromOriginalAttributes(attributes, prefix = "") {
11915
+ for (const [key, value] of Object.entries(attributes)) {
11916
+ if (key.startsWith("$$")) continue;
11917
+ const fullKey = prefix ? `${prefix}.${key}` : key;
11918
+ if (typeof value === "object" && value !== null && !Array.isArray(value) && value.type) {
11919
+ if (value.type === "array" && value.items) {
11920
+ const itemsType = value.items;
11921
+ const arrayLength = typeof value.length === "number" ? value.length : null;
11922
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11923
+ this.addHook("beforeMap", fullKey, "fromArray");
11924
+ this.addHook("afterUnmap", fullKey, "toArray");
11925
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11926
+ const isIntegerArray = typeof itemsType === "string" && itemsType.includes("integer");
11927
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11928
+ if (isIntegerArray) {
11929
+ this.addHook("beforeMap", fullKey, "fromArrayOfNumbers");
11930
+ this.addHook("afterUnmap", fullKey, "toArrayOfNumbers");
11931
+ } else if (isEmbedding) {
11932
+ this.addHook("beforeMap", fullKey, "fromArrayOfEmbeddings");
11933
+ this.addHook("afterUnmap", fullKey, "toArrayOfEmbeddings");
11934
+ } else {
11935
+ this.addHook("beforeMap", fullKey, "fromArrayOfDecimals");
11936
+ this.addHook("afterUnmap", fullKey, "toArrayOfDecimals");
11937
+ }
11938
+ }
11939
+ }
11940
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value) && !value.type) {
11941
+ this._generateHooksFromOriginalAttributes(value, fullKey);
11942
+ }
11943
+ }
11944
+ }
11821
11945
  generateAutoHooks() {
11946
+ this._generateHooksFromOriginalAttributes(this.attributes);
11822
11947
  const schema = flatten(cloneDeep(this.attributes), { safe: true });
11823
11948
  for (const [name, definition] of Object.entries(schema)) {
11824
- if (definition.includes("array")) {
11825
- if (definition.includes("items:string")) {
11949
+ if (name.includes("$$")) continue;
11950
+ if (this.options.hooks.beforeMap[name] || this.options.hooks.afterUnmap[name]) {
11951
+ continue;
11952
+ }
11953
+ const defStr = typeof definition === "string" ? definition : "";
11954
+ const defType = typeof definition === "object" && definition !== null ? definition.type : null;
11955
+ const isEmbeddingType = defStr.includes("embedding") || defType === "embedding";
11956
+ if (isEmbeddingType) {
11957
+ const lengthMatch = defStr.match(/embedding:(\d+)/);
11958
+ if (lengthMatch) {
11959
+ parseInt(lengthMatch[1], 10);
11960
+ } else if (defStr.includes("length:")) {
11961
+ const match = defStr.match(/length:(\d+)/);
11962
+ if (match) parseInt(match[1], 10);
11963
+ }
11964
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
11965
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11966
+ continue;
11967
+ }
11968
+ const isArray = defStr.includes("array") || defType === "array";
11969
+ if (isArray) {
11970
+ let itemsType = null;
11971
+ if (typeof definition === "object" && definition !== null && definition.items) {
11972
+ itemsType = definition.items;
11973
+ } else if (defStr.includes("items:string")) {
11974
+ itemsType = "string";
11975
+ } else if (defStr.includes("items:number")) {
11976
+ itemsType = "number";
11977
+ }
11978
+ if (itemsType === "string" || typeof itemsType === "string" && itemsType.includes("string")) {
11826
11979
  this.addHook("beforeMap", name, "fromArray");
11827
11980
  this.addHook("afterUnmap", name, "toArray");
11828
- } else if (definition.includes("items:number")) {
11829
- const isIntegerArray = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
11981
+ } else if (itemsType === "number" || typeof itemsType === "string" && itemsType.includes("number")) {
11982
+ const isIntegerArray = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer") || typeof itemsType === "string" && itemsType.includes("integer");
11983
+ let arrayLength = null;
11984
+ if (typeof definition === "object" && definition !== null && typeof definition.length === "number") {
11985
+ arrayLength = definition.length;
11986
+ } else if (defStr.includes("length:")) {
11987
+ const match = defStr.match(/length:(\d+)/);
11988
+ if (match) arrayLength = parseInt(match[1], 10);
11989
+ }
11990
+ const isEmbedding = !isIntegerArray && arrayLength !== null && arrayLength >= 256;
11830
11991
  if (isIntegerArray) {
11831
11992
  this.addHook("beforeMap", name, "fromArrayOfNumbers");
11832
11993
  this.addHook("afterUnmap", name, "toArrayOfNumbers");
11994
+ } else if (isEmbedding) {
11995
+ this.addHook("beforeMap", name, "fromArrayOfEmbeddings");
11996
+ this.addHook("afterUnmap", name, "toArrayOfEmbeddings");
11833
11997
  } else {
11834
11998
  this.addHook("beforeMap", name, "fromArrayOfDecimals");
11835
11999
  this.addHook("afterUnmap", name, "toArrayOfDecimals");
@@ -11837,7 +12001,7 @@ class Schema {
11837
12001
  }
11838
12002
  continue;
11839
12003
  }
11840
- if (definition.includes("secret")) {
12004
+ if (defStr.includes("secret") || defType === "secret") {
11841
12005
  if (this.options.autoEncrypt) {
11842
12006
  this.addHook("beforeMap", name, "encrypt");
11843
12007
  }
@@ -11846,8 +12010,8 @@ class Schema {
11846
12010
  }
11847
12011
  continue;
11848
12012
  }
11849
- if (definition.includes("number")) {
11850
- const isInteger = definition.includes("integer:true") || definition.includes("|integer:") || definition.includes("|integer");
12013
+ if (defStr.includes("number") || defType === "number") {
12014
+ const isInteger = defStr.includes("integer:true") || defStr.includes("|integer:") || defStr.includes("|integer");
11851
12015
  if (isInteger) {
11852
12016
  this.addHook("beforeMap", name, "toBase62");
11853
12017
  this.addHook("afterUnmap", name, "fromBase62");
@@ -11857,17 +12021,17 @@ class Schema {
11857
12021
  }
11858
12022
  continue;
11859
12023
  }
11860
- if (definition.includes("boolean")) {
12024
+ if (defStr.includes("boolean") || defType === "boolean") {
11861
12025
  this.addHook("beforeMap", name, "fromBool");
11862
12026
  this.addHook("afterUnmap", name, "toBool");
11863
12027
  continue;
11864
12028
  }
11865
- if (definition.includes("json")) {
12029
+ if (defStr.includes("json") || defType === "json") {
11866
12030
  this.addHook("beforeMap", name, "toJSON");
11867
12031
  this.addHook("afterUnmap", name, "fromJSON");
11868
12032
  continue;
11869
12033
  }
11870
- if (definition === "object" || definition.includes("object")) {
12034
+ if (definition === "object" || defStr.includes("object") || defType === "object") {
11871
12035
  this.addHook("beforeMap", name, "toJSON");
11872
12036
  this.addHook("afterUnmap", name, "fromJSON");
11873
12037
  continue;
@@ -12009,7 +12173,8 @@ class Schema {
12009
12173
  const originalKey = reversedMap && reversedMap[key] ? reversedMap[key] : key;
12010
12174
  let parsedValue = value;
12011
12175
  const attrDef = this.getAttributeDefinition(originalKey);
12012
- if (typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12176
+ const hasAfterUnmapHook = this.options.hooks?.afterUnmap?.[originalKey];
12177
+ if (!hasAfterUnmapHook && typeof attrDef === "string" && attrDef.includes("number") && !attrDef.includes("array") && !attrDef.includes("decimal")) {
12013
12178
  if (typeof parsedValue === "string" && parsedValue !== "") {
12014
12179
  parsedValue = decode(parsedValue);
12015
12180
  } else if (typeof parsedValue === "number") ; else {
@@ -12074,18 +12239,38 @@ class Schema {
12074
12239
  preprocessAttributesForValidation(attributes) {
12075
12240
  const processed = {};
12076
12241
  for (const [key, value] of Object.entries(attributes)) {
12077
- if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12078
- const isExplicitRequired = value.$$type && value.$$type.includes("required");
12079
- const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12080
- const objectConfig = {
12081
- type: "object",
12082
- properties: this.preprocessAttributesForValidation(value),
12083
- strict: false
12084
- };
12085
- if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12086
- objectConfig.optional = true;
12242
+ if (typeof value === "string") {
12243
+ if (value.startsWith("embedding:")) {
12244
+ const lengthMatch = value.match(/embedding:(\d+)/);
12245
+ if (lengthMatch) {
12246
+ const length = lengthMatch[1];
12247
+ const rest = value.substring(`embedding:${length}`.length);
12248
+ processed[key] = `array|items:number|length:${length}|empty:false${rest}`;
12249
+ continue;
12250
+ }
12251
+ }
12252
+ if (value.startsWith("embedding|") || value === "embedding") {
12253
+ processed[key] = value.replace(/^embedding/, "array|items:number|empty:false");
12254
+ continue;
12255
+ }
12256
+ processed[key] = value;
12257
+ } else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
12258
+ const hasValidatorType = value.type !== void 0 && key !== "$$type";
12259
+ if (hasValidatorType) {
12260
+ processed[key] = value;
12261
+ } else {
12262
+ const isExplicitRequired = value.$$type && value.$$type.includes("required");
12263
+ const isExplicitOptional = value.$$type && value.$$type.includes("optional");
12264
+ const objectConfig = {
12265
+ type: "object",
12266
+ properties: this.preprocessAttributesForValidation(value),
12267
+ strict: false
12268
+ };
12269
+ if (isExplicitRequired) ; else if (isExplicitOptional || this.allNestedObjectsOptional) {
12270
+ objectConfig.optional = true;
12271
+ }
12272
+ processed[key] = objectConfig;
12087
12273
  }
12088
- processed[key] = objectConfig;
12089
12274
  } else {
12090
12275
  processed[key] = value;
12091
12276
  }
@@ -15029,7 +15214,7 @@ class Database extends EventEmitter {
15029
15214
  this.id = idGenerator(7);
15030
15215
  this.version = "1";
15031
15216
  this.s3dbVersion = (() => {
15032
- const [ok, err, version] = tryFn(() => true ? "11.2.4" : "latest");
15217
+ const [ok, err, version] = tryFn(() => true ? "11.2.5" : "latest");
15033
15218
  return ok ? version : "latest";
15034
15219
  })();
15035
15220
  this.resources = {};
@@ -18932,5 +19117,1090 @@ class StateMachinePlugin extends Plugin {
18932
19117
  }
18933
19118
  }
18934
19119
 
18935
- export { AVAILABLE_BEHAVIORS, AnalyticsNotEnabledError, AuditPlugin, AuthenticationError, BackupPlugin, BaseError, BehaviorError, CachePlugin, Client, ConnectionString, ConnectionStringError, CostsPlugin, CryptoError, DEFAULT_BEHAVIOR, Database, DatabaseError, EncryptionError, ErrorMap, EventualConsistencyPlugin, FullTextPlugin, InvalidResourceItem, MetadataLimitError, MetricsPlugin, MissingMetadata, NoSuchBucket, NoSuchKey, NotFound, PartitionDriverError, PartitionError, PermissionError, Plugin, PluginError, PluginObject, PluginStorageError, QueueConsumerPlugin, ReplicatorPlugin, Resource, ResourceError, ResourceIdsPageReader, ResourceIdsReader, ResourceNotFound, ResourceReader, ResourceWriter, S3QueuePlugin, Database as S3db, S3dbError, SchedulerPlugin, Schema, SchemaError, StateMachinePlugin, StreamError, UnknownError, ValidationError, Validator, behaviors, calculateAttributeNamesSize, calculateAttributeSizes, calculateEffectiveLimit, calculateSystemOverhead, calculateTotalSize, calculateUTF8Bytes, clearUTF8Cache, clearUTF8Memo, clearUTF8Memory, decode, decodeDecimal, decrypt, S3db as default, encode, encodeDecimal, encrypt, getBehavior, getSizeBreakdown, idGenerator, mapAwsError, md5, passwordGenerator, sha256, streamToString, transformValue, tryFn, tryFnSync };
19120
/**
 * Cosine distance (1 - cosine similarity) between two equal-length vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Distance in [0, 2]. When either vector has zero
 *   magnitude the result is 0 if both vectors are all zeros, otherwise 1.
 * @throws {Error} If the vectors have different lengths.
 */
function cosineDistance(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    const isZero = (component) => component === 0;
    return a.every(isZero) && b.every(isZero) ? 0 : 1;
  }
  // Rounding in the norms can nudge the ratio just outside [-1, 1], which
  // would produce a slightly negative (or > 2) distance. Clamp it; NaN
  // still propagates unchanged through Math.min/Math.max.
  const similarity = Math.min(1, Math.max(-1, dot / denominator));
  return 1 - similarity;
}
19139
/**
 * Euclidean (L2) distance between two equal-length vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Square root of the summed squared component differences.
 * @throws {Error} If the vectors have different lengths.
 */
function euclideanDistance(a, b) {
  const sameSize = a.length === b.length;
  if (!sameSize) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let squaredTotal = 0;
  let idx = 0;
  while (idx < a.length) {
    const delta = a[idx] - b[idx];
    squaredTotal += delta * delta;
    idx += 1;
  }
  return Math.sqrt(squaredTotal);
}
19150
/**
 * Manhattan (L1) distance between two equal-length vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the absolute component differences.
 * @throws {Error} If the vectors have different lengths.
 */
function manhattanDistance(a, b) {
  const sameSize = a.length === b.length;
  if (!sameSize) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let total = 0;
  let idx = 0;
  while (idx < a.length) {
    const gap = Math.abs(a[idx] - b[idx]);
    total += gap;
    idx += 1;
  }
  return total;
}
19160
/**
 * Dot (inner) product of two equal-length vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the pairwise component products.
 * @throws {Error} If the vectors have different lengths.
 */
function dotProduct(a, b) {
  const sameSize = a.length === b.length;
  if (!sameSize) {
    throw new Error(`Dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let accumulated = 0;
  let idx = 0;
  while (idx < a.length) {
    accumulated += a[idx] * b[idx];
    idx += 1;
  }
  return accumulated;
}
19170
/**
 * Scale a vector to unit (L2) length.
 *
 * @param {number[]} vector - Vector to normalize; it is not modified.
 * @returns {number[]} A new vector divided by its magnitude, or a plain
 *   copy of the input when the magnitude is 0.
 */
function normalize(vector) {
  const sumOfSquares = vector.reduce((acc, component) => acc + component * component, 0);
  const length = Math.sqrt(sumOfSquares);
  if (length === 0) {
    // Nothing to scale by; hand back a copy so callers never share the input.
    return vector.slice();
  }
  return vector.map((component) => component / length);
}
19179
+
19180
/**
 * Cluster vectors with k-means, starting from the centroids returned by
 * `initializeCentroidsKMeansPlusPlus`.
 *
 * Each iteration assigns every vector to its nearest centroid, measures
 * the inertia (sum of squared distances to the assigned centroid) and
 * stops once the inertia changes by less than `tolerance`. Otherwise the
 * centroids are recomputed as the mean of their members; a cluster that
 * lost all members is re-seeded from a random vector using `Math.random()`
 * (this step does not use `seed`).
 *
 * @param {number[][]} vectors - Non-empty list of equal-length vectors.
 * @param {number} k - Number of clusters, 1 <= k <= vectors.length.
 * @param {Object} [options]
 * @param {number} [options.maxIterations=100] - Iteration cap.
 * @param {number} [options.tolerance=1e-4] - Inertia-change threshold for convergence.
 * @param {Function} [options.distanceFn=euclideanDistance] - `(a, b) => number`.
 * @param {?number} [options.seed=null] - Forwarded to the centroid initializer.
 * @param {?Function} [options.onIteration=null] - Called as
 *   `(iterationNumber, inertia, converged)` after each assignment step.
 * @returns {{centroids: number[][], assignments: number[], iterations: number, converged: boolean, inertia: number}}
 * @throws {Error} On empty input, k out of range, or mixed vector dimensions.
 */
function kmeans(vectors, k, options = {}) {
  const {
    maxIterations = 100,
    tolerance = 1e-4,
    distanceFn = euclideanDistance,
    seed = null,
    onIteration = null
  } = options;
  if (vectors.length === 0) {
    throw new Error("Cannot cluster empty vector array");
  }
  if (k < 1) {
    throw new Error(`k must be at least 1, got ${k}`);
  }
  if (k > vectors.length) {
    throw new Error(`k (${k}) cannot be greater than number of vectors (${vectors.length})`);
  }
  const dimensions = vectors[0].length;
  for (let i = 1; i < vectors.length; i++) {
    if (vectors[i].length !== dimensions) {
      throw new Error(`All vectors must have same dimensions. Expected ${dimensions}, got ${vectors[i].length} at index ${i}`);
    }
  }
  const centroids = initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed);
  let assignments = new Array(vectors.length);
  let iterations = 0;
  let converged = false;
  let previousInertia = Infinity;
  while (!converged && iterations < maxIterations) {
    // Assignment and inertia in a single pass: the distance to the winning
    // centroid is already known, so it is not measured a second time.
    const newAssignments = new Array(vectors.length);
    let iterationInertia = 0;
    for (let v = 0; v < vectors.length; v++) {
      const vector = vectors[v];
      let minDist = Infinity;
      let nearestCluster = 0;
      let foundCloser = false;
      for (let c = 0; c < k; c++) {
        const dist = distanceFn(vector, centroids[c]);
        if (dist < minDist) {
          minDist = dist;
          nearestCluster = c;
          foundCloser = true;
        }
      }
      // If no distance compared below Infinity (NaN or Infinity results) the
      // vector stays on cluster 0; use that centroid's actual distance so the
      // reported inertia is the same as measuring it separately.
      const assignedDist = foundCloser ? minDist : distanceFn(vector, centroids[0]);
      newAssignments[v] = nearestCluster;
      iterationInertia += assignedDist * assignedDist;
    }
    const inertiaChange = Math.abs(previousInertia - iterationInertia);
    converged = inertiaChange < tolerance;
    assignments = newAssignments;
    previousInertia = iterationInertia;
    if (onIteration) {
      onIteration(iterations + 1, iterationInertia, converged);
    }
    if (!converged) {
      // Update step: move every centroid to the mean of its members.
      const clusterSums = Array(k).fill(null).map(() => new Array(dimensions).fill(0));
      const clusterCounts = new Array(k).fill(0);
      vectors.forEach((vector, i) => {
        const cluster = assignments[i];
        clusterCounts[cluster]++;
        vector.forEach((val, j) => {
          clusterSums[cluster][j] += val;
        });
      });
      for (let i = 0; i < k; i++) {
        if (clusterCounts[i] > 0) {
          centroids[i] = clusterSums[i].map((sum) => sum / clusterCounts[i]);
        } else {
          // Empty cluster: restart it from a randomly chosen input vector.
          const randomIdx = Math.floor(Math.random() * vectors.length);
          centroids[i] = [...vectors[randomIdx]];
        }
      }
    }
    iterations++;
  }
  // Final inertia against the final centroids (they may have moved after
  // the last assignment step when the loop ended on maxIterations).
  let inertia = 0;
  vectors.forEach((vector, i) => {
    const dist = distanceFn(vector, centroids[assignments[i]]);
    inertia += dist * dist;
  });
  return {
    centroids,
    assignments,
    iterations,
    converged,
    inertia
  };
}
19267
/**
 * Choose k starting centroids with k-means++ style weighted sampling.
 *
 * The first centroid is `vectors[seed mod n]` when a seed is given,
 * otherwise a random vector. Each further centroid is drawn with
 * probability proportional to the squared distance to the nearest centroid
 * chosen so far. Only the first pick is controlled by `seed`; later picks
 * use `Math.random()`.
 *
 * @param {number[][]} vectors - Candidate vectors (non-empty).
 * @param {number} k - Number of centroids to produce.
 * @param {Function} distanceFn - `(a, b) => number`.
 * @param {?number} seed - Optional index seed for the first centroid.
 * @returns {number[][]} k centroids, each a copy of an input vector.
 */
function initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed) {
  const centroids = [];
  const n = vectors.length;

  // Wrap the seed into [0, n) so negative or fractional seeds still select
  // a real vector; anything unusable falls back to a random first pick.
  const seededIndex = seed === null || seed === undefined
    ? NaN
    : ((Math.trunc(seed) % n) + n) % n;
  const firstIndex = Number.isFinite(seededIndex)
    ? seededIndex
    : Math.floor(Math.random() * n);
  centroids.push([...vectors[firstIndex]]);

  for (let i = 1; i < k; i++) {
    const squaredDistances = vectors.map((vector) => {
      const nearest = Math.min(...centroids.map((c) => distanceFn(vector, c)));
      return nearest * nearest;
    });
    const totalSquared = squaredDistances.reduce((a, b) => a + b, 0);
    if (totalSquared === 0) {
      // Every vector coincides with a chosen centroid: pick uniformly.
      const randomIdx = Math.floor(Math.random() * n);
      centroids.push([...vectors[randomIdx]]);
      continue;
    }
    const threshold = Math.random() * totalSquared;
    let cumulativeSum = 0;
    // Default to the last vector so a centroid is always produced, even if
    // the cumulative sum never reaches the threshold (e.g. NaN distances).
    let chosen = n - 1;
    for (let j = 0; j < n; j++) {
      cumulativeSum += squaredDistances[j];
      if (cumulativeSum >= threshold) {
        chosen = j;
        break;
      }
    }
    centroids.push([...vectors[chosen]]);
  }
  return centroids;
}
19295
/**
 * Run k-means for every k in [minK, maxK] and score each clustering with
 * several quality metrics, then recommend a k per metric plus a consensus.
 *
 * Metric functions (`silhouetteScore`, `daviesBouldinIndex`,
 * `calinskiHarabaszIndex`, `gapStatistic`, `clusteringStability`) come from
 * the file-level `metrics` namespace, resolved lazily on a microtask.
 *
 * @param {number[][]} vectors - Vectors to analyse.
 * @param {Object} [options]
 * @param {number} [options.minK=2] - Smallest k to try.
 * @param {number} [options.maxK] - Largest k to try; defaults to
 *   `min(10, floor(sqrt(vectors.length / 2)))`.
 * @param {Function} [options.distanceFn=euclideanDistance] - `(a, b) => number`.
 * @param {number} [options.nReferences=10] - Passed to `gapStatistic`.
 * @param {number} [options.stabilityRuns=5] - Passed to `clusteringStability` as `nRuns`.
 *   Any remaining options are forwarded to `kmeans`.
 * @returns {Promise<{results: Object[], recommendations: Object, consensus: number, summary: Object}>}
 * @throws {Error} If the k range is empty (minK greater than maxK).
 */
async function findOptimalK(vectors, options = {}) {
  const {
    minK = 2,
    maxK = Math.min(10, Math.floor(Math.sqrt(vectors.length / 2))),
    distanceFn = euclideanDistance,
    nReferences = 10,
    stabilityRuns = 5,
    ...kmeansOptions
  } = options;

  // With few vectors the default maxK drops below minK; without this check
  // the reductions below fail with "Reduce of empty array with no initial
  // value", which says nothing about the real cause.
  if (!(minK <= maxK)) {
    throw new Error(
      `No candidate k to evaluate: minK (${minK}) is greater than maxK (${maxK}) for ${vectors.length} vectors`
    );
  }

  const metricsModule = await Promise.resolve().then(function () { return metrics; });
  const {
    silhouetteScore,
    daviesBouldinIndex,
    calinskiHarabaszIndex,
    gapStatistic,
    clusteringStability
  } = metricsModule;

  const results = [];
  for (let k = minK; k <= maxK; k++) {
    const kmeansResult = kmeans(vectors, k, { ...kmeansOptions, distanceFn });
    const { assignments, centroids } = kmeansResult;
    const silhouette = silhouetteScore(vectors, assignments, centroids, distanceFn);
    const daviesBouldin = daviesBouldinIndex(vectors, assignments, centroids, distanceFn);
    const calinskiHarabasz = calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn);
    const gap = await gapStatistic(vectors, assignments, centroids, distanceFn, nReferences);
    const stability = clusteringStability(
      vectors,
      k,
      { ...kmeansOptions, distanceFn, nRuns: stabilityRuns }
    );
    results.push({
      k,
      inertia: kmeansResult.inertia,
      silhouette,
      daviesBouldin,
      calinskiHarabasz,
      gap: gap.gap,
      gapSk: gap.sk,
      stability: stability.stability,
      cvInertia: stability.cvInertia,
      iterations: kmeansResult.iterations,
      converged: kmeansResult.converged
    });
  }

  // Pick the k whose metric is strictly best; on ties the smaller k wins.
  const kWithHighest = (metric) =>
    results.reduce((best, curr) => (curr[metric] > best[metric] ? curr : best)).k;
  const kWithLowest = (metric) =>
    results.reduce((best, curr) => (curr[metric] < best[metric] ? curr : best)).k;

  const elbowK = findElbowPoint(results.map((r) => r.inertia));
  const recommendations = {
    elbow: minK + elbowK,
    silhouette: kWithHighest("silhouette"),
    daviesBouldin: kWithLowest("daviesBouldin"),
    calinskiHarabasz: kWithHighest("calinskiHarabasz"),
    gap: kWithHighest("gap"),
    stability: kWithHighest("stability")
  };

  // Consensus: the k recommended by the most metrics.
  const votes = Object.values(recommendations);
  const consensus = votes.reduce((acc, k) => {
    acc[k] = (acc[k] || 0) + 1;
    return acc;
  }, {});
  const consensusK = parseInt(
    Object.entries(consensus).reduce((a, b) => (b[1] > a[1] ? b : a))[0],
    10
  );

  return {
    results,
    recommendations,
    consensus: consensusK,
    summary: {
      analysisRange: `${minK}-${maxK}`,
      totalVectors: vectors.length,
      dimensions: vectors[0].length,
      recommendation: consensusK,
      confidence: consensus[consensusK] / votes.length
    }
  };
}
19399
/**
 * Locate the "elbow" of an inertia curve.
 *
 * For every interior point the discrete second difference
 * `prev - 2 * current + next` is computed; the index with the largest
 * value wins (the first one on ties).
 *
 * @param {number[]} inertias - Inertia per candidate k, in order.
 * @returns {number} Index of the elbow, or 0 when fewer than three points
 *   are given or no interior point beats -Infinity.
 */
function findElbowPoint(inertias) {
  const count = inertias.length;
  if (count < 3) {
    return 0;
  }
  let bestIndex = 0;
  let bestCurvature = -Infinity;
  const lastInterior = count - 2;
  for (let mid = 1; mid <= lastInterior; mid += 1) {
    const secondDifference = inertias[mid - 1] - 2 * inertias[mid] + inertias[mid + 1];
    if (secondDifference > bestCurvature) {
      bestCurvature = secondDifference;
      bestIndex = mid;
    }
  }
  return bestIndex;
}
19413
+
19414
/**
 * Error raised by the vector plugin.
 *
 * Forwards `details` to `PluginError` with `pluginName` preset to
 * "VectorPlugin" (a `pluginName` in `details` overrides it) and, unless the
 * caller supplies `details.description`, a built-in troubleshooting text
 * that includes `details.operation`.
 */
class VectorError extends PluginError {
  /**
   * @param {string} message - Error message.
   * @param {Object} [details={}] - Extra fields; `operation` and
   *   `description` are read here, everything is passed through.
   */
  constructor(message, details = {}) {
    // Template lines are kept flush left: their text is part of the message.
    const description = details.description || `
Vector Plugin Error

Operation: ${details.operation || "unknown"}

Common causes:
1. Vector dimension mismatch between vectors
2. Invalid distance metric specified (must be: cosine, euclidean, manhattan)
3. Empty vector array provided for clustering
4. k value larger than number of available vectors
5. Vector field not found or invalid in resource
6. Large vectors without proper behavior (use 'body-overflow' or 'body-only')

Available distance metrics:
- cosine: Best for normalized vectors, semantic similarity. Range: [0, 2]
- euclidean: Standard L2 distance, geometric proximity. Range: [0, \u221E)
- manhattan: L1 distance, faster computation. Range: [0, \u221E)

Storage considerations:
- Vectors > 250 dimensions may exceed S3 metadata limit (2KB)
- Use behavior: 'body-overflow' or 'body-only' for large vectors
- OpenAI ada-002 (1536 dims): ~10KB, requires body storage
- Sentence Transformers (384 dims): ~2.7KB, requires body storage
`.trim();
    super(message, {
      pluginName: "VectorPlugin",
      ...details,
      description
    });
  }
}
19446
+
19447
+ class VectorPlugin extends Plugin {
19448
+ constructor(options = {}) {
19449
+ super(options);
19450
+ this.config = {
19451
+ dimensions: 1536,
19452
+ // Default to OpenAI text-embedding-3-small/3-large
19453
+ distanceMetric: "cosine",
19454
+ // Default metric
19455
+ storageThreshold: 1500,
19456
+ // Bytes - warn if vectors exceed this
19457
+ autoFixBehavior: false,
19458
+ // Automatically set body-overflow
19459
+ autoDetectVectorField: true,
19460
+ // Auto-detect embedding:XXX fields
19461
+ emitEvents: true,
19462
+ // Emit events for monitoring
19463
+ verboseEvents: false,
19464
+ // Emit detailed progress events
19465
+ eventThrottle: 100,
19466
+ // Throttle progress events (ms)
19467
+ ...options
19468
+ };
19469
+ this.distanceFunctions = {
19470
+ cosine: cosineDistance,
19471
+ euclidean: euclideanDistance,
19472
+ manhattan: manhattanDistance
19473
+ };
19474
+ this._vectorFieldCache = /* @__PURE__ */ new Map();
19475
+ this._throttleState = /* @__PURE__ */ new Map();
19476
+ }
19477
+ async onInstall() {
19478
+ this.emit("installed", { plugin: "VectorPlugin" });
19479
+ this.validateVectorStorage();
19480
+ this.installResourceMethods();
19481
+ }
19482
+ async onStart() {
19483
+ this.emit("started", { plugin: "VectorPlugin" });
19484
+ }
19485
+ async onStop() {
19486
+ this.emit("stopped", { plugin: "VectorPlugin" });
19487
+ }
19488
+ async onUninstall(options) {
19489
+ for (const resource of Object.values(this.database.resources)) {
19490
+ delete resource.vectorSearch;
19491
+ delete resource.cluster;
19492
+ delete resource.vectorDistance;
19493
+ delete resource.similarTo;
19494
+ delete resource.findSimilar;
19495
+ delete resource.distance;
19496
+ }
19497
+ this.emit("uninstalled", { plugin: "VectorPlugin" });
19498
+ }
19499
+ /**
19500
+ * Validate vector storage configuration for all resources
19501
+ *
19502
+ * Detects large vector fields and warns if proper behavior is not set.
19503
+ * Can optionally auto-fix by setting body-overflow behavior.
19504
+ */
19505
+ validateVectorStorage() {
19506
+ for (const resource of Object.values(this.database.resources)) {
19507
+ const vectorFields = this.findVectorFields(resource.schema.attributes);
19508
+ if (vectorFields.length === 0) continue;
19509
+ const totalVectorSize = vectorFields.reduce((sum, f) => sum + f.estimatedBytes, 0);
19510
+ if (totalVectorSize > this.config.storageThreshold) {
19511
+ const hasCorrectBehavior = ["body-overflow", "body-only"].includes(resource.behavior);
19512
+ if (!hasCorrectBehavior) {
19513
+ const warning = {
19514
+ resource: resource.name,
19515
+ vectorFields: vectorFields.map((f) => ({
19516
+ field: f.name,
19517
+ dimensions: f.length,
19518
+ estimatedBytes: f.estimatedBytes
19519
+ })),
19520
+ totalEstimatedBytes: totalVectorSize,
19521
+ metadataLimit: 2047,
19522
+ currentBehavior: resource.behavior || "default",
19523
+ recommendation: "body-overflow"
19524
+ };
19525
+ this.emit("vector:storage-warning", warning);
19526
+ if (this.config.autoFixBehavior) {
19527
+ resource.behavior = "body-overflow";
19528
+ this.emit("vector:behavior-fixed", {
19529
+ resource: resource.name,
19530
+ newBehavior: "body-overflow"
19531
+ });
19532
+ } else {
19533
+ console.warn(`\u26A0\uFE0F VectorPlugin: Resource '${resource.name}' has large vector fields (${totalVectorSize} bytes estimated)`);
19534
+ console.warn(` Current behavior: '${resource.behavior || "default"}'`);
19535
+ console.warn(` Recommendation: Add behavior: 'body-overflow' or 'body-only' to resource configuration`);
19536
+ console.warn(` Large vectors will exceed S3 metadata limit (2047 bytes) and cause errors.`);
19537
+ }
19538
+ }
19539
+ }
19540
+ }
19541
+ }
19542
+ /**
19543
+ * Auto-detect vector field from resource schema
19544
+ *
19545
+ * Looks for fields with type 'embedding:XXX' pattern.
19546
+ * Caches result per resource for performance.
19547
+ *
19548
+ * @param {Resource} resource - Resource instance
19549
+ * @returns {string|null} Detected vector field name or null
19550
+ */
19551
+ detectVectorField(resource) {
19552
+ if (this._vectorFieldCache.has(resource.name)) {
19553
+ return this._vectorFieldCache.get(resource.name);
19554
+ }
19555
+ const vectorField = this._findEmbeddingField(resource.schema.attributes);
19556
+ this._vectorFieldCache.set(resource.name, vectorField);
19557
+ if (vectorField && this.config.emitEvents) {
19558
+ this.emit("vector:field-detected", {
19559
+ resource: resource.name,
19560
+ vectorField,
19561
+ timestamp: Date.now()
19562
+ });
19563
+ }
19564
+ return vectorField;
19565
+ }
19566
+ /**
19567
+ * Recursively find embedding:XXX field in attributes
19568
+ *
19569
+ * @param {Object} attributes - Resource attributes
19570
+ * @param {string} path - Current path (for nested objects)
19571
+ * @returns {string|null} Field path or null
19572
+ */
19573
+ _findEmbeddingField(attributes, path = "") {
19574
+ for (const [key, attr] of Object.entries(attributes)) {
19575
+ const fullPath = path ? `${path}.${key}` : key;
19576
+ if (typeof attr === "string" && attr.startsWith("embedding:")) {
19577
+ return fullPath;
19578
+ }
19579
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19580
+ return fullPath;
19581
+ }
19582
+ if (attr.type === "object" && attr.props) {
19583
+ const nested = this._findEmbeddingField(attr.props, fullPath);
19584
+ if (nested) return nested;
19585
+ }
19586
+ }
19587
+ return null;
19588
+ }
19589
+ /**
19590
+ * Emit event with throttling support
19591
+ *
19592
+ * @param {string} eventName - Event name
19593
+ * @param {Object} data - Event data
19594
+ * @param {string} throttleKey - Unique key for throttling (optional)
19595
+ */
19596
+ _emitEvent(eventName, data, throttleKey = null) {
19597
+ if (!this.config.emitEvents) return;
19598
+ if (throttleKey) {
19599
+ const now = Date.now();
19600
+ const lastEmit = this._throttleState.get(throttleKey);
19601
+ if (lastEmit && now - lastEmit < this.config.eventThrottle) {
19602
+ return;
19603
+ }
19604
+ this._throttleState.set(throttleKey, now);
19605
+ }
19606
+ this.emit(eventName, data);
19607
+ }
19608
+ /**
19609
+ * Find vector fields in resource attributes
19610
+ *
19611
+ * @param {Object} attributes - Resource attributes
19612
+ * @param {string} path - Current path (for nested objects)
19613
+ * @returns {Array} Array of vector field info
19614
+ */
19615
+ findVectorFields(attributes, path = "") {
19616
+ const vectors = [];
19617
+ for (const [key, attr] of Object.entries(attributes)) {
19618
+ const fullPath = path ? `${path}.${key}` : key;
19619
+ if (attr.type === "array" && attr.items === "number" && attr.length) {
19620
+ vectors.push({
19621
+ name: fullPath,
19622
+ length: attr.length,
19623
+ estimatedBytes: this.estimateVectorBytes(attr.length)
19624
+ });
19625
+ }
19626
+ if (attr.type === "object" && attr.props) {
19627
+ vectors.push(...this.findVectorFields(attr.props, fullPath));
19628
+ }
19629
+ }
19630
+ return vectors;
19631
+ }
19632
+ /**
19633
+ * Estimate bytes required to store a vector in JSON format
19634
+ *
19635
+ * Conservative estimate: ~7 bytes per number + array overhead
19636
+ *
19637
+ * @param {number} dimensions - Number of dimensions
19638
+ * @returns {number} Estimated bytes
19639
+ */
19640
+ estimateVectorBytes(dimensions) {
19641
+ return dimensions * 7 + 50;
19642
+ }
19643
+ /**
19644
+ * Install vector methods on all resources
19645
+ */
19646
+ installResourceMethods() {
19647
+ for (const resource of Object.values(this.database.resources)) {
19648
+ const searchMethod = this.createVectorSearchMethod(resource);
19649
+ const clusterMethod = this.createClusteringMethod(resource);
19650
+ const distanceMethod = this.createDistanceMethod();
19651
+ resource.vectorSearch = searchMethod;
19652
+ resource.cluster = clusterMethod;
19653
+ resource.vectorDistance = distanceMethod;
19654
+ resource.similarTo = searchMethod;
19655
+ resource.findSimilar = searchMethod;
19656
+ resource.distance = distanceMethod;
19657
+ }
19658
+ }
19659
+ /**
19660
+ * Create vector search method for a resource
19661
+ *
19662
+ * Performs K-nearest neighbors search to find similar vectors.
19663
+ *
19664
+ * @param {Resource} resource - Resource instance
19665
+ * @returns {Function} Vector search method
19666
+ */
19667
+ createVectorSearchMethod(resource) {
19668
+ return async (queryVector, options = {}) => {
19669
+ const startTime = Date.now();
19670
+ let vectorField = options.vectorField;
19671
+ if (!vectorField && this.config.autoDetectVectorField) {
19672
+ vectorField = this.detectVectorField(resource);
19673
+ if (!vectorField) {
19674
+ vectorField = "vector";
19675
+ }
19676
+ } else if (!vectorField) {
19677
+ vectorField = "vector";
19678
+ }
19679
+ const {
19680
+ limit = 10,
19681
+ distanceMetric = this.config.distanceMetric,
19682
+ threshold = null,
19683
+ partition = null
19684
+ } = options;
19685
+ const distanceFn = this.distanceFunctions[distanceMetric];
19686
+ if (!distanceFn) {
19687
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19688
+ operation: "vectorSearch",
19689
+ availableMetrics: Object.keys(this.distanceFunctions),
19690
+ providedMetric: distanceMetric
19691
+ });
19692
+ this._emitEvent("vector:search-error", {
19693
+ resource: resource.name,
19694
+ error: error.message,
19695
+ timestamp: Date.now()
19696
+ });
19697
+ throw error;
19698
+ }
19699
+ this._emitEvent("vector:search-start", {
19700
+ resource: resource.name,
19701
+ vectorField,
19702
+ limit,
19703
+ distanceMetric,
19704
+ partition,
19705
+ threshold,
19706
+ queryDimensions: queryVector.length,
19707
+ timestamp: startTime
19708
+ });
19709
+ try {
19710
+ let allRecords;
19711
+ if (partition) {
19712
+ this._emitEvent("vector:partition-filter", {
19713
+ resource: resource.name,
19714
+ partition,
19715
+ timestamp: Date.now()
19716
+ });
19717
+ allRecords = await resource.list({ partition, partitionValues: partition });
19718
+ } else {
19719
+ allRecords = await resource.getAll();
19720
+ }
19721
+ const totalRecords = allRecords.length;
19722
+ let processedRecords = 0;
19723
+ let dimensionMismatches = 0;
19724
+ const results = allRecords.filter((record) => record[vectorField] && Array.isArray(record[vectorField])).map((record, index) => {
19725
+ try {
19726
+ const distance = distanceFn(queryVector, record[vectorField]);
19727
+ processedRecords++;
19728
+ if (this.config.verboseEvents && processedRecords % 100 === 0) {
19729
+ this._emitEvent("vector:search-progress", {
19730
+ resource: resource.name,
19731
+ processed: processedRecords,
19732
+ total: totalRecords,
19733
+ progress: processedRecords / totalRecords * 100,
19734
+ timestamp: Date.now()
19735
+ }, `search-${resource.name}`);
19736
+ }
19737
+ return { record, distance };
19738
+ } catch (err) {
19739
+ dimensionMismatches++;
19740
+ if (this.config.verboseEvents) {
19741
+ this._emitEvent("vector:dimension-mismatch", {
19742
+ resource: resource.name,
19743
+ recordIndex: index,
19744
+ expected: queryVector.length,
19745
+ got: record[vectorField]?.length,
19746
+ timestamp: Date.now()
19747
+ });
19748
+ }
19749
+ return null;
19750
+ }
19751
+ }).filter((result) => result !== null).filter((result) => threshold === null || result.distance <= threshold).sort((a, b) => a.distance - b.distance).slice(0, limit);
19752
+ const duration = Date.now() - startTime;
19753
+ const throughput = totalRecords / (duration / 1e3);
19754
+ this._emitEvent("vector:search-complete", {
19755
+ resource: resource.name,
19756
+ vectorField,
19757
+ resultsCount: results.length,
19758
+ totalRecords,
19759
+ processedRecords,
19760
+ dimensionMismatches,
19761
+ duration,
19762
+ throughput: throughput.toFixed(2),
19763
+ timestamp: Date.now()
19764
+ });
19765
+ if (this.config.verboseEvents) {
19766
+ this._emitEvent("vector:performance", {
19767
+ operation: "search",
19768
+ resource: resource.name,
19769
+ duration,
19770
+ throughput: throughput.toFixed(2),
19771
+ recordsPerSecond: (processedRecords / (duration / 1e3)).toFixed(2),
19772
+ timestamp: Date.now()
19773
+ });
19774
+ }
19775
+ return results;
19776
+ } catch (error) {
19777
+ this._emitEvent("vector:search-error", {
19778
+ resource: resource.name,
19779
+ error: error.message,
19780
+ stack: error.stack,
19781
+ timestamp: Date.now()
19782
+ });
19783
+ throw error;
19784
+ }
19785
+ };
19786
+ }
19787
+ /**
19788
+ * Create clustering method for a resource
19789
+ *
19790
+ * Performs k-means clustering on resource vectors.
19791
+ *
19792
+ * @param {Resource} resource - Resource instance
19793
+ * @returns {Function} Clustering method
19794
+ */
19795
+ createClusteringMethod(resource) {
19796
+ return async (options = {}) => {
19797
+ const startTime = Date.now();
19798
+ let vectorField = options.vectorField;
19799
+ if (!vectorField && this.config.autoDetectVectorField) {
19800
+ vectorField = this.detectVectorField(resource);
19801
+ if (!vectorField) {
19802
+ vectorField = "vector";
19803
+ }
19804
+ } else if (!vectorField) {
19805
+ vectorField = "vector";
19806
+ }
19807
+ const {
19808
+ k = 5,
19809
+ distanceMetric = this.config.distanceMetric,
19810
+ partition = null,
19811
+ ...kmeansOptions
19812
+ } = options;
19813
+ const distanceFn = this.distanceFunctions[distanceMetric];
19814
+ if (!distanceFn) {
19815
+ const error = new VectorError(`Invalid distance metric: ${distanceMetric}`, {
19816
+ operation: "cluster",
19817
+ availableMetrics: Object.keys(this.distanceFunctions),
19818
+ providedMetric: distanceMetric
19819
+ });
19820
+ this._emitEvent("vector:cluster-error", {
19821
+ resource: resource.name,
19822
+ error: error.message,
19823
+ timestamp: Date.now()
19824
+ });
19825
+ throw error;
19826
+ }
19827
+ this._emitEvent("vector:cluster-start", {
19828
+ resource: resource.name,
19829
+ vectorField,
19830
+ k,
19831
+ distanceMetric,
19832
+ partition,
19833
+ maxIterations: kmeansOptions.maxIterations || 100,
19834
+ timestamp: startTime
19835
+ });
19836
+ try {
19837
+ let allRecords;
19838
+ if (partition) {
19839
+ this._emitEvent("vector:partition-filter", {
19840
+ resource: resource.name,
19841
+ partition,
19842
+ timestamp: Date.now()
19843
+ });
19844
+ allRecords = await resource.list({ partition, partitionValues: partition });
19845
+ } else {
19846
+ allRecords = await resource.getAll();
19847
+ }
19848
+ const recordsWithVectors = allRecords.filter(
19849
+ (record) => record[vectorField] && Array.isArray(record[vectorField])
19850
+ );
19851
+ if (recordsWithVectors.length === 0) {
19852
+ const error = new VectorError("No vectors found in resource", {
19853
+ operation: "cluster",
19854
+ resourceName: resource.name,
19855
+ vectorField
19856
+ });
19857
+ this._emitEvent("vector:empty-dataset", {
19858
+ resource: resource.name,
19859
+ vectorField,
19860
+ totalRecords: allRecords.length,
19861
+ timestamp: Date.now()
19862
+ });
19863
+ throw error;
19864
+ }
19865
+ const vectors = recordsWithVectors.map((record) => record[vectorField]);
19866
+ const result = kmeans(vectors, k, {
19867
+ ...kmeansOptions,
19868
+ distanceFn,
19869
+ onIteration: this.config.verboseEvents ? (iteration, inertia, converged) => {
19870
+ this._emitEvent("vector:cluster-iteration", {
19871
+ resource: resource.name,
19872
+ k,
19873
+ iteration,
19874
+ inertia,
19875
+ converged,
19876
+ timestamp: Date.now()
19877
+ }, `cluster-${resource.name}`);
19878
+ } : void 0
19879
+ });
19880
+ if (result.converged) {
19881
+ this._emitEvent("vector:cluster-converged", {
19882
+ resource: resource.name,
19883
+ k,
19884
+ iterations: result.iterations,
19885
+ inertia: result.inertia,
19886
+ timestamp: Date.now()
19887
+ });
19888
+ }
19889
+ const clusters = Array(k).fill(null).map(() => []);
19890
+ recordsWithVectors.forEach((record, i) => {
19891
+ const clusterIndex = result.assignments[i];
19892
+ clusters[clusterIndex].push(record);
19893
+ });
19894
+ const duration = Date.now() - startTime;
19895
+ const clusterSizes = clusters.map((c) => c.length);
19896
+ this._emitEvent("vector:cluster-complete", {
19897
+ resource: resource.name,
19898
+ vectorField,
19899
+ k,
19900
+ vectorCount: vectors.length,
19901
+ iterations: result.iterations,
19902
+ converged: result.converged,
19903
+ inertia: result.inertia,
19904
+ clusterSizes,
19905
+ duration,
19906
+ timestamp: Date.now()
19907
+ });
19908
+ if (this.config.verboseEvents) {
19909
+ this._emitEvent("vector:performance", {
19910
+ operation: "clustering",
19911
+ resource: resource.name,
19912
+ k,
19913
+ duration,
19914
+ iterationsPerSecond: (result.iterations / (duration / 1e3)).toFixed(2),
19915
+ vectorsPerSecond: (vectors.length / (duration / 1e3)).toFixed(2),
19916
+ timestamp: Date.now()
19917
+ });
19918
+ }
19919
+ return {
19920
+ clusters,
19921
+ centroids: result.centroids,
19922
+ inertia: result.inertia,
19923
+ iterations: result.iterations,
19924
+ converged: result.converged
19925
+ };
19926
+ } catch (error) {
19927
+ this._emitEvent("vector:cluster-error", {
19928
+ resource: resource.name,
19929
+ error: error.message,
19930
+ stack: error.stack,
19931
+ timestamp: Date.now()
19932
+ });
19933
+ throw error;
19934
+ }
19935
+ };
19936
+ }
19937
+ /**
19938
+ * Create distance calculation method
19939
+ *
19940
+ * @returns {Function} Distance method
19941
+ */
19942
+ createDistanceMethod() {
19943
+ return (vector1, vector2, metric = this.config.distanceMetric) => {
19944
+ const distanceFn = this.distanceFunctions[metric];
19945
+ if (!distanceFn) {
19946
+ throw new VectorError(`Invalid distance metric: ${metric}`, {
19947
+ operation: "vectorDistance",
19948
+ availableMetrics: Object.keys(this.distanceFunctions),
19949
+ providedMetric: metric
19950
+ });
19951
+ }
19952
+ return distanceFn(vector1, vector2);
19953
+ };
19954
+ }
19955
+ /**
19956
+ * Static utility: Normalize vector
19957
+ *
19958
+ * @param {number[]} vector - Input vector
19959
+ * @returns {number[]} Normalized vector
19960
+ */
19961
+ static normalize(vector) {
19962
+ return normalize(vector);
19963
+ }
19964
+ /**
19965
+ * Static utility: Calculate dot product
19966
+ *
19967
+ * @param {number[]} vector1 - First vector
19968
+ * @param {number[]} vector2 - Second vector
19969
+ * @returns {number} Dot product
19970
+ */
19971
+ static dotProduct(vector1, vector2) {
19972
+ return dotProduct(vector1, vector2);
19973
+ }
19974
+ /**
19975
+ * Static utility: Find optimal K for clustering
19976
+ *
19977
+ * Analyzes clustering quality across a range of K values using
19978
+ * multiple evaluation metrics.
19979
+ *
19980
+ * @param {number[][]} vectors - Vectors to analyze
19981
+ * @param {Object} options - Configuration options
19982
+ * @returns {Promise<Object>} Analysis results with recommendations
19983
+ */
19984
+ static async findOptimalK(vectors, options) {
19985
+ return findOptimalK(vectors, options);
19986
+ }
19987
+ }
19988
+
19989
/**
 * Mean silhouette coefficient of a clustering.
 *
 * For each point, `a` is its mean distance to the other members of its own
 * cluster and `b` the smallest mean distance to the members of any other
 * non-empty cluster; the point scores `(b - a) / max(a, b)` (0 when both are
 * 0). Points that sit alone in their cluster, or that have no other
 * non-empty cluster to compare against, are left out of the average.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids (only their count is used)
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @returns {number} Average score over the scored points, or 0 when none qualify
 */
function silhouetteScore(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const pointCount = vectors.length;

  // Point indices grouped by cluster.
  const members = Array.from({ length: clusterCount }, () => []);
  for (let p = 0; p < pointCount; p++) {
    members[assignments[p]].push(p);
  }

  const hasMultiPointCluster = members.some((group) => group.length > 1);
  if (!hasMultiPointCluster) {
    return 0;
  }

  // Sum of distances from point `origin` to every listed point except itself.
  const distanceSum = (origin, indices) => {
    let sum = 0;
    for (const other of indices) {
      if (other !== origin) {
        sum += distanceFn(vectors[origin], vectors[other]);
      }
    }
    return sum;
  };

  let scoreSum = 0;
  let scoredPoints = 0;
  for (let p = 0; p < pointCount; p++) {
    const home = assignments[p];
    const ownGroup = members[home];
    if (ownGroup.length === 1) continue;

    const cohesion = distanceSum(p, ownGroup) / (ownGroup.length - 1);

    let separation = Infinity;
    members.forEach((group, g) => {
      if (g === home || group.length === 0) return;
      separation = Math.min(separation, distanceSum(p, group) / group.length);
    });
    if (separation === Infinity) continue;

    const larger = Math.max(cohesion, separation);
    scoreSum += larger === 0 ? 0 : (separation - cohesion) / larger;
    scoredPoints++;
  }

  return scoredPoints > 0 ? scoreSum / scoredPoints : 0;
}
20032
/**
 * Davies-Bouldin index of a clustering (lower is better).
 *
 * Each cluster's scatter is the mean distance of its points to its centroid.
 * For every non-empty cluster the worst `(scatter_i + scatter_j) / d(c_i, c_j)`
 * ratio against the other non-empty clusters is taken (pairs whose centroids
 * coincide are skipped), and the index is the mean of those worst ratios.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @returns {number} Index value, or 0 when no cluster has any point
 */
function daviesBouldinIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const clusterCount = centroids.length;
  const scatter = new Array(clusterCount).fill(0);
  const sizes = new Array(clusterCount).fill(0);

  for (let p = 0; p < vectors.length; p++) {
    const label = assignments[p];
    scatter[label] += distanceFn(vectors[p], centroids[label]);
    sizes[label]++;
  }

  // Turn accumulated distances into per-cluster means.
  for (let c = 0; c < clusterCount; c++) {
    if (sizes[c] > 0) {
      scatter[c] /= sizes[c];
    }
  }

  let ratioSum = 0;
  let populated = 0;
  for (let a = 0; a < clusterCount; a++) {
    if (sizes[a] === 0) continue;

    let worstRatio = 0;
    for (let b = 0; b < clusterCount; b++) {
      const comparable = a !== b && sizes[b] !== 0;
      if (!comparable) continue;

      const centroidGap = distanceFn(centroids[a], centroids[b]);
      if (centroidGap === 0) continue;

      worstRatio = Math.max(worstRatio, (scatter[a] + scatter[b]) / centroidGap);
    }

    ratioSum += worstRatio;
    populated++;
  }

  return populated > 0 ? ratioSum / populated : 0;
}
20063
/**
 * Calinski-Harabasz index (variance ratio criterion) of a clustering.
 *
 * Computes `(BGSS / (k - 1)) / (WCSS / (n - k))`, where BGSS is the
 * size-weighted squared distance of each centroid to the overall mean and
 * WCSS the squared distance of each point to its own centroid. Higher values
 * mean better-separated clusters.
 *
 * The ratio is only defined for `1 < k < n`. Outside that range (including
 * an empty data set or more centroids than points, which would otherwise
 * crash or yield a negative score through the `n - k` denominator) the
 * function returns 0, as it also does when WCSS is 0.
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @returns {number} Index value, or 0 when it is undefined
 */
function calinskiHarabaszIndex(vectors, assignments, centroids, distanceFn = euclideanDistance) {
  const n = vectors.length;
  const k = centroids.length;
  // Degenerate configurations: both degrees of freedom must be positive.
  if (k <= 1 || k >= n) return 0;

  const dimensions = vectors[0].length;
  const overallCentroid = new Array(dimensions).fill(0);
  vectors.forEach((vector) => {
    vector.forEach((val, dim) => {
      overallCentroid[dim] += val;
    });
  });
  overallCentroid.forEach((val, dim, arr) => {
    arr[dim] = val / n;
  });

  const clusterCounts = new Array(k).fill(0);
  vectors.forEach((_, i) => {
    clusterCounts[assignments[i]]++;
  });

  // Between-group dispersion; empty clusters contribute nothing.
  let bgss = 0;
  for (let i = 0; i < k; i++) {
    if (clusterCounts[i] === 0) continue;
    const dist = distanceFn(centroids[i], overallCentroid);
    bgss += clusterCounts[i] * dist * dist;
  }

  // Within-cluster dispersion.
  let wcss = 0;
  vectors.forEach((vector, i) => {
    const dist = distanceFn(vector, centroids[assignments[i]]);
    wcss += dist * dist;
  });

  if (wcss === 0) return 0;
  return bgss / (k - 1) / (wcss / (n - k));
}
20096
/**
 * Gap statistic of a clustering.
 *
 * Compares the log within-cluster dispersion of the data with the average
 * log dispersion obtained by clustering `nReferences` synthetic data sets,
 * each drawn uniformly (via `Math.random`) from the per-dimension bounding
 * box of the data and clustered with `kmeans` (50 iterations max).
 *
 * @param {number[][]} vectors - Data points
 * @param {number[]} assignments - Cluster index of each point
 * @param {number[][]} centroids - Cluster centroids
 * @param {Function} distanceFn - Distance function `(a, b) => number`
 * @param {number} nReferences - Number of reference data sets
 * @returns {Promise<Object>} `{ gap, sk, expectedWk, actualWk }`
 */
async function gapStatistic(vectors, assignments, centroids, distanceFn = euclideanDistance, nReferences = 10) {
  const pointCount = vectors.length;
  const clusterCount = centroids.length;
  const dimensionCount = vectors[0].length;

  // log(sum of squared point-to-centroid distances); epsilon avoids log(0).
  const logDispersion = (points, labels, centers) => {
    let total = 0;
    points.forEach((point, idx) => {
      const d = distanceFn(point, centers[labels[idx]]);
      total += d * d;
    });
    return Math.log(total + 1e-10);
  };

  const actualWk = logDispersion(vectors, assignments, centroids);

  // Per-dimension bounding box of the data.
  const lower = new Array(dimensionCount).fill(Infinity);
  const upper = new Array(dimensionCount).fill(-Infinity);
  vectors.forEach((vector) => {
    vector.forEach((value, dim) => {
      lower[dim] = Math.min(lower[dim], value);
      upper[dim] = Math.max(upper[dim], value);
    });
  });

  const referenceWks = [];
  for (let ref = 0; ref < nReferences; ref++) {
    const sample = [];
    for (let i = 0; i < pointCount; i++) {
      const point = new Array(dimensionCount);
      for (let dim = 0; dim < dimensionCount; dim++) {
        point[dim] = lower[dim] + Math.random() * (upper[dim] - lower[dim]);
      }
      sample.push(point);
    }
    const fit = kmeans(sample, clusterCount, { maxIterations: 50, distanceFn });
    referenceWks.push(logDispersion(sample, fit.assignments, fit.centroids));
  }

  const expectedWk = referenceWks.reduce((a, b) => a + b, 0) / nReferences;
  const squaredDeviations = referenceWks.reduce(
    (sum, value) => sum + Math.pow(value - expectedWk, 2),
    0
  );
  const sdk = Math.sqrt(squaredDeviations / nReferences);

  return {
    gap: expectedWk - actualWk,
    sk: sdk * Math.sqrt(1 + 1 / nReferences),
    expectedWk,
    actualWk
  };
}
20140
/**
 * Measure how reproducible a k-means clustering is across repeated runs.
 *
 * Runs `kmeans` `nRuns` times, passing the run index as `seed`, then reports
 * the spread of the resulting inertias and the mean pairwise agreement of
 * the assignments (via `calculateAssignmentSimilarity`).
 *
 * @param {number[][]} vectors - Data points
 * @param {number} k - Number of clusters
 * @param {Object} options - `nRuns` (default 10), `distanceFn`, plus any
 *   option forwarded to `kmeans`
 * @returns {Object} `{ avgInertia, stdInertia, cvInertia, avgSimilarity, stability }`
 */
function clusteringStability(vectors, k, options = {}) {
  const {
    nRuns = 10,
    distanceFn = euclideanDistance,
    ...kmeansOptions
  } = options;

  const runs = [];
  for (let seed = 0; seed < nRuns; seed++) {
    // The run index doubles as the seed so every run starts differently.
    runs.push(kmeans(vectors, k, { ...kmeansOptions, distanceFn, seed }));
  }
  const inertias = runs.map((run) => run.inertia);
  const labelings = runs.map((run) => run.assignments);

  // Agreement of every unordered pair of runs.
  const pairScores = [];
  labelings.forEach((first, i) => {
    for (let j = i + 1; j < nRuns; j++) {
      pairScores.push(calculateAssignmentSimilarity(first, labelings[j]));
    }
  });

  const sum = (values) => values.reduce((a, b) => a + b, 0);

  const avgInertia = sum(inertias) / nRuns;
  const inertiaVariance = inertias.reduce(
    (acc, value) => acc + Math.pow(value - avgInertia, 2),
    0
  ) / nRuns;
  const stdInertia = Math.sqrt(inertiaVariance);

  let avgSimilarity = 1;
  if (pairScores.length > 0) {
    avgSimilarity = sum(pairScores) / pairScores.length;
  }

  return {
    avgInertia,
    stdInertia,
    // Coefficient of variation
    cvInertia: avgInertia !== 0 ? stdInertia / avgInertia : 0,
    avgSimilarity,
    // Higher is more stable
    stability: avgSimilarity
  };
}
20180
/**
 * Pairwise agreement between two labelings of the same points.
 *
 * For every unordered pair of points, the two labelings agree when both put
 * the pair in the same cluster or both separate it. The result is the
 * fraction of agreeing pairs, so it does not depend on how clusters are
 * numbered.
 *
 * @param {Array} assignments1 - First labeling
 * @param {Array} assignments2 - Second labeling (read at the same indices)
 * @returns {number} Fraction of agreeing pairs in [0, 1]; 1 when there are no pairs
 */
function calculateAssignmentSimilarity(assignments1, assignments2) {
  const count = assignments1.length;
  const pairTotal = count * (count - 1) / 2;

  let agreeing = 0;
  for (let first = 0; first < count; first++) {
    for (let second = first + 1; second < count; second++) {
      const togetherInFirst = assignments1[first] === assignments1[second];
      const togetherInSecond = assignments2[first] === assignments2[second];
      agreeing += togetherInFirst === togetherInSecond ? 1 : 0;
    }
  }

  if (pairTotal > 0) {
    return agreeing / pairTotal;
  }
  return 1;
}
20195
+
20196
// Frozen, prototype-less namespace object bundling the clustering metric functions.
var metrics = /*#__PURE__*/Object.freeze({
  __proto__: null,
  calinskiHarabaszIndex,
  clusteringStability,
  daviesBouldinIndex,
  gapStatistic,
  silhouetteScore
});
20204
+
20205
+ export { AVAILABLE_BEHAVIORS, AnalyticsNotEnabledError, AuditPlugin, AuthenticationError, BackupPlugin, BaseError, BehaviorError, CachePlugin, Client, ConnectionString, ConnectionStringError, CostsPlugin, CryptoError, DEFAULT_BEHAVIOR, Database, DatabaseError, EncryptionError, ErrorMap, EventualConsistencyPlugin, FullTextPlugin, InvalidResourceItem, MetadataLimitError, MetricsPlugin, MissingMetadata, NoSuchBucket, NoSuchKey, NotFound, PartitionDriverError, PartitionError, PermissionError, Plugin, PluginError, PluginObject, PluginStorageError, QueueConsumerPlugin, ReplicatorPlugin, Resource, ResourceError, ResourceIdsPageReader, ResourceIdsReader, ResourceNotFound, ResourceReader, ResourceWriter, S3QueuePlugin, Database as S3db, S3dbError, SchedulerPlugin, Schema, SchemaError, StateMachinePlugin, StreamError, UnknownError, ValidationError, Validator, VectorPlugin, behaviors, calculateAttributeNamesSize, calculateAttributeSizes, calculateEffectiveLimit, calculateSystemOverhead, calculateTotalSize, calculateUTF8Bytes, clearUTF8Cache, clearUTF8Memo, clearUTF8Memory, decode, decodeDecimal, decodeFixedPoint, decrypt, S3db as default, encode, encodeDecimal, encodeFixedPoint, encrypt, getBehavior, getSizeBreakdown, idGenerator, mapAwsError, md5, passwordGenerator, sha256, streamToString, transformValue, tryFn, tryFnSync };
18936
20206
  //# sourceMappingURL=s3db.es.js.map