s3db.js 11.2.3 → 11.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/dist/s3db-cli.js +588 -74
  2. package/dist/s3db.cjs.js +2472 -150
  3. package/dist/s3db.cjs.js.map +1 -1
  4. package/dist/s3db.es.js +2464 -151
  5. package/dist/s3db.es.js.map +1 -1
  6. package/package.json +2 -1
  7. package/src/behaviors/enforce-limits.js +28 -4
  8. package/src/behaviors/index.js +6 -1
  9. package/src/client.class.js +11 -1
  10. package/src/concerns/base62.js +70 -0
  11. package/src/concerns/partition-queue.js +7 -1
  12. package/src/concerns/plugin-storage.js +75 -13
  13. package/src/database.class.js +19 -4
  14. package/src/errors.js +306 -27
  15. package/src/partition-drivers/base-partition-driver.js +12 -2
  16. package/src/partition-drivers/index.js +7 -1
  17. package/src/partition-drivers/memory-partition-driver.js +20 -5
  18. package/src/partition-drivers/sqs-partition-driver.js +6 -1
  19. package/src/plugins/audit.errors.js +46 -0
  20. package/src/plugins/backup/base-backup-driver.class.js +36 -6
  21. package/src/plugins/backup/filesystem-backup-driver.class.js +55 -7
  22. package/src/plugins/backup/index.js +40 -9
  23. package/src/plugins/backup/multi-backup-driver.class.js +69 -9
  24. package/src/plugins/backup/s3-backup-driver.class.js +48 -6
  25. package/src/plugins/backup.errors.js +45 -0
  26. package/src/plugins/cache/cache.class.js +8 -1
  27. package/src/plugins/cache.errors.js +47 -0
  28. package/src/plugins/cache.plugin.js +8 -1
  29. package/src/plugins/fulltext.errors.js +46 -0
  30. package/src/plugins/fulltext.plugin.js +15 -3
  31. package/src/plugins/index.js +1 -0
  32. package/src/plugins/metrics.errors.js +46 -0
  33. package/src/plugins/queue-consumer.plugin.js +31 -4
  34. package/src/plugins/queue.errors.js +46 -0
  35. package/src/plugins/replicator.errors.js +46 -0
  36. package/src/plugins/replicator.plugin.js +40 -5
  37. package/src/plugins/replicators/base-replicator.class.js +19 -3
  38. package/src/plugins/replicators/index.js +9 -3
  39. package/src/plugins/replicators/s3db-replicator.class.js +38 -8
  40. package/src/plugins/scheduler.errors.js +46 -0
  41. package/src/plugins/scheduler.plugin.js +79 -19
  42. package/src/plugins/state-machine.errors.js +47 -0
  43. package/src/plugins/state-machine.plugin.js +86 -17
  44. package/src/plugins/vector/distances.js +173 -0
  45. package/src/plugins/vector/kmeans.js +367 -0
  46. package/src/plugins/vector/metrics.js +369 -0
  47. package/src/plugins/vector/vector-error.js +43 -0
  48. package/src/plugins/vector.plugin.js +687 -0
  49. package/src/schema.class.js +232 -41
  50. package/src/stream/index.js +6 -1
  51. package/src/stream/resource-reader.class.js +6 -1
  52. package/src/validator.class.js +8 -0
@@ -0,0 +1,367 @@
1
+ /**
2
+ * K-Means Clustering Implementation
3
+ *
4
+ * Provides k-means clustering with k-means++ initialization
5
+ * and comprehensive optimal K analysis using multiple metrics.
6
+ */
7
+
8
+ import { euclideanDistance } from './distances.js';
9
+
10
/**
 * K-means clustering algorithm
 *
 * Centroids are initialized with k-means++ and then refined by alternating
 * an assignment step and a centroid update step until the change in inertia
 * drops below `tolerance` or `maxIterations` is reached.
 *
 * When `seed` is provided, every random decision (k-means++ sampling and
 * re-seeding of empty clusters) is drawn from a deterministic generator, so
 * two calls with the same inputs and seed return identical results.
 *
 * @param {number[][]} vectors - Array of vectors to cluster
 * @param {number} k - Number of clusters (integer, 1 <= k <= vectors.length)
 * @param {Object} options - Configuration options
 * @param {number} options.maxIterations - Maximum iterations (default: 100)
 * @param {number} options.tolerance - Convergence tolerance (default: 0.0001)
 * @param {Function} options.distanceFn - Distance function (default: euclideanDistance)
 * @param {number|null} options.seed - Random seed for reproducibility (default: null)
 * @param {Function} options.onIteration - Callback for each iteration (iteration, inertia, converged) (default: null)
 * @returns {Object} Clustering results: { centroids, assignments, iterations, converged, inertia }
 * @throws {Error} If invalid parameters
 */
export function kmeans(vectors, k, options = {}) {
  const {
    maxIterations = 100,
    tolerance = 0.0001,
    distanceFn = euclideanDistance,
    seed = null,
    onIteration = null
  } = options;

  if (vectors.length === 0) {
    throw new Error('Cannot cluster empty vector array');
  }

  if (k < 1) {
    throw new Error(`k must be at least 1, got ${k}`);
  }

  // A fractional / NaN / non-numeric k would otherwise surface later as an
  // opaque RangeError from Array(k) or as a wrong number of centroids.
  if (!Number.isInteger(k)) {
    throw new Error(`k must be an integer, got ${k}`);
  }

  if (k > vectors.length) {
    throw new Error(`k (${k}) cannot be greater than number of vectors (${vectors.length})`);
  }

  const dimensions = vectors[0].length;

  // Validate all vectors have same dimensions
  for (let i = 1; i < vectors.length; i++) {
    if (vectors[i].length !== dimensions) {
      throw new Error(`All vectors must have same dimensions. Expected ${dimensions}, got ${vectors[i].length} at index ${i}`);
    }
  }

  // One random source shared by initialization and empty-cluster re-seeding,
  // so a seeded run is deterministic end to end.
  const random = createRandomSource(seed);

  // Initialize centroids using k-means++
  const centroids = initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed, random);

  let assignments = new Array(vectors.length);
  let iterations = 0;
  let converged = false;
  let previousInertia = Infinity;

  while (!converged && iterations < maxIterations) {
    // Assign each vector to nearest centroid
    assignments = vectors.map(vector => nearestCentroidIndex(vector, centroids, distanceFn));

    // Inertia (sum of squared distances to centroids) for this assignment
    const inertia = sumSquaredDistances(vectors, centroids, assignments, distanceFn);

    // Check for convergence
    converged = Math.abs(previousInertia - inertia) < tolerance;
    previousInertia = inertia;

    // Call onIteration callback if provided
    if (onIteration) {
      onIteration(iterations + 1, inertia, converged);
    }

    if (!converged) {
      recomputeCentroids(vectors, assignments, centroids, dimensions, random);
    }

    iterations++;
  }

  // Final inertia is measured against the centroids that are returned.
  const inertia = sumSquaredDistances(vectors, centroids, assignments, distanceFn);

  return {
    centroids,
    assignments,
    iterations,
    converged,
    inertia
  };
}

/**
 * Build the random source used by the clustering routines.
 *
 * @param {number|null} seed - Seed, or null/undefined for non-deterministic output
 * @returns {Function} Function returning a float in [0, 1); `Math.random` when no
 *   seed is given, otherwise a deterministic mulberry32 generator
 * @throws {Error} If seed is provided but is not a finite number
 */
function createRandomSource(seed) {
  if (seed === null || seed === undefined) {
    return Math.random;
  }

  const numericSeed = Number(seed);
  if (!Number.isFinite(numericSeed)) {
    throw new Error(`seed must be a finite number or null, got ${seed}`);
  }

  // mulberry32: small 32-bit PRNG; `>>> 0` folds any integer (including
  // negatives) into the unsigned 32-bit state space.
  let state = Math.trunc(numericSeed) >>> 0;
  return () => {
    state = (state + 0x6D2B79F5) >>> 0;
    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
}

/**
 * Index of the centroid closest to `vector` (first one wins on ties).
 *
 * @param {number[]} vector - Vector to classify
 * @param {number[][]} centroids - Current centroids
 * @param {Function} distanceFn - Distance function
 * @returns {number} Index into `centroids`
 */
function nearestCentroidIndex(vector, centroids, distanceFn) {
  let minDist = Infinity;
  let nearestCluster = 0;

  for (let i = 0; i < centroids.length; i++) {
    const dist = distanceFn(vector, centroids[i]);
    if (dist < minDist) {
      minDist = dist;
      nearestCluster = i;
    }
  }

  return nearestCluster;
}

/**
 * Sum of squared distances between each vector and its assigned centroid.
 *
 * @param {number[][]} vectors - Input vectors
 * @param {number[][]} centroids - Current centroids
 * @param {number[]} assignments - Cluster index per vector
 * @param {Function} distanceFn - Distance function
 * @returns {number} Inertia
 */
function sumSquaredDistances(vectors, centroids, assignments, distanceFn) {
  let inertia = 0;
  vectors.forEach((vector, i) => {
    const dist = distanceFn(vector, centroids[assignments[i]]);
    inertia += dist * dist;
  });
  return inertia;
}

/**
 * Replace each centroid (in place) with the mean of its assigned vectors.
 * A cluster that received no vectors is re-seeded from a randomly chosen vector.
 *
 * @param {number[][]} vectors - Input vectors
 * @param {number[]} assignments - Cluster index per vector
 * @param {number[][]} centroids - Centroids to update (mutated)
 * @param {number} dimensions - Vector dimensionality
 * @param {Function} random - Random source returning a float in [0, 1)
 */
function recomputeCentroids(vectors, assignments, centroids, dimensions, random) {
  const k = centroids.length;
  const clusterSums = Array.from({ length: k }, () => new Array(dimensions).fill(0));
  const clusterCounts = new Array(k).fill(0);

  vectors.forEach((vector, i) => {
    const cluster = assignments[i];
    clusterCounts[cluster]++;
    vector.forEach((val, j) => {
      clusterSums[cluster][j] += val;
    });
  });

  for (let i = 0; i < k; i++) {
    if (clusterCounts[i] > 0) {
      centroids[i] = clusterSums[i].map(sum => sum / clusterCounts[i]);
    } else {
      // If cluster is empty, reinitialize to a random vector
      const randomIdx = Math.floor(random() * vectors.length);
      centroids[i] = [...vectors[randomIdx]];
    }
  }
}

/**
 * Initialize centroids using k-means++ algorithm
 *
 * K-means++ provides better initialization than random selection,
 * leading to faster convergence and better final clustering.
 *
 * With a seed, the first centroid is `vectors[seed mod n]` (the seed is
 * truncated and wrapped into [0, n), so negative or fractional seeds are
 * valid) and every following pick is drawn from `random`.
 *
 * @param {number[][]} vectors - Input vectors
 * @param {number} k - Number of centroids
 * @param {Function} distanceFn - Distance function
 * @param {number|null} seed - Random seed
 * @param {Function} [random] - Random source returning a float in [0, 1)
 *   (default: derived from `seed`)
 * @returns {number[][]} Initial centroids (always exactly k copies of input vectors)
 */
function initializeCentroidsKMeansPlusPlus(vectors, k, distanceFn, seed, random = createRandomSource(seed)) {
  const centroids = [];
  const n = vectors.length;

  // Choose first centroid
  let firstIndex;
  if (seed !== null && seed !== undefined) {
    // JavaScript's % keeps the sign of the dividend; wrap into [0, n).
    firstIndex = ((Math.trunc(Number(seed)) % n) + n) % n;
  } else {
    firstIndex = Math.floor(random() * n);
  }
  centroids.push([...vectors[firstIndex]]);

  // Distance from every vector to its nearest chosen centroid, maintained
  // incrementally so each round only measures against the newest centroid.
  const nearest = new Array(n).fill(Infinity);

  // Choose remaining centroids
  for (let i = 1; i < k; i++) {
    const newest = centroids[centroids.length - 1];
    for (let j = 0; j < n; j++) {
      nearest[j] = Math.min(nearest[j], distanceFn(vectors[j], newest));
    }

    // Square distances for probability distribution
    const squaredDistances = nearest.map(d => d * d);
    const totalSquared = squaredDistances.reduce((a, b) => a + b, 0);

    if (totalSquared === 0) {
      // All points are identical to existing centroids: choose randomly
      const randomIdx = Math.floor(random() * n);
      centroids.push([...vectors[randomIdx]]);
      continue;
    }

    // Choose next centroid with probability proportional to squared distance
    const threshold = random() * totalSquared;
    let cumulativeSum = 0;
    // Fallback keeps the result at exactly k centroids even if the
    // comparison below never succeeds (e.g. NaN distances).
    let chosen = n - 1;

    for (let j = 0; j < n; j++) {
      cumulativeSum += squaredDistances[j];
      if (cumulativeSum >= threshold) {
        chosen = j;
        break;
      }
    }

    centroids.push([...vectors[chosen]]);
  }

  return centroids;
}
197
+
198
/**
 * Find optimal K using multiple evaluation metrics
 *
 * Analyzes clustering quality across a range of K values using:
 * - Elbow method (inertia)
 * - Silhouette score
 * - Davies-Bouldin index
 * - Calinski-Harabasz index
 * - Gap statistic
 * - Clustering stability
 *
 * Each metric votes for its best K; the most common vote is reported as the
 * consensus (the smallest K wins ties).
 *
 * @param {number[][]} vectors - Vectors to analyze
 * @param {Object} options - Configuration options; any option not listed here is forwarded to kmeans
 * @param {number} options.minK - Minimum K to test (default: 2)
 * @param {number} options.maxK - Maximum K to test (default: min(10, sqrt(n/2)),
 *   raised to at least minK and capped at the number of vectors)
 * @param {Function} options.distanceFn - Distance function (default: euclideanDistance)
 * @param {number} options.nReferences - Number of reference datasets for Gap statistic (default: 10)
 * @param {number} options.stabilityRuns - Number of runs for stability analysis (default: 5)
 * @returns {Promise<Object>} Analysis results with recommendations
 * @throws {Error} If vectors is empty or the K range is empty/invalid
 */
export async function findOptimalK(vectors, options = {}) {
  const {
    minK = 2,
    maxK: requestedMaxK,
    distanceFn = euclideanDistance,
    nReferences = 10,
    stabilityRuns = 5,
    ...kmeansOptions
  } = options;

  if (vectors.length === 0) {
    throw new Error('Cannot analyze empty vector array');
  }

  if (!Number.isInteger(minK) || minK < 1) {
    throw new Error(`minK must be a positive integer, got ${minK}`);
  }

  // The sqrt(n/2) heuristic drops below minK for small datasets (n < 8 with
  // the default minK of 2), which used to leave nothing to analyze. Keep the
  // default range non-empty whenever the data allows it.
  const heuristicMaxK = Math.min(10, Math.floor(Math.sqrt(vectors.length / 2)));
  const maxK = requestedMaxK === undefined
    ? Math.min(vectors.length, Math.max(minK, heuristicMaxK))
    : requestedMaxK;

  // Written as a negated >= so that a NaN/undefined-like maxK is rejected too.
  if (!(maxK >= minK)) {
    throw new Error(
      `Invalid K range: minK (${minK}) cannot be greater than maxK (${maxK}) for ${vectors.length} vectors`
    );
  }

  // Dynamic import to avoid circular dependency
  const metricsModule = await import('./metrics.js');
  const {
    silhouetteScore,
    daviesBouldinIndex,
    calinskiHarabaszIndex,
    gapStatistic,
    clusteringStability
  } = metricsModule;

  const results = [];

  for (let k = minK; k <= maxK; k++) {
    // Run k-means
    const kmeansResult = kmeans(vectors, k, { ...kmeansOptions, distanceFn });

    // Calculate all metrics
    const silhouette = silhouetteScore(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );

    const daviesBouldin = daviesBouldinIndex(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );

    const calinskiHarabasz = calinskiHarabaszIndex(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn
    );

    const gap = await gapStatistic(
      vectors,
      kmeansResult.assignments,
      kmeansResult.centroids,
      distanceFn,
      nReferences
    );

    const stability = clusteringStability(
      vectors,
      k,
      { ...kmeansOptions, distanceFn, nRuns: stabilityRuns }
    );

    results.push({
      k,
      inertia: kmeansResult.inertia,
      silhouette,
      daviesBouldin,
      calinskiHarabasz,
      gap: gap.gap,
      gapSk: gap.sk,
      stability: stability.stability,
      cvInertia: stability.cvInertia,
      iterations: kmeansResult.iterations,
      converged: kmeansResult.converged
    });
  }

  // Calculate elbow point (index into results, hence the minK offset below)
  const elbowK = findElbowPoint(results.map(r => r.inertia));

  // Find best K by each metric. `results` is guaranteed non-empty here, so
  // reduce without an initial value is safe; the first result wins ties.
  const recommendations = {
    elbow: minK + elbowK,
    silhouette: results.reduce((best, curr) =>
      curr.silhouette > best.silhouette ? curr : best
    ).k,
    // Davies-Bouldin is the only metric where lower is better
    daviesBouldin: results.reduce((best, curr) =>
      curr.daviesBouldin < best.daviesBouldin ? curr : best
    ).k,
    calinskiHarabasz: results.reduce((best, curr) =>
      curr.calinskiHarabasz > best.calinskiHarabasz ? curr : best
    ).k,
    gap: results.reduce((best, curr) =>
      curr.gap > best.gap ? curr : best
    ).k,
    stability: results.reduce((best, curr) =>
      curr.stability > best.stability ? curr : best
    ).k
  };

  // Calculate consensus (most common recommendation)
  const votes = Object.values(recommendations);
  const consensus = votes.reduce((acc, k) => {
    acc[k] = (acc[k] || 0) + 1;
    return acc;
  }, {});

  // Object keys are strings; entries for integer keys iterate in ascending
  // order, so the smallest K wins when vote counts are tied.
  const consensusK = Number(
    Object.entries(consensus).reduce((a, b) => b[1] > a[1] ? b : a)[0]
  );

  return {
    results,
    recommendations,
    consensus: consensusK,
    summary: {
      analysisRange: `${minK}-${maxK}`,
      totalVectors: vectors.length,
      dimensions: vectors[0].length,
      recommendation: consensusK,
      confidence: consensus[consensusK] / votes.length
    }
  };
}
342
+
343
/**
 * Locate the elbow of an inertia curve.
 *
 * The curvature at each interior point is approximated by the discrete
 * second difference of its neighbours; the interior index with the largest
 * value is returned (the earliest one on ties).
 *
 * @param {number[]} inertias - Inertia values, one per tested K
 * @returns {number} Index of the elbow point, or 0 when fewer than three values are given
 */
function findElbowPoint(inertias) {
  const count = inertias.length;

  // Fewer than three points leaves no interior point to evaluate.
  if (count < 3) {
    return 0;
  }

  let bestIndex = 0;
  let bestCurvature = -Infinity;

  for (let mid = 1; mid <= count - 2; mid += 1) {
    const before = inertias[mid - 1];
    const here = inertias[mid];
    const after = inertias[mid + 1];

    // Discrete second derivative (curvature approximation)
    const secondDifference = before - 2 * here + after;

    if (secondDifference > bestCurvature) {
      bestCurvature = secondDifference;
      bestIndex = mid;
    }
  }

  return bestIndex;
}