embed-cluster 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -0
- package/dist/__tests__/cluster.test.d.ts +2 -0
- package/dist/__tests__/cluster.test.d.ts.map +1 -0
- package/dist/__tests__/cluster.test.js +202 -0
- package/dist/__tests__/cluster.test.js.map +1 -0
- package/dist/__tests__/errors.test.d.ts +2 -0
- package/dist/__tests__/errors.test.d.ts.map +1 -0
- package/dist/__tests__/errors.test.js +68 -0
- package/dist/__tests__/errors.test.js.map +1 -0
- package/dist/__tests__/fixtures/embeddings-small.json +25 -0
- package/dist/__tests__/fixtures.test.d.ts +2 -0
- package/dist/__tests__/fixtures.test.d.ts.map +1 -0
- package/dist/__tests__/fixtures.test.js +44 -0
- package/dist/__tests__/fixtures.test.js.map +1 -0
- package/dist/__tests__/kmeans.test.d.ts +2 -0
- package/dist/__tests__/kmeans.test.d.ts.map +1 -0
- package/dist/__tests__/kmeans.test.js +220 -0
- package/dist/__tests__/kmeans.test.js.map +1 -0
- package/dist/__tests__/normalize.test.d.ts +2 -0
- package/dist/__tests__/normalize.test.d.ts.map +1 -0
- package/dist/__tests__/normalize.test.js +92 -0
- package/dist/__tests__/normalize.test.js.map +1 -0
- package/dist/__tests__/silhouette.test.d.ts +2 -0
- package/dist/__tests__/silhouette.test.d.ts.map +1 -0
- package/dist/__tests__/silhouette.test.js +126 -0
- package/dist/__tests__/silhouette.test.js.map +1 -0
- package/dist/__tests__/types.test.d.ts +2 -0
- package/dist/__tests__/types.test.d.ts.map +1 -0
- package/dist/__tests__/types.test.js +126 -0
- package/dist/__tests__/types.test.js.map +1 -0
- package/dist/clusterer.d.ts +17 -0
- package/dist/clusterer.d.ts.map +1 -0
- package/dist/clusterer.js +72 -0
- package/dist/clusterer.js.map +1 -0
- package/dist/errors.d.ts +7 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +14 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/kmeans.d.ts +6 -0
- package/dist/kmeans.d.ts.map +1 -0
- package/dist/kmeans.js +250 -0
- package/dist/kmeans.js.map +1 -0
- package/dist/normalize.d.ts +10 -0
- package/dist/normalize.d.ts.map +1 -0
- package/dist/normalize.js +21 -0
- package/dist/normalize.js.map +1 -0
- package/dist/optimal-k.d.ts +11 -0
- package/dist/optimal-k.d.ts.map +1 -0
- package/dist/optimal-k.js +49 -0
- package/dist/optimal-k.js.map +1 -0
- package/dist/silhouette.d.ts +16 -0
- package/dist/silhouette.d.ts.map +1 -0
- package/dist/silhouette.js +95 -0
- package/dist/silhouette.js.map +1 -0
- package/dist/types.d.ts +74 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +48 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.test.js","sourceRoot":"","sources":["../../src/__tests__/types.test.ts"],"names":[],"mappings":";;AAAA,mCAA8C;AAa9C,IAAA,iBAAQ,EAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,IAAA,WAAE,EAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,IAAI,GAAc;YACtB,EAAE,EAAE,SAAS;YACb,IAAI,EAAE,aAAa;YACnB,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;SAC3B,CAAC;QACF,IAAA,eAAM,EAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAChC,IAAA,eAAM,EAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACtC,IAAA,eAAM,EAAC,IAAI,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,IAAI,GAAc;YACtB,EAAE,EAAE,GAAG;YACP,IAAI,EAAE,GAAG;YACT,SAAS,EAAE,CAAC,CAAC,CAAC;YACd,QAAQ,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE;SACnD,CAAC;QACF,IAAA,eAAM,EAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,qEAAqE,EAAE,GAAG,EAAE;QAC7E,MAAM,EAAE,GAAgB;YACtB,EAAE,EAAE,MAAM;YACV,IAAI,EAAE,WAAW;YACjB,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,IAAI;SACzB,CAAC;QACF,IAAA,eAAM,EAAC,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAA,eAAM,EAAC,EAAE,CAAC,kBAAkB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzC,2BAA2B;QAC3B,IAAA,eAAM,EAAC,EAAE,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAA,eAAM,EAAC,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,mDAAmD;QACnD,MAAM,IAAI,GAAmB,EAAE,CAAC;QAChC,IAAA,eAAM,EAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;QAE3B,MAAM,QAAQ,GAAmB;YAC/B,CAAC,EAAE,CAAC;YACJ,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,EAAE;YACR,aAAa,EAAE,GAAG;YAClB,SAAS,EAAE,IAAI;YACf,IAAI,EAAE,EAAE;YACR,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO;YACjC,UAAU,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;SACvE,CAAC;
QACF,IAAA,eAAM,EAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,UAAU,GAAqB,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;QAC9E,MAAM,OAAO,GAAmB,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;QAC9D,MAAM,MAAM,GAAkB;YAC5B,QAAQ,EAAE,EAAE;YACZ,OAAO;YACP,CAAC,EAAE,CAAC;YACJ,UAAU,EAAE,EAAE;YACd,SAAS,EAAE,IAAI;YACf,UAAU,EAAE,GAAG;SAChB,CAAC;QACF,IAAA,eAAM,EAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzB,IAAA,eAAM,EAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpC,IAAA,eAAM,EAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,aAAa,GAAc;YAC/B,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE;gBAClC,MAAM,UAAU,GAAqB,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;gBACpE,MAAM,OAAO,GAAmB,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;gBAC3D,MAAM,MAAM,GAAkB;oBAC5B,QAAQ,EAAE,EAAE;oBACZ,OAAO;oBACP,CAAC,EAAE,CAAC;oBACJ,UAAU,EAAE,EAAE;oBACd,SAAS,EAAE,IAAI;oBACf,UAAU,EAAE,EAAE;iBACf,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YACD,YAAY,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC;gBACzC,CAAC,EAAE,CAAC;gBACJ,MAAM,EAAE,EAAE;gBACV,MAAM,EAAE,UAAmB;aAC5B,CAAC;YACF,eAAe,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;SAC/D,CAAC;QAEF,IAAA,eAAM,EAAC,aAAa,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,IAAA,eAAM,EAAC,OAAO,aAAa,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACtD,IAAA,eAAM,EAAC,OAAO,aAAa,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC3D,IAAA,eAAM,EAAC,OAAO,aAAa,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,KAAK,GAAuB;YAChC,aAAa;YACb,yBAAyB;YACzB,kBAAkB;YAClB,WAAW;YACX,iBAAiB;SAClB,CAAC;QACF,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC9B,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QACvC,IAAA,eAAM,EAAC,KAAK,
CAAC,CAAC,SAAS,CAAC,yBAAyB,CAAC,CAAC;QACnD,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC5C,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;QACrC,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,IAAA,WAAE,EAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAAY;YACvB,EAAE,EAAE,CAAC;YACL,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YACnB,KAAK,EAAE,EAAE;YACT,IAAI,EAAE,CAAC;YACP,qBAAqB,EAAE,CAAC;YACxB,QAAQ,EAAE,CAAC;SACZ,CAAC;QACF,IAAA,eAAM,EAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAA,eAAM,EAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { EmbedItem, ClusterResult, ClusterOptions, Clusterer } from './types';
|
|
2
|
+
export { findOptimalK } from './optimal-k';
|
|
3
|
+
export { silhouetteScore } from './silhouette';
|
|
4
|
+
/**
|
|
5
|
+
* Cluster a set of EmbedItems using k-means++ and return a ClusterResult
|
|
6
|
+
* with silhouette scores populated in quality.silhouette.
|
|
7
|
+
*
|
|
8
|
+
* Provide either `options.k` (fixed) or `options.autoK = true` to
|
|
9
|
+
* auto-select the optimal k.
|
|
10
|
+
*/
|
|
11
|
+
// The returned promise rejects with a ClusterError: code 'EMPTY_INPUT' when
// `items` is empty, and 'INVALID_OPTIONS' when neither `options.k` nor
// `options.autoK` is supplied. `options.autoK` is checked before `options.k`,
// so it wins when both are set. When `options.labeler` is provided, its
// (awaited) result is stored on each cluster's `label`.
export declare function cluster(items: EmbedItem[], options?: ClusterOptions): Promise<ClusterResult>;
|
|
12
|
+
/**
|
|
13
|
+
* Create a pre-configured Clusterer instance bound to the given config.
|
|
14
|
+
* The returned object exposes cluster(), findOptimalK(), and silhouetteScore().
|
|
15
|
+
*/
|
|
16
|
+
// Options passed per call to cluster()/findOptimalK() are spread over `config`,
// so a key supplied at call time overrides the same key in `config`.
export declare function createClusterer(config?: ClusterOptions): Clusterer;
|
|
17
|
+
//# sourceMappingURL=clusterer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clusterer.d.ts","sourceRoot":"","sources":["../src/clusterer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,cAAc,EAAkB,SAAS,EAAE,MAAM,SAAS,CAAC;AAMnG,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAM/C;;;;;;GAMG;AACH,wBAAsB,OAAO,CAC3B,KAAK,EAAE,SAAS,EAAE,EAClB,OAAO,GAAE,cAAmB,GAC3B,OAAO,CAAC,aAAa,CAAC,CAiCxB;AAMD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,GAAE,cAAmB,GAAG,SAAS,CAiBtE"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.silhouetteScore = exports.findOptimalK = void 0;
|
|
4
|
+
exports.cluster = cluster;
|
|
5
|
+
exports.createClusterer = createClusterer;
|
|
6
|
+
const errors_1 = require("./errors");
|
|
7
|
+
const kmeans_1 = require("./kmeans");
|
|
8
|
+
const silhouette_1 = require("./silhouette");
|
|
9
|
+
const optimal_k_1 = require("./optimal-k");
|
|
10
|
+
var optimal_k_2 = require("./optimal-k");
|
|
11
|
+
Object.defineProperty(exports, "findOptimalK", { enumerable: true, get: function () { return optimal_k_2.findOptimalK; } });
|
|
12
|
+
var silhouette_2 = require("./silhouette");
|
|
13
|
+
Object.defineProperty(exports, "silhouetteScore", { enumerable: true, get: function () { return silhouette_2.silhouetteScore; } });
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Convenience top-level cluster() function
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
/**
 * Run k-means++ clustering over `items` and resolve with the ClusterResult,
 * with `quality.silhouette` filled in from silhouetteScore().
 *
 * The cluster count comes from findOptimalK() when `options.autoK` is truthy,
 * otherwise from `options.k`. When `options.labeler` is set, it is invoked
 * (and awaited) for one cluster at a time and its value becomes that
 * cluster's `label`.
 *
 * Rejects with ClusterError 'EMPTY_INPUT' for an empty `items` array and
 * 'INVALID_OPTIONS' when neither `k` nor `autoK` is given.
 */
async function cluster(items, options = {}) {
    if (items.length === 0) {
        throw new errors_1.ClusterError('Input must not be empty', 'EMPTY_INPUT');
    }
    if (!options.autoK && options.k === undefined) {
        throw new errors_1.ClusterError('Provide options.k or set options.autoK = true', 'INVALID_OPTIONS');
    }
    // autoK takes precedence over an explicit k.
    const chosenK = options.autoK
        ? (0, optimal_k_1.findOptimalK)(items, options).k
        : options.k;
    const result = (0, kmeans_1.kMeans)(items, chosenK, options);
    // kMeans leaves a placeholder in quality.silhouette; replace it with real scores.
    result.quality.silhouette = (0, silhouette_1.silhouetteScore)(result);
    if (options.labeler) {
        // Labels are requested sequentially, one cluster after another.
        for (const entry of result.clusters) {
            entry.label = await Promise.resolve(options.labeler(entry.items, entry.id));
        }
    }
    return result;
}
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// createClusterer factory
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
/**
 * Build a Clusterer whose methods apply `config` as default options.
 *
 * The returned object offers cluster(), findOptimalK() and silhouetteScore().
 * For the first two, options given per call are layered on top of `config`,
 * so call-time keys override configured ones.
 */
function createClusterer(config = {}) {
    // Later spread wins: per-call overrides replace configured defaults.
    const withDefaults = (overrides) => ({ ...config, ...overrides });
    return {
        async cluster(items, options = {}) {
            return cluster(items, withDefaults(options));
        },
        async findOptimalK(items, options = {}) {
            return (0, optimal_k_1.findOptimalK)(items, withDefaults(options));
        },
        silhouetteScore(result) {
            return (0, silhouette_1.silhouetteScore)(result);
        },
    };
}
|
|
72
|
+
//# sourceMappingURL=clusterer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clusterer.js","sourceRoot":"","sources":["../src/clusterer.ts"],"names":[],"mappings":";;;AAoBA,0BAoCC;AAUD,0CAiBC;AAlFD,qCAAwC;AACxC,qCAAkC;AAClC,6CAA+C;AAC/C,2CAA2C;AAE3C,yCAA2C;AAAlC,yGAAA,YAAY,OAAA;AACrB,2CAA+C;AAAtC,6GAAA,eAAe,OAAA;AAExB,8EAA8E;AAC9E,2CAA2C;AAC3C,8EAA8E;AAE9E;;;;;;GAMG;AACI,KAAK,UAAU,OAAO,CAC3B,KAAkB,EAClB,UAA0B,EAAE;IAE5B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,qBAAY,CAAC,yBAAyB,EAAE,aAAa,CAAC,CAAC;IACnE,CAAC;IAED,IAAI,CAAS,CAAC;IACd,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,IAAA,wBAAY,EAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;IAClB,CAAC;SAAM,IAAI,OAAO,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;QACnC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;IAChB,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,qBAAY,CACpB,+CAA+C,EAC/C,iBAAiB,CAClB,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAA,eAAM,EAAC,KAAK,EAAE,CAAC,EAAE,OAAO,CAAC,CAAC;IAEzC,wCAAwC;IACxC,MAAM,GAAG,GAAG,IAAA,4BAAe,EAAC,MAAM,CAAC,CAAC;IACpC,MAAM,CAAC,OAAO,CAAC,UAAU,GAAG,GAAG,CAAC;IAEhC,4BAA4B;IAC5B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACpE,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAE9E;;;GAGG;AACH,SAAgB,eAAe,CAAC,SAAyB,EAAE;IACzD,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,KAAkB,EAAE,UAA0B,EAAE;YAC5D,OAAO,OAAO,CAAC,KAAK,EAAE,EAAE,GAAG,MAAM,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,KAAK,CAAC,YAAY,CAChB,KAAkB,EAClB,UAAqC,EAAE;YAEvC,OAAO,IAAA,wBAAY,EAAC,KAAK,EAAE,EAAE,GAAG,MAAM,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,eAAe,CAAC,MAAqB;YACnC,OAAO,IAAA,4BAAe,EAAC,MAAM,CAAC,CAAC;QACjC,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/** String codes carried in `ClusterError.code` to identify which kind of failure occurred. */
export type ClusterErrorCode = 'EMPTY_INPUT' | 'INCONSISTENT_DIMENSIONS' | 'DEGENERATE_INPUT' | 'INVALID_K' | 'INVALID_OPTIONS';
|
|
2
|
+
/** Error subclass that carries a ClusterErrorCode in `code`; its `name` is always "ClusterError". */
export declare class ClusterError extends Error {
|
|
3
|
+
readonly code: ClusterErrorCode;
|
|
4
|
+
readonly name = "ClusterError";
|
|
5
|
+
constructor(message: string, code: ClusterErrorCode);
|
|
6
|
+
}
|
|
7
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,gBAAgB,GACxB,aAAa,GACb,yBAAyB,GACzB,kBAAkB,GAClB,WAAW,GACX,iBAAiB,CAAC;AAEtB,qBAAa,YAAa,SAAQ,KAAK;IAInC,QAAQ,CAAC,IAAI,EAAE,gBAAgB;IAHjC,QAAQ,CAAC,IAAI,kBAAkB;gBAE7B,OAAO,EAAE,MAAM,EACN,IAAI,EAAE,gBAAgB;CAKlC"}
|
package/dist/errors.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ClusterError = void 0;
|
|
4
|
+
/**
 * Error thrown by the clustering routines. Besides the message it carries a
 * machine-readable `code` so callers can branch on the failure category.
 */
class ClusterError extends Error {
    /** Failure category supplied by the thrower. */
    code;
    /** Own property overriding Error's default name. */
    name = 'ClusterError';
    /**
     * @param message Human-readable description of the failure.
     * @param code Failure category stored on `this.code`.
     */
    constructor(message, code) {
        super(message);
        this.code = code;
        // Restore the prototype chain for down-levelled builds. new.target is the
        // constructor actually invoked, so subclasses keep their own prototype
        // (hard-coding ClusterError.prototype would break `instanceof Subclass`).
        Object.setPrototypeOf(this, new.target.prototype);
    }
}
|
|
13
|
+
exports.ClusterError = ClusterError;
|
|
14
|
+
//# sourceMappingURL=errors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":";;;AAOA,MAAa,YAAa,SAAQ,KAAK;IAI1B;IAHF,IAAI,GAAG,cAAc,CAAC;IAC/B,YACE,OAAe,EACN,IAAsB;QAE/B,KAAK,CAAC,OAAO,CAAC,CAAC;QAFN,SAAI,GAAJ,IAAI,CAAkB;QAG/B,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;IACtD,CAAC;CACF;AATD,oCASC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type { EmbedItem, ClusterItem, Cluster, SilhouetteResult, OptimalKResult, ClusterQuality, VisualizationData, LabelerFn, ClusterOptions, ClusterResult, Clusterer, } from './types';
|
|
2
|
+
export { ClusterError } from './errors';
|
|
3
|
+
export type { ClusterErrorCode } from './errors';
|
|
4
|
+
export { normalizeVector, normalizeVectors } from './normalize';
|
|
5
|
+
export { kMeans, euclideanDistance, cosineDistance, kMeansPlusPlusInit } from './kmeans';
|
|
6
|
+
export { silhouetteScore } from './silhouette';
|
|
7
|
+
export { findOptimalK } from './optimal-k';
|
|
8
|
+
export { cluster, createClusterer } from './clusterer';
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,SAAS,EAAE,WAAW,EAAE,OAAO,EAAE,gBAAgB,EAAE,cAAc,EACjE,cAAc,EAAE,iBAAiB,EAAE,SAAS,EAC5C,cAAc,EAAE,aAAa,EAAE,SAAS,GACzC,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,YAAY,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EAAE,MAAM,EAAE,iBAAiB,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAC;AACzF,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createClusterer = exports.cluster = exports.findOptimalK = exports.silhouetteScore = exports.kMeansPlusPlusInit = exports.cosineDistance = exports.euclideanDistance = exports.kMeans = exports.normalizeVectors = exports.normalizeVector = exports.ClusterError = void 0;
|
|
4
|
+
var errors_1 = require("./errors");
|
|
5
|
+
Object.defineProperty(exports, "ClusterError", { enumerable: true, get: function () { return errors_1.ClusterError; } });
|
|
6
|
+
var normalize_1 = require("./normalize");
|
|
7
|
+
Object.defineProperty(exports, "normalizeVector", { enumerable: true, get: function () { return normalize_1.normalizeVector; } });
|
|
8
|
+
Object.defineProperty(exports, "normalizeVectors", { enumerable: true, get: function () { return normalize_1.normalizeVectors; } });
|
|
9
|
+
var kmeans_1 = require("./kmeans");
|
|
10
|
+
Object.defineProperty(exports, "kMeans", { enumerable: true, get: function () { return kmeans_1.kMeans; } });
|
|
11
|
+
Object.defineProperty(exports, "euclideanDistance", { enumerable: true, get: function () { return kmeans_1.euclideanDistance; } });
|
|
12
|
+
Object.defineProperty(exports, "cosineDistance", { enumerable: true, get: function () { return kmeans_1.cosineDistance; } });
|
|
13
|
+
Object.defineProperty(exports, "kMeansPlusPlusInit", { enumerable: true, get: function () { return kmeans_1.kMeansPlusPlusInit; } });
|
|
14
|
+
var silhouette_1 = require("./silhouette");
|
|
15
|
+
Object.defineProperty(exports, "silhouetteScore", { enumerable: true, get: function () { return silhouette_1.silhouetteScore; } });
|
|
16
|
+
var optimal_k_1 = require("./optimal-k");
|
|
17
|
+
Object.defineProperty(exports, "findOptimalK", { enumerable: true, get: function () { return optimal_k_1.findOptimalK; } });
|
|
18
|
+
var clusterer_1 = require("./clusterer");
|
|
19
|
+
Object.defineProperty(exports, "cluster", { enumerable: true, get: function () { return clusterer_1.cluster; } });
|
|
20
|
+
Object.defineProperty(exports, "createClusterer", { enumerable: true, get: function () { return clusterer_1.createClusterer; } });
|
|
21
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAKA,mCAAwC;AAA/B,sGAAA,YAAY,OAAA;AAErB,yCAAgE;AAAvD,4GAAA,eAAe,OAAA;AAAE,6GAAA,gBAAgB,OAAA;AAC1C,mCAAyF;AAAhF,gGAAA,MAAM,OAAA;AAAE,2GAAA,iBAAiB,OAAA;AAAE,wGAAA,cAAc,OAAA;AAAE,4GAAA,kBAAkB,OAAA;AACtE,2CAA+C;AAAtC,6GAAA,eAAe,OAAA;AACxB,yCAA2C;AAAlC,yGAAA,YAAY,OAAA;AACrB,yCAAuD;AAA9C,oGAAA,OAAO,OAAA;AAAE,4GAAA,eAAe,OAAA"}
|
package/dist/kmeans.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { EmbedItem, ClusterResult, ClusterOptions } from './types';
|
|
2
|
+
/** Square root of the summed squared component differences of `a` and `b`, iterating over the indices of `a`. */
export declare function euclideanDistance(a: number[], b: number[]): number;
|
|
3
|
+
/** Cosine distance, i.e. 1 minus the cosine similarity of `a` and `b`; evaluates to 1 when either vector has zero magnitude. */
export declare function cosineDistance(a: number[], b: number[]): number;
|
|
4
|
+
/**
 * k-means++ seeding: the first centroid is drawn uniformly from `vectors` using
 * `rand`; each further one is drawn with probability proportional to the squared
 * `distFn` distance to the nearest centroid already chosen. Centroids are copies
 * of the selected vectors.
 */
export declare function kMeansPlusPlusInit(vectors: number[][], k: number, distFn: (a: number[], b: number[]) => number, rand: () => number): number[][];
|
|
5
|
+
/**
 * Runs k-means over `items` and returns the clusters, inertia, iteration count
 * and convergence flag. `quality.silhouette` in the result is a zero-filled
 * placeholder. Throws ClusterError for empty input, embeddings of differing
 * length, or a `k` that is not a positive integer no larger than `items.length`.
 */
export declare function kMeans(items: EmbedItem[], k: number, options?: ClusterOptions): ClusterResult;
|
|
6
|
+
//# sourceMappingURL=kmeans.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kmeans.d.ts","sourceRoot":"","sources":["../src/kmeans.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAwB,aAAa,EAAE,cAAc,EAAkB,MAAM,SAAS,CAAC;AAO9G,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAOlE;AAED,wBAAgB,cAAc,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAY/D;AAoBD,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,MAAM,EAAE,EAAE,EACnB,CAAC,EAAE,MAAM,EACT,MAAM,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,MAAM,EAC5C,IAAI,EAAE,MAAM,MAAM,GACjB,MAAM,EAAE,EAAE,CAkCZ;AA0ED,wBAAgB,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,OAAO,GAAE,cAAmB,GAAG,aAAa,CAsIjG"}
|
package/dist/kmeans.js
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.euclideanDistance = euclideanDistance;
|
|
4
|
+
exports.cosineDistance = cosineDistance;
|
|
5
|
+
exports.kMeansPlusPlusInit = kMeansPlusPlusInit;
|
|
6
|
+
exports.kMeans = kMeans;
|
|
7
|
+
const errors_1 = require("./errors");
|
|
8
|
+
const normalize_1 = require("./normalize");
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Distance functions
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
/**
 * Euclidean (L2) distance between two vectors.
 *
 * Walks the indices of `a`; `b` is read at the same positions.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns Square root of the summed squared component differences.
 */
function euclideanDistance(a, b) {
    const dims = a.length;
    let squaredTotal = 0;
    let idx = 0;
    while (idx < dims) {
        const delta = a[idx] - b[idx];
        squaredTotal += delta * delta;
        idx += 1;
    }
    return Math.sqrt(squaredTotal);
}
|
|
20
|
+
/**
 * Cosine distance between two vectors: 1 minus their cosine similarity.
 *
 * @param a First vector; its indices drive the iteration.
 * @param b Second vector, read at the same positions.
 * @returns A value in [0, 2]; 1 when either vector has zero magnitude.
 */
function cosineDistance(a, b) {
    let dot = 0;
    let magA = 0;
    let magB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        magA += a[i] * a[i];
        magB += b[i] * b[i];
    }
    const denom = Math.sqrt(magA) * Math.sqrt(magB);
    // A zero-magnitude vector has no direction; report the neutral distance.
    if (denom === 0)
        return 1;
    // Rounding can push the ratio marginally outside [-1, 1] (e.g. a vector
    // compared with itself), which would otherwise produce a tiny negative distance.
    const similarity = Math.max(-1, Math.min(1, dot / denom));
    return 1 - similarity;
}
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Seeded pseudo-random number generator (mulberry32)
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
 * Create a seeded pseudo-random generator (mulberry32).
 *
 * @param seed Seed value; coerced to an unsigned 32-bit integer.
 * @returns A function yielding a number in [0, 1) on each call. The same seed
 *          always produces the same sequence.
 */
function makePrng(seed) {
    let state = seed >>> 0;
    return function () {
        // Wrap the state to 32 bits on every step. Left as an ever-growing
        // double it would pass 2^53 after a few million draws, at which point
        // the odd increment is rounded away and the stream degrades.
        state = (state + 0x6d2b79f5) | 0;
        let t = Math.imul(state ^ (state >>> 15), 1 | state);
        t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
        return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
    };
}
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// k-means++ initialization
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
/**
 * k-means++ seeding.
 *
 * The first centroid is drawn uniformly from `vectors`; every further one is
 * drawn with probability proportional to the squared distance between a point
 * and its nearest already-chosen centroid.
 *
 * @param vectors Candidate points.
 * @param k Number of centroids requested. One centroid is always produced for
 *          non-empty input, even when k < 1.
 * @param distFn Distance function applied to (point, centroid).
 * @param rand Source of numbers in [0, 1); called once per centroid.
 * @returns Copies of the chosen vectors; an empty array when `vectors` is empty.
 */
function kMeansPlusPlusInit(vectors, k, distFn, rand) {
    const n = vectors.length;
    // Nothing to pick from: avoid indexing into an empty array.
    if (n === 0)
        return [];
    const centroids = [];
    // Pick first centroid uniformly at random
    const firstIdx = Math.floor(rand() * n);
    centroids.push([...vectors[firstIdx]]);
    // nearest[i] is the distance from vectors[i] to its closest centroid so far.
    // Only the most recently added centroid can lower it, so each round needs
    // one distance evaluation per point instead of one per (point, centroid).
    const nearest = new Array(n).fill(Infinity);
    for (let c = 1; c < k; c++) {
        const newest = centroids[centroids.length - 1];
        const weights = new Array(n);
        let totalWeight = 0;
        for (let i = 0; i < n; i++) {
            const d = distFn(vectors[i], newest);
            if (d < nearest[i])
                nearest[i] = d;
            weights[i] = nearest[i] * nearest[i];
            totalWeight += weights[i];
        }
        // Weighted random selection proportional to distance squared
        let threshold = rand() * totalWeight;
        // Fallback to the last point if rounding keeps the threshold above zero.
        let chosen = n - 1;
        for (let i = 0; i < n; i++) {
            threshold -= weights[i];
            if (threshold <= 0) {
                chosen = i;
                break;
            }
        }
        centroids.push([...vectors[chosen]]);
    }
    return centroids;
}
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Centroid computation
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
/**
 * Component-wise mean of a set of vectors.
 *
 * The dimension is taken from the first vector.
 *
 * @param vectors Vectors to average.
 * @returns The mean vector, or an empty array when `vectors` is empty.
 */
function computeCentroid(vectors) {
    const count = vectors.length;
    if (count === 0) {
        return [];
    }
    const width = vectors[0].length;
    // Build each coordinate independently: sum that axis over all vectors, then divide.
    return Array.from({ length: width }, (_, axis) => {
        let total = 0;
        for (const vec of vectors) {
            total += vec[axis];
        }
        return total / count;
    });
}
|
|
99
|
+
/**
 * How far a centroid moved: the Euclidean distance between positions `a` and `b`.
 *
 * @param a One centroid position; its indices drive the iteration.
 * @param b The other centroid position.
 * @returns Square root of the summed squared coordinate differences.
 */
function centroidShift(a, b) {
    let squared = 0;
    for (const [axis, coord] of a.entries()) {
        const gap = coord - b[axis];
        squared += gap * gap;
    }
    return Math.sqrt(squared);
}
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
// Inertia (sum of squared distances to assigned centroid)
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
/**
 * Inertia of a clustering: the sum, over all points, of the squared distance
 * between the point and the centroid it is assigned to.
 *
 * @param assignments assignments[i] is the centroid index for vectors[i].
 * @param vectors The clustered points.
 * @param centroids Centroid positions, indexed by cluster id.
 * @param distFn Distance function applied to (point, centroid).
 * @returns The accumulated squared distance (0 for no points).
 */
function computeInertia(assignments, vectors, centroids, distFn) {
    let total = 0;
    for (const [row, vec] of vectors.entries()) {
        const gap = distFn(vec, centroids[assignments[row]]);
        total += gap * gap;
    }
    return total;
}
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
// Validation helpers
|
|
120
|
+
// ---------------------------------------------------------------------------
|
|
121
|
+
/**
 * Check the arguments of kMeans() and throw a ClusterError on the first problem.
 *
 * Checks run in this order:
 *  - 'EMPTY_INPUT' when `items` is empty;
 *  - 'INCONSISTENT_DIMENSIONS' when any embedding's length differs from the first;
 *  - 'INVALID_K' when `k` is not an integer >= 1, or exceeds `items.length`.
 *
 * @param items Items whose `embedding` arrays are inspected.
 * @param k Requested cluster count.
 */
function validateInput(items, k) {
    if (items.length === 0) {
        throw new errors_1.ClusterError('Input must not be empty', 'EMPTY_INPUT');
    }
    // The first embedding defines the expected dimensionality.
    const expectedDim = items[0].embedding.length;
    for (let idx = 0; idx < items.length; idx++) {
        if (items[idx].embedding.length !== expectedDim) {
            throw new errors_1.ClusterError('All embeddings must have the same dimension', 'INCONSISTENT_DIMENSIONS');
        }
    }
    const isPositiveInteger = Number.isInteger(k) && k >= 1;
    if (!isPositiveInteger) {
        throw new errors_1.ClusterError('k must be a positive integer', 'INVALID_K');
    }
    if (k > items.length) {
        throw new errors_1.ClusterError(`k (${k}) cannot exceed number of items (${items.length})`, 'INVALID_K');
    }
}
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
// Main k-means function
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
/**
 * Cluster `items` into `k` groups with iterative assign/update k-means,
 * starting from k-means++ centroids.
 *
 * Options (defaults applied when a key is undefined):
 *  - maxIterations (100): upper bound on assign/update rounds;
 *  - tolerance (1e-4): stop once the largest centroid movement falls below it;
 *  - seed (42): seed for the internal pseudo-random generator;
 *  - normalize (true): pass embeddings through normalizeVectors() first;
 *  - distanceFn: distance function; euclideanDistance when absent.
 *
 * Each returned item carries the vector that was actually clustered (i.e. the
 * normalized one when `normalize` is on) in `embedding`, plus `clusterId` and
 * `distanceToCentroid`. `quality.silhouette` is a zero-filled placeholder.
 *
 * @throws ClusterError (via validateInput) for empty input, mixed embedding
 *         dimensions, or an invalid `k`.
 */
function kMeans(items, k, options = {}) {
    const startedAt = Date.now();
    validateInput(items, k);
    const {
        maxIterations: iterationCap = 100,
        tolerance: shiftTolerance = 1e-4,
        seed: prngSeed = 42,
        normalize: shouldNormalize = true,
        distanceFn: customDistance,
    } = options;
    const distFn = customDistance ?? euclideanDistance;
    const rand = makePrng(prngSeed);
    // Vectors to cluster, optionally normalized.
    const rawVectors = items.map((item) => item.embedding);
    const vectors = shouldNormalize
        ? (0, normalize_1.normalizeVectors)(rawVectors)
        : rawVectors;
    let centroids = kMeansPlusPlusInit(vectors, k, distFn, rand);
    let assignments = new Array(vectors.length).fill(0);
    let iterations = 0;
    let converged = false;
    // Index of the centroid closest to `vec`; the lowest index wins ties.
    const closestCentroid = (vec) => {
        let winner = 0;
        let winnerDist = Infinity;
        for (let c = 0; c < k; c++) {
            const d = distFn(vec, centroids[c]);
            if (d < winnerDist) {
                winnerDist = d;
                winner = c;
            }
        }
        return winner;
    };
    while (iterations < iterationCap) {
        iterations += 1;
        // Assignment step
        const labels = vectors.map((vec) => closestCentroid(vec));
        // Update step: gather each cluster's vectors, then average them.
        const grouped = Array.from({ length: k }, () => []);
        labels.forEach((label, row) => {
            grouped[label].push(vectors[row]);
        });
        const updated = centroids.map((previous, c) => (
            // A cluster that attracted no points keeps its previous centroid.
            grouped[c].length === 0 ? previous : computeCentroid(grouped[c])));
        // Convergence is judged on the largest single centroid movement.
        let largestShift = 0;
        updated.forEach((next, c) => {
            largestShift = Math.max(largestShift, centroidShift(next, centroids[c]));
        });
        assignments = labels;
        centroids = updated;
        if (largestShift < shiftTolerance) {
            converged = true;
            break;
        }
    }
    // Attach cluster membership to every item.
    const membersByCluster = Array.from({ length: k }, () => []);
    vectors.forEach((vec, row) => {
        const cid = assignments[row];
        const dist = distFn(vec, centroids[cid]);
        membersByCluster[cid].push({
            ...items[row],
            embedding: vec, // the (possibly normalized) vector
            clusterId: cid,
            distanceToCentroid: dist,
        });
    });
    const clusters = centroids.map((centroid, id) => {
        const members = membersByCluster[id];
        const size = members.length;
        let distTotal = 0;
        for (const member of members) {
            distTotal += member.distanceToCentroid;
        }
        const avgDist = size > 0 ? distTotal / size : 0;
        // Cohesion: mean distance over all unordered member pairs (0 for fewer than two members).
        let cohesion = 0;
        if (size > 1) {
            let pairTotal = 0;
            let pairs = 0;
            for (let left = 0; left < size; left++) {
                for (let right = left + 1; right < size; right++) {
                    pairTotal += distFn(members[left].embedding, members[right].embedding);
                    pairs += 1;
                }
            }
            cohesion = pairs > 0 ? pairTotal / pairs : 0;
        }
        return {
            id,
            centroid,
            items: members,
            size,
            avgDistanceToCentroid: avgDist,
            cohesion,
        };
    });
    const inertia = computeInertia(assignments, vectors, centroids, distFn);
    // Silhouette scoring lives in silhouette.ts; only a placeholder is stored here.
    const quality = {
        silhouette: { score: 0, perCluster: new Array(k).fill(0) },
        inertia,
    };
    return {
        clusters,
        quality,
        k,
        iterations,
        converged,
        durationMs: Date.now() - startedAt,
    };
}
|
|
250
|
+
//# sourceMappingURL=kmeans.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kmeans.js","sourceRoot":"","sources":["../src/kmeans.ts"],"names":[],"mappings":";;AAQA,8CAOC;AAED,wCAYC;AAoBD,gDAuCC;AA0ED,wBAsIC;AAxSD,qCAAwC;AAExC,2CAA+C;AAE/C,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,SAAgB,iBAAiB,CAAC,CAAW,EAAE,CAAW;IACxD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACtB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACxB,CAAC;AAED,SAAgB,cAAc,CAAC,CAAW,EAAE,CAAW;IACrD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,CAAC,GAAG,GAAG,GAAG,KAAK,CAAC;AACzB,CAAC;AAED,8EAA8E;AAC9E,qDAAqD;AACrD,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,IAAY;IAC5B,IAAI,CAAC,GAAG,IAAI,KAAK,CAAC,CAAC;IACnB,OAAO;QACL,CAAC,IAAI,UAAU,CAAC;QAChB,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;QACzC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,UAAU,CAAC;IAC/C,CAAC,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E,SAAgB,kBAAkB,CAChC,OAAmB,EACnB,CAAS,EACT,MAA4C,EAC5C,IAAkB;IAElB,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IACzB,MAAM,SAAS,GAAe,EAAE,CAAC;IAEjC,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC;IACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAEvC,KAAK,IAAI,CAAC,
GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,mEAAmE;QACnE,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YAChC,IAAI,OAAO,GAAG,QAAQ,CAAC;YACvB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;gBAC9B,IAAI,CAAC,GAAG,OAAO;oBAAE,OAAO,GAAG,CAAC,CAAC;YAC/B,CAAC;YACD,OAAO,OAAO,GAAG,OAAO,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,6DAA6D;QAC7D,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QACzD,IAAI,SAAS,GAAG,IAAI,EAAE,GAAG,WAAW,CAAC;QACrC,IAAI,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC;QACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3B,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;gBACnB,MAAM,GAAG,CAAC,CAAC;gBACX,MAAM;YACR,CAAC;QACH,CAAC;QACD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E,SAAS,eAAe,CAAC,OAAmB;IAC1C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC9B,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,QAAQ,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;IAChC,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,aAAa,CAAC,CAAW,EAAE,CAAW;IAC7C,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACtB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACxB,CAAC;AAED,8EAA8E;AAC9E,0DAA0D;AAC1D,8EAA8E;AAE9E,SAAS,cAAc,CACrB,WAAqB,EACrB,OAAmB,EACnB,SAAqB,EACrB,MAA4C;IAE5C,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,IAAI,CAAC,GAAG,C
AAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,SAAS,aAAa,CAAC,KAAkB,EAAE,CAAS;IAClD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,qBAAY,CAAC,yBAAyB,EAAE,aAAa,CAAC,CAAC;IACnE,CAAC;IACD,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAClC,MAAM,IAAI,qBAAY,CAAC,6CAA6C,EAAE,yBAAyB,CAAC,CAAC;QACnG,CAAC;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAClC,MAAM,IAAI,qBAAY,CAAC,8BAA8B,EAAE,WAAW,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACrB,MAAM,IAAI,qBAAY,CAAC,MAAM,CAAC,oCAAoC,KAAK,CAAC,MAAM,GAAG,EAAE,WAAW,CAAC,CAAC;IAClG,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E,SAAgB,MAAM,CAAC,KAAkB,EAAE,CAAS,EAAE,UAA0B,EAAE;IAChF,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE3B,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAExB,MAAM,EACJ,aAAa,GAAG,GAAG,EACnB,SAAS,GAAG,IAAI,EAChB,IAAI,GAAG,EAAE,EACT,SAAS,GAAG,IAAI,EAChB,UAAU,GACX,GAAG,OAAO,CAAC;IAEZ,MAAM,MAAM,GAAG,UAAU,IAAI,iBAAiB,CAAC;IAC/C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAE5B,yCAAyC;IACzC,IAAI,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;IAC5C,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,GAAG,IAAA,4BAAgB,EAAC,OAAO,CAAC,CAAC;IACtC,CAAC;IAED,sCAAsC;IACtC,IAAI,SAAS,GAAG,kBAAkB,CAAC,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;IAE7D,IAAI,WAAW,GAAG,IAAI,KAAK,CAAS,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC5D,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,aAAa,EAAE,IAAI,EAAE,EAAE,CAAC;QAChD,UAAU,EAAE,CAAC;QAEb,kBAAkB;QAClB,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YACrC,IAAI,OAAO,GAAG,QAAQ,CAAC;YACvB,IAAI,IAAI,GAAG,CAAC,CAAC;YACb,KAA
K,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3B,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBAClC,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC;oBAChB,OAAO,GAAG,CAAC,CAAC;oBACZ,IAAI,GAAG,CAAC,CAAC;gBACX,CAAC;YACH,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,oCAAoC;QACpC,MAAM,cAAc,GAAiB,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QACzE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,cAAc,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YAC5C,IAAI,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACnC,mCAAmC;gBACnC,OAAO,GAAG,CAAC;YACb,CAAC;YACD,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,oBAAoB;QACpB,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE;YAClD,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,aAAa,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,CAAC,EAAE,CAAC,CAAC,CAAC;QAEN,WAAW,GAAG,cAAc,CAAC;QAC7B,SAAS,GAAG,YAAY,CAAC;QAEzB,IAAI,QAAQ,GAAG,SAAS,EAAE,CAAC;YACzB,SAAS,GAAG,IAAI,CAAC;YACjB,MAAM;QACR,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,MAAM,YAAY,GAAoB,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;IAC1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC;QAChD,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;YACrB,GAAG,KAAK,CAAC,CAAC,CAAC;YACX,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,EAAI,uCAAuC;YAChE,SAAS,EAAE,GAAG;YACd,kBAAkB,EAAE,IAAI;SACzB,CAAC,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,GAAc,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE;QACxD,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;YAChC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC,GAAG,O
AAO,CAAC,MAAM;YACxE,CAAC,CAAC,CAAC,CAAC;QAEN,qDAAqD;QACrD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5C,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;oBAC9D,SAAS,EAAE,CAAC;gBACd,CAAC;YACH,CAAC;YACD,QAAQ,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,CAAC;QAED,OAAO;YACL,EAAE,EAAE,CAAC;YACL,QAAQ;YACR,KAAK,EAAE,OAAO;YACd,IAAI,EAAE,OAAO,CAAC,MAAM;YACpB,qBAAqB,EAAE,OAAO;YAC9B,QAAQ;SACT,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,cAAc,CAAC,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IAExE,6DAA6D;IAC7D,wEAAwE;IACxE,MAAM,OAAO,GAAmB;QAC9B,UAAU,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE;QAC1D,OAAO;KACR,CAAC;IAEF,OAAO;QACL,QAAQ;QACR,OAAO;QACP,CAAC;QACD,UAAU;QACV,SAAS;QACT,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;KACjC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* L2-normalize a single vector. Returns a unit-length copy.
|
|
3
|
+
 * If the vector has zero magnitude, returns an unchanged copy of it (all zeros).
|
|
4
|
+
*/
|
|
5
|
+
export declare function normalizeVector(vec: number[]): number[];
|
|
6
|
+
/**
|
|
7
|
+
* L2-normalize a batch of vectors. Each vector is normalized independently.
|
|
8
|
+
*/
|
|
9
|
+
export declare function normalizeVectors(vecs: number[][]): number[][];
|
|
10
|
+
//# sourceMappingURL=normalize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.d.ts","sourceRoot":"","sources":["../src/normalize.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAIvD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,EAAE,CAE7D"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeVector = normalizeVector;
|
|
4
|
+
exports.normalizeVectors = normalizeVectors;
|
|
5
|
+
/**
 * L2-normalize a single vector. Returns a unit-length copy; the input array
 * is never mutated.
 *
 * The norm is first computed as the plain square root of the sum of squares.
 * When that value is unusable (0 or non-finite) even though every component
 * is finite and at least one is non-zero, the squares have underflowed or
 * overflowed; the vector is then rescaled by its largest absolute component
 * before the norm is taken, so the result is still unit length.
 *
 * @param vec - Components of the vector to normalize.
 * @returns A new array holding `vec` scaled to unit length, or an unchanged
 *   copy when every component is zero (including the empty vector).
 */
function normalizeVector(vec) {
    const magnitude = Math.sqrt(vec.reduce((sum, x) => sum + x * x, 0));
    // Fast path: the ordinary case, arithmetic identical to the naive formula.
    if (magnitude !== 0 && Number.isFinite(magnitude)) {
        return vec.map(x => x / magnitude);
    }
    const maxAbs = vec.reduce((max, x) => Math.max(max, Math.abs(x)), 0);
    // Genuinely zero (or empty) vector: nothing to scale.
    if (maxAbs === 0) {
        return [...vec];
    }
    // NaN/Infinity components cannot be rescaled meaningfully; keep the
    // plain division so such inputs propagate exactly as they did before.
    if (!Number.isFinite(maxAbs)) {
        return vec.map(x => x / magnitude);
    }
    // Squares overflowed or underflowed: bring the largest component to 1
    // first, which keeps every intermediate square inside the double range.
    const scaled = vec.map(x => x / maxAbs);
    const scaledNorm = Math.sqrt(scaled.reduce((sum, x) => sum + x * x, 0));
    return scaled.map(x => x / scaledNorm);
}
|
|
15
|
+
/**
 * Apply L2 normalization to every vector of a batch, each one on its own.
 *
 * @param vecs - Array of vectors; the outer array is not modified.
 * @returns A new array whose entries are the results of `normalizeVector`
 *   for the corresponding input vectors, in the same order.
 */
function normalizeVectors(vecs) {
    const normalized = vecs.map((vec) => normalizeVector(vec));
    return normalized;
}
|
|
21
|
+
//# sourceMappingURL=normalize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.js","sourceRoot":"","sources":["../src/normalize.ts"],"names":[],"mappings":";;AAIA,0CAIC;AAKD,4CAEC;AAfD;;;GAGG;AACH,SAAgB,eAAe,CAAC,GAAa;IAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACpE,IAAI,SAAS,KAAK,CAAC;QAAE,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC;IACrC,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAAC,IAAgB;IAC/C,OAAO,IAAI,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { EmbedItem, OptimalKResult, ClusterOptions } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Try k from kMin to kMax, run k-means for each, compute silhouette score,
|
|
4
|
+
* and return the k that maximises the silhouette score.
|
|
5
|
+
*
|
|
6
|
+
* Defaults:
|
|
7
|
+
* kMin = 2
|
|
8
|
+
* kMax = min(10, floor(sqrt(n)))
|
|
9
|
+
*/
|
|
10
|
+
export declare function findOptimalK(items: EmbedItem[], options?: Omit<ClusterOptions, 'k'>): OptimalKResult;
|
|
11
|
+
//# sourceMappingURL=optimal-k.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"optimal-k.d.ts","sourceRoot":"","sources":["../src/optimal-k.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAQzE;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,SAAS,EAAE,EAClB,OAAO,GAAE,IAAI,CAAC,cAAc,EAAE,GAAG,CAAM,GACtC,cAAc,CAoChB"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.findOptimalK = findOptimalK;
|
|
4
|
+
const kmeans_1 = require("./kmeans");
|
|
5
|
+
const silhouette_1 = require("./silhouette");
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Automatic k selection via silhouette scoring
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
/**
 * Pick the number of clusters by silhouette score.
 *
 * Runs k-means once for each candidate k in ascending order, scores every
 * clustering with `silhouetteScore`, and reports the k whose score is
 * highest. Only a strictly greater score replaces the current best, so the
 * smallest k wins ties.
 *
 * Candidate range:
 *   upper bound = min(options.maxK ?? min(10, floor(sqrt(n))), n - 1)
 *   lower bound = min(2, upper bound)
 *
 * NOTE(review): for very small inputs the upper bound drops below 2 and the
 * lower bound follows it, so kMeans can be called with k < 2 (k <= 0 when
 * n <= 1). Confirm that kMeans rejects or handles those values as intended.
 *
 * @param items - Items to cluster; only `items.length` is read here, the
 *   array itself is handed to kMeans as-is.
 * @param options - Forwarded to kMeans together with the candidate k;
 *   `maxK` overrides the default upper bound.
 * @returns `{ k, scores, method: 'silhouette' }`, where `scores` holds one
 *   `{ k, silhouette, inertia }` entry per candidate tried.
 */
function findOptimalK(items, options = {}) {
    const itemCount = items.length;
    const defaultUpper = Math.min(10, Math.floor(Math.sqrt(itemCount)));
    const requestedUpper = options.maxK ?? defaultUpper;
    // Clamp the range to the data: never more than n - 1 clusters, and the
    // lower bound may not exceed the upper bound.
    const upperK = Math.min(requestedUpper, itemCount - 1);
    const lowerK = Math.min(2, upperK);
    const scores = [];
    let k = lowerK;
    while (k <= upperK) {
        const clustering = (0, kmeans_1.kMeans)(items, k, { ...options, k });
        const { score } = (0, silhouette_1.silhouetteScore)(clustering);
        scores.push({ k, silhouette: score, inertia: clustering.quality.inertia });
        k += 1;
    }
    // Fold over the candidates, keeping the first one with the highest score;
    // the seed makes the lower bound the answer when nothing beats -Infinity.
    const best = scores.reduce((winner, candidate) => (candidate.silhouette > winner.silhouette ? candidate : winner), { k: lowerK, silhouette: -Infinity });
    return {
        k: best.k,
        scores,
        method: 'silhouette',
    };
}
|
|
49
|
+
//# sourceMappingURL=optimal-k.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"optimal-k.js","sourceRoot":"","sources":["../src/optimal-k.ts"],"names":[],"mappings":";;AAgBA,oCAuCC;AAtDD,qCAAkC;AAClC,6CAA+C;AAE/C,8EAA8E;AAC9E,+CAA+C;AAC/C,8EAA8E;AAE9E;;;;;;;GAOG;AACH,SAAgB,YAAY,CAC1B,KAAkB,EAClB,UAAqC,EAAE;IAEvC,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,IAAI,GAAG,CAAC,CAAC;IACf,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEpE,gDAAgD;IAChD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IAEpD,MAAM,MAAM,GAA8D,EAAE,CAAC;IAE7E,KAAK,IAAI,CAAC,GAAG,aAAa,EAAE,CAAC,IAAI,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;QACpD,MAAM,MAAM,GAAG,IAAA,eAAM,EAAC,KAAK,EAAE,CAAC,EAAE,EAAE,GAAG,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QACnD,MAAM,GAAG,GAAG,IAAA,4BAAe,EAAC,MAAM,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC;YACV,CAAC;YACD,UAAU,EAAE,GAAG,CAAC,KAAK;YACrB,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO;SAChC,CAAC,CAAC;IACL,CAAC;IAED,yCAAyC;IACzC,IAAI,KAAK,GAAG,aAAa,CAAC;IAC1B,IAAI,SAAS,GAAG,CAAC,QAAQ,CAAC;IAC1B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE,CAAC;YACjC,SAAS,GAAG,KAAK,CAAC,UAAU,CAAC;YAC7B,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO;QACL,CAAC,EAAE,KAAK;QACR,MAAM;QACN,MAAM,EAAE,YAAY;KACrB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { ClusterResult, SilhouetteResult } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Compute the silhouette score for a clustering result.
|
|
4
|
+
*
|
|
5
|
+
* Per-item silhouette coefficient:
|
|
6
|
+
* s(i) = (b(i) - a(i)) / max(a(i), b(i))
|
|
7
|
+
*
|
|
8
|
+
* where:
|
|
9
|
+
* a(i) = mean distance from item i to all other items in the same cluster
|
|
10
|
+
* b(i) = mean distance from item i to all items in the nearest other cluster
|
|
11
|
+
*
|
|
12
|
+
* Returns value in [-1, 1]; higher is better.
|
|
13
|
+
* If only one cluster exists, returns 0 for all items.
|
|
14
|
+
*/
|
|
15
|
+
export declare function silhouetteScore(result: ClusterResult, distFn?: (a: number[], b: number[]) => number): SilhouetteResult;
|
|
16
|
+
//# sourceMappingURL=silhouette.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"silhouette.d.ts","sourceRoot":"","sources":["../src/silhouette.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAO/D;;;;;;;;;;;;GAYG;AACH,wBAAgB,eAAe,CAC7B,MAAM,EAAE,aAAa,EACrB,MAAM,GAAE,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,MAA0B,GAC/D,gBAAgB,CAoFlB"}
|