embed-cluster 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +122 -0
  2. package/dist/__tests__/cluster.test.d.ts +2 -0
  3. package/dist/__tests__/cluster.test.d.ts.map +1 -0
  4. package/dist/__tests__/cluster.test.js +202 -0
  5. package/dist/__tests__/cluster.test.js.map +1 -0
  6. package/dist/__tests__/errors.test.d.ts +2 -0
  7. package/dist/__tests__/errors.test.d.ts.map +1 -0
  8. package/dist/__tests__/errors.test.js +68 -0
  9. package/dist/__tests__/errors.test.js.map +1 -0
  10. package/dist/__tests__/fixtures/embeddings-small.json +25 -0
  11. package/dist/__tests__/fixtures.test.d.ts +2 -0
  12. package/dist/__tests__/fixtures.test.d.ts.map +1 -0
  13. package/dist/__tests__/fixtures.test.js +44 -0
  14. package/dist/__tests__/fixtures.test.js.map +1 -0
  15. package/dist/__tests__/kmeans.test.d.ts +2 -0
  16. package/dist/__tests__/kmeans.test.d.ts.map +1 -0
  17. package/dist/__tests__/kmeans.test.js +220 -0
  18. package/dist/__tests__/kmeans.test.js.map +1 -0
  19. package/dist/__tests__/normalize.test.d.ts +2 -0
  20. package/dist/__tests__/normalize.test.d.ts.map +1 -0
  21. package/dist/__tests__/normalize.test.js +92 -0
  22. package/dist/__tests__/normalize.test.js.map +1 -0
  23. package/dist/__tests__/silhouette.test.d.ts +2 -0
  24. package/dist/__tests__/silhouette.test.d.ts.map +1 -0
  25. package/dist/__tests__/silhouette.test.js +126 -0
  26. package/dist/__tests__/silhouette.test.js.map +1 -0
  27. package/dist/__tests__/types.test.d.ts +2 -0
  28. package/dist/__tests__/types.test.d.ts.map +1 -0
  29. package/dist/__tests__/types.test.js +126 -0
  30. package/dist/__tests__/types.test.js.map +1 -0
  31. package/dist/clusterer.d.ts +17 -0
  32. package/dist/clusterer.d.ts.map +1 -0
  33. package/dist/clusterer.js +72 -0
  34. package/dist/clusterer.js.map +1 -0
  35. package/dist/errors.d.ts +7 -0
  36. package/dist/errors.d.ts.map +1 -0
  37. package/dist/errors.js +14 -0
  38. package/dist/errors.js.map +1 -0
  39. package/dist/index.d.ts +9 -0
  40. package/dist/index.d.ts.map +1 -0
  41. package/dist/index.js +21 -0
  42. package/dist/index.js.map +1 -0
  43. package/dist/kmeans.d.ts +6 -0
  44. package/dist/kmeans.d.ts.map +1 -0
  45. package/dist/kmeans.js +250 -0
  46. package/dist/kmeans.js.map +1 -0
  47. package/dist/normalize.d.ts +10 -0
  48. package/dist/normalize.d.ts.map +1 -0
  49. package/dist/normalize.js +21 -0
  50. package/dist/normalize.js.map +1 -0
  51. package/dist/optimal-k.d.ts +11 -0
  52. package/dist/optimal-k.d.ts.map +1 -0
  53. package/dist/optimal-k.js +49 -0
  54. package/dist/optimal-k.js.map +1 -0
  55. package/dist/silhouette.d.ts +16 -0
  56. package/dist/silhouette.d.ts.map +1 -0
  57. package/dist/silhouette.js +95 -0
  58. package/dist/silhouette.js.map +1 -0
  59. package/dist/types.d.ts +74 -0
  60. package/dist/types.d.ts.map +1 -0
  61. package/dist/types.js +3 -0
  62. package/dist/types.js.map +1 -0
  63. package/package.json +48 -0
@@ -0,0 +1,95 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.silhouetteScore = silhouetteScore;
4
+ const kmeans_1 = require("./kmeans");
5
+ // ---------------------------------------------------------------------------
6
+ // Silhouette scoring
7
+ // ---------------------------------------------------------------------------
8
/**
 * Compute the silhouette score for a clustering result.
 *
 * Per-item silhouette coefficient:
 *   s(i) = (b(i) - a(i)) / max(a(i), b(i))
 *
 * where:
 *   a(i) = mean distance from item i to all other items in the same cluster
 *   b(i) = mean distance from item i to all items in the nearest other cluster
 *
 * Conventions applied here:
 *   - Fewer than two clusters: every score is 0.
 *   - An item that is alone in its cluster scores 0 (the standard silhouette
 *     convention); otherwise a(i) would be 0 and the item would always score 1.
 *   - Empty clusters contribute 0 to `perCluster` and are ignored when
 *     searching for the nearest other cluster.
 *   - "Self" and "own cluster" are identified by position, not by `id`, so
 *     duplicate or missing ids cannot distort the averages.
 *
 * @param {{ clusters: Array<{ items: Array<{ embedding: number[] }> }> }} result
 *   Clustering result; only `clusters[].items[].embedding` is read.
 * @param {(a: number[], b: number[]) => number} [distFn]
 *   Distance between two embeddings. Defaults to the Euclidean distance
 *   exported by `./kmeans`.
 * @returns {{ score: number, perCluster: number[], perItem: number[] }}
 *   `score` is the mean of all per-item coefficients, `perCluster` holds one
 *   mean per entry of `clusters` (same order), and `perItem` lists the
 *   coefficients cluster by cluster in iteration order.
 */
function silhouetteScore(result, distFn = kmeans_1.euclideanDistance) {
    const { clusters } = result;
    // Need at least 2 clusters to compute silhouette
    if (clusters.length < 2) {
        return {
            score: 0,
            perCluster: clusters.map(() => 0),
            perItem: clusters.flatMap((c) => c.items.map(() => 0)),
        };
    }
    // Arithmetic mean that yields 0 (not NaN) for an empty list.
    const mean = (values) => {
        if (values.length === 0) {
            return 0;
        }
        let total = 0;
        for (const value of values) {
            total += value;
        }
        return total / values.length;
    };
    // Mean distance from `vector` to every entry of `items`, optionally
    // leaving out the entry at `skipIndex` (the item itself).
    const meanDistance = (vector, items, skipIndex = -1) => {
        let total = 0;
        let count = 0;
        for (let index = 0; index < items.length; index += 1) {
            if (index === skipIndex) {
                continue;
            }
            total += distFn(vector, items[index].embedding);
            count += 1;
        }
        return count === 0 ? 0 : total / count;
    };
    const allPerItem = [];
    const perCluster = [];
    for (let ci = 0; ci < clusters.length; ci += 1) {
        const { items } = clusters[ci];
        const clusterScores = [];
        for (let ii = 0; ii < items.length; ii += 1) {
            let s = 0;
            // A singleton has no intra-cluster distance to compare against,
            // so its coefficient stays at 0.
            if (items.length > 1) {
                const v = items[ii].embedding;
                // a(i): mean intra-cluster distance, excluding the item itself
                const a = meanDistance(v, items, ii);
                // b(i): smallest mean distance to any other non-empty cluster
                let b = Infinity;
                for (let cj = 0; cj < clusters.length; cj += 1) {
                    if (cj === ci || clusters[cj].items.length === 0) {
                        continue;
                    }
                    const bMean = meanDistance(v, clusters[cj].items);
                    if (bMean < b) {
                        b = bMean;
                    }
                }
                // b stays Infinity when every other cluster is empty
                if (b !== Infinity) {
                    const maxAB = Math.max(a, b);
                    s = maxAB === 0 ? 0 : (b - a) / maxAB;
                }
            }
            clusterScores.push(s);
            allPerItem.push(s);
        }
        perCluster.push(mean(clusterScores));
    }
    return {
        score: mean(allPerItem),
        perCluster,
        perItem: allPerItem,
    };
}
95
+ //# sourceMappingURL=silhouette.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"silhouette.js","sourceRoot":"","sources":["../src/silhouette.ts"],"names":[],"mappings":";;AAoBA,0CAuFC;AA1GD,qCAA6C;AAE7C,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,SAAgB,eAAe,CAC7B,MAAqB,EACrB,SAA+C,0BAAiB;IAEhE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,CAAC;IAE5B,iDAAiD;IACjD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,KAAK,EAAE,CAAC;YACR,UAAU,EAAE,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACjC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACrD,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACnB,SAAS;QACX,CAAC;QAED,MAAM,aAAa,GAAa,EAAE,CAAC;QAEnC,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACjC,MAAM,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC;YAEzB,oCAAoC;YACpC,IAAI,CAAC,GAAG,CAAC,CAAC;YACV,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;gBACb,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBAClC,IAAI,KAAK,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC;wBACzB,IAAI,IAAI,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;oBACrC,CAAC;gBACH,CAAC;gBACD,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,mCAAmC;YAEnC,+CAA+C;YAC/C,IAAI,CAAC,GAAG,QAAQ,CAAC;YACjB,KAAK,MAAM,YAAY,IAAI,QAAQ,EAAE,CAAC;gBACpC,IAAI,YAAY,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE;oBAAE,SAAS;gBAC7C,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBAE9C,IAAI,IAAI,GAAG,CAAC,CAAC;gBACb,KAAK,MAAM,KAAK,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;oBACvC,IAAI,IAAI,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;gBACrC,CAAC;gBACD,MAAM,KAAK,GAAG,IAAI,GAAG,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC;gBAC/C,IAAI,KAAK,GAAG,CAAC;oBAAE,CAAC,GAAG,KAAK,CAAC;YAC3B,CAAC;YAED,2BAA2B;YAC3B,IAAI,CAAS,CAAC;YACd,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;gBACnB,6BAA6B;gBAC7B,CAAC,GAAG,CAAC,CAAC;YACR,CAAC;iBAAM,CAAC;gBACN,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,
EAAE,CAAC,CAAC,CAAC;gBAC7B,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;YACxC,CAAC;YAED,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACtB,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,WAAW,GACf,aAAa,CAAC,MAAM,GAAG,CAAC;YACtB,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM;YACrE,CAAC,CAAC,CAAC,CAAC;QACR,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,YAAY,GAChB,UAAU,CAAC,MAAM,GAAG,CAAC;QACnB,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM;QAC/D,CAAC,CAAC,CAAC,CAAC;IAER,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,UAAU;QACV,OAAO,EAAE,UAAU;KACpB,CAAC;AACJ,CAAC"}
@@ -0,0 +1,74 @@
1
+ export interface EmbedItem { // One input record: an identifier, its text, and a numeric embedding vector.
2
+ id: string; // String identifier of the item.
3
+ text: string; // NOTE(review): presumably the text the embedding was computed from — confirm with callers.
4
+ embedding: number[]; // Vector handed to the distance function (silhouetteScore passes it to distFn).
5
+ metadata?: Record<string, unknown>; // Optional free-form key/value data; not read by silhouetteScore.
6
+ }
7
+ export interface ClusterItem extends EmbedItem { // An EmbedItem annotated with cluster-assignment fields.
8
+ clusterId: number; // NOTE(review): presumably equals the owning Cluster.id — confirm in kmeans/clusterer.
9
+ distanceToCentroid: number; // NOTE(review): presumably distance from `embedding` to the owning centroid — confirm.
10
+ }
11
+ export interface Cluster { // One group of items in a clustering result.
12
+ id: number; // Numeric cluster identifier.
13
+ centroid: number[]; // Centroid vector; not read by silhouetteScore.
14
+ items: ClusterItem[]; // Members of the cluster; silhouetteScore iterates these and may encounter an empty list.
15
+ label?: string; // Optional label. NOTE(review): presumably filled in by a LabelerFn — confirm.
16
+ size: number; // NOTE(review): presumably items.length — confirm where clusters are built.
17
+ avgDistanceToCentroid: number; // NOTE(review): name suggests the mean of items[].distanceToCentroid — confirm.
18
+ cohesion: number; // NOTE(review): definition not visible in this chunk — see the clusterer implementation.
19
+ }
20
+ export interface SilhouetteResult { // Return shape of silhouetteScore (see silhouette.js).
21
+ score: number; // Mean of all per-item coefficients; 0 when there are fewer than two clusters or no items.
22
+ perCluster: number[]; // One mean coefficient per cluster, in the order of `clusters`; 0 for an empty cluster.
23
+ perItem?: number[]; // Per-item coefficients, cluster by cluster; optional in the type, but silhouetteScore always sets it.
24
+ }
25
+ export interface OptimalKResult { // Result type of Clusterer.findOptimalK.
26
+ k: number; // NOTE(review): presumably the selected number of clusters — confirm in optimal-k.
27
+ scores: Array<{ // One entry per evaluated k, pairing it with two quality numbers.
28
+ k: number;
29
+ silhouette: number; // NOTE(review): presumably SilhouetteResult.score for that k — confirm.
30
+ inertia: number;
31
+ }>;
32
+ method: 'silhouette' | 'elbow' | 'combined'; // Name of the selection strategy; exactly these three literals are allowed.
33
+ }
34
+ export interface ClusterQuality { // Quality metrics attached to a ClusterResult.
35
+ silhouette: SilhouetteResult; // Silhouette scores in the shape produced by silhouetteScore.
36
+ inertia: number; // NOTE(review): exact definition not visible in this chunk — confirm in kmeans.
37
+ daviesBouldin?: number; // Optional. NOTE(review): presumably the Davies–Bouldin index — confirm it is ever populated.
38
+ calinski?: number; // Optional. NOTE(review): presumably the Calinski–Harabasz index — confirm it is ever populated.
39
+ }
40
+ export interface VisualizationData { // 2-D points tagged with a cluster id, plus the name of the projection method.
41
+ points: Array<{
42
+ id: string; // NOTE(review): presumably the originating EmbedItem.id — confirm.
43
+ x: number;
44
+ y: number;
45
+ clusterId: number;
46
+ }>;
47
+ method: 'pca' | 'umap' | 'tsne'; // package.json lists ml-pca and umap-js as optional peers; no t-SNE dependency is declared there.
48
+ }
49
+ export type LabelerFn = (items: EmbedItem[], clusterId: number) => Promise<string> | string; // Callback given a list of items and a cluster id; returns a label string directly or as a Promise.
50
+ export interface ClusterOptions { // Optional settings for Clusterer.cluster; every field may be omitted.
51
+ k?: number; // NOTE(review): presumably the requested number of clusters — confirm in clusterer.
52
+ autoK?: boolean; // NOTE(review): presumably enables automatic k selection — confirm.
53
+ maxK?: number; // NOTE(review): presumably the upper bound for the k search — confirm in optimal-k.
54
+ maxIterations?: number;
55
+ tolerance?: number; // NOTE(review): presumably the convergence threshold — default not visible here.
56
+ seed?: number; // NOTE(review): presumably seeds centroid initialisation — confirm in kmeans.
57
+ normalize?: boolean; // NOTE(review): presumably normalises embeddings before clustering — see normalize.js.
58
+ labeler?: LabelerFn; // Optional labelling callback.
59
+ distanceFn?: (a: number[], b: number[]) => number; // Custom distance; same (a, b) => number shape that silhouetteScore accepts as distFn.
60
+ }
61
+ export interface ClusterResult { // Result type of Clusterer.cluster and input type of silhouetteScore.
62
+ clusters: Cluster[]; // The only field silhouetteScore reads.
63
+ quality: ClusterQuality;
64
+ k: number; // NOTE(review): presumably clusters.length — confirm.
65
+ iterations: number;
66
+ converged: boolean;
67
+ durationMs: number; // Name indicates a duration in milliseconds; measurement point not visible here.
68
+ }
69
+ export interface Clusterer { // Object-style API: two async operations plus a synchronous silhouette scorer.
70
+ cluster(items: EmbedItem[], options?: ClusterOptions): Promise<ClusterResult>; // Resolves to a ClusterResult for the given items.
71
+ findOptimalK(items: EmbedItem[], options?: Omit<ClusterOptions, 'k'>): Promise<OptimalKResult>; // Takes the same options as cluster, minus `k`.
72
+ silhouetteScore(result: ClusterResult): SilhouetteResult; // Takes only `result`; the standalone silhouetteScore in silhouette.js also accepts an optional distance function.
73
+ }
74
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,WAAY,SAAQ,SAAS;IAC5C,SAAS,EAAE,MAAM,CAAC;IAClB,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB,EAAE,MAAM,CAAC;IAC9B,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,CAAC,EAAE,MAAM,CAAC;IACV,MAAM,EAAE,KAAK,CAAC;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAClE,MAAM,EAAE,YAAY,GAAG,OAAO,GAAG,UAAU,CAAC;CAC7C;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,gBAAgB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACvE,MAAM,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC;CACjC;AAED,MAAM,MAAM,SAAS,GAAG,CACtB,KAAK,EAAE,SAAS,EAAE,EAClB,SAAS,EAAE,MAAM,KACd,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;AAE9B,MAAM,WAAW,cAAc;IAC7B,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,OAAO,CAAC,EAAE,SAAS,CAAC;IACpB,UAAU,CAAC,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;CACnD;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,OAAO,EAAE,cAAc,CAAC;IACxB,CAAC,EAAE,MAAM,CAAC;IACV,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,OAAO,CAAC,EAAE
,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAC9E,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,GAAG,CAAC,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IAC/F,eAAe,CAAC,MAAM,EAAE,aAAa,GAAG,gBAAgB,CAAC;CAC1D"}
package/dist/types.js ADDED
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "embed-cluster",
3
+ "version": "0.3.0",
4
+ "description": "Cluster embeddings into topics with automatic labeling",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "bin": {
8
+ "embed-cluster": "dist/cli.js"
9
+ },
10
+ "files": [
11
+ "dist"
12
+ ],
13
+ "scripts": {
14
+ "build": "tsc",
15
+ "test": "vitest run",
16
+ "lint": "eslint src/",
17
+ "prepublishOnly": "npm run build"
18
+ },
19
+ "keywords": [],
20
+ "author": "",
21
+ "license": "MIT",
22
+ "engines": {
23
+ "node": ">=18"
24
+ },
25
+ "publishConfig": {
26
+ "access": "public"
27
+ },
28
+ "peerDependencies": {
29
+ "ml-pca": ">=4.0.0",
30
+ "umap-js": ">=1.4.0"
31
+ },
32
+ "peerDependenciesMeta": {
33
+ "ml-pca": {
34
+ "optional": true
35
+ },
36
+ "umap-js": {
37
+ "optional": true
38
+ }
39
+ },
40
+ "devDependencies": {
41
+ "@types/node": "^25.5.0",
42
+ "@typescript-eslint/eslint-plugin": "^8.57.1",
43
+ "@typescript-eslint/parser": "^8.57.1",
44
+ "eslint": "^10.1.0",
45
+ "typescript": "^5.9.3",
46
+ "vitest": "^4.1.0"
47
+ }
48
+ }