@thi.ng/text-analysis 0.2.0 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Change Log
2
2
 
3
- - **Last updated**: 2025-06-14T20:56:27Z
3
+ - **Last updated**: 2025-06-15T12:37:24Z
4
4
  - **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
5
5
 
6
6
  All notable changes to this project will be documented in this file.
@@ -11,6 +11,13 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
11
11
  **Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
12
12
  and/or version bumps of transitive dependencies.
13
13
 
14
+ ## [0.3.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.3.0) (2025-06-15)
15
+
16
+ #### 🚀 Features
17
+
18
+ - update kmeansDense ([d35b6bd](https://github.com/thi-ng/umbrella/commit/d35b6bd))
19
+ - update results to include original `docs` for each cluster
20
+
14
21
  ## [0.2.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.2.0) (2025-06-14)
15
22
 
16
23
  #### 🚀 Features
package/cluster.d.ts CHANGED
@@ -18,7 +18,12 @@ export declare const JACCARD_DIST_DENSE: Untransformed<ReadonlyVec>;
18
18
  * @param docs
19
19
  * @param opts
20
20
  */
21
- export declare const kmeansDense: (k: number, docs: ReadonlyVec[], opts?: Partial<KMeansOpts>) => import("@thi.ng/k-means").Cluster[];
21
+ export declare const kmeansDense: (k: number, docs: ReadonlyVec[], opts?: Partial<KMeansOpts>) => {
22
+ docs: ReadonlyVec[];
23
+ id: number;
24
+ centroid: ReadonlyVec;
25
+ items: number[];
26
+ }[];
22
27
  /**
23
28
  * k-means clustering for sparse multi-hot vectors. First converts vectors into
24
29
  * dense versions (using {@link toDense}), then calls {@link kmeansDense} to
@@ -34,7 +39,12 @@ export declare const kmeansDense: (k: number, docs: ReadonlyVec[], opts?: Partia
34
39
  */
35
40
  export declare const kmeansSparse: (k: number, docs: ReadonlyVec[], opts: Partial<KMeansOpts> & {
36
41
  dim: number;
37
- }) => import("@thi.ng/k-means").Cluster[];
42
+ }) => {
43
+ docs: ReadonlyVec[];
44
+ id: number;
45
+ centroid: ReadonlyVec;
46
+ items: number[];
47
+ }[];
38
48
  export declare function clusterBounds(docs: ReadonlyVec[]): {
39
49
  centroid: ReadonlyVec;
40
50
  radius: number;
package/cluster.js CHANGED
@@ -11,7 +11,10 @@ import { distSq } from "@thi.ng/vectors/distsq";
11
11
  import { mean } from "@thi.ng/vectors/mean";
12
12
  import { toDense } from "./vec.js";
13
13
  const JACCARD_DIST_DENSE = new Untransformed(distJaccard);
14
- const kmeansDense = (k, docs, opts) => kmeans(k, docs, { maxIter: 100, ...opts });
14
+ const kmeansDense = (k, docs, opts) => kmeans(k, docs, { maxIter: 100, ...opts }).map((cluster) => ({
15
+ ...cluster,
16
+ docs: lookup(docs, cluster.items)
17
+ }));
15
18
  const kmeansSparse = (k, docs, opts) => kmeansDense(
16
19
  k,
17
20
  docs.map((x) => toDense(opts.dim, x)),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thi.ng/text-analysis",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities",
5
5
  "type": "module",
6
6
  "module": "./index.js",
@@ -141,5 +141,5 @@
141
141
  "status": "alpha",
142
142
  "year": 2021
143
143
  },
144
- "gitHead": "14e994e531d32053e948768998324d443436a542\n"
144
+ "gitHead": "4635a24acc2623894887ca31189fdffda87ff9d3\n"
145
145
  }