@thi.ng/text-analysis 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -1
- package/cluster.d.ts +12 -2
- package/cluster.js +4 -1
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Change Log
|
|
2
2
|
|
|
3
|
-
- **Last updated**: 2025-06-
|
|
3
|
+
- **Last updated**: 2025-06-15T12:37:24Z
|
|
4
4
|
- **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
|
|
5
5
|
|
|
6
6
|
All notable changes to this project will be documented in this file.
|
|
@@ -11,6 +11,13 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
|
|
|
11
11
|
**Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
|
|
12
12
|
and/or version bumps of transitive dependencies.
|
|
13
13
|
|
|
14
|
+
## [0.3.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.3.0) (2025-06-15)
|
|
15
|
+
|
|
16
|
+
#### 🚀 Features
|
|
17
|
+
|
|
18
|
+
- update kmeansDense ([d35b6bd](https://github.com/thi-ng/umbrella/commit/d35b6bd))
|
|
19
|
+
- update results to include original `docs` for each cluster
|
|
20
|
+
|
|
14
21
|
## [0.2.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.2.0) (2025-06-14)
|
|
15
22
|
|
|
16
23
|
#### 🚀 Features
|
package/cluster.d.ts
CHANGED
|
@@ -18,7 +18,12 @@ export declare const JACCARD_DIST_DENSE: Untransformed<ReadonlyVec>;
|
|
|
18
18
|
* @param docs
|
|
19
19
|
* @param opts
|
|
20
20
|
*/
|
|
21
|
-
export declare const kmeansDense: (k: number, docs: ReadonlyVec[], opts?: Partial<KMeansOpts>) =>
|
|
21
|
+
export declare const kmeansDense: (k: number, docs: ReadonlyVec[], opts?: Partial<KMeansOpts>) => {
|
|
22
|
+
docs: ReadonlyVec[];
|
|
23
|
+
id: number;
|
|
24
|
+
centroid: ReadonlyVec;
|
|
25
|
+
items: number[];
|
|
26
|
+
}[];
|
|
22
27
|
/**
|
|
23
28
|
* k-means clustering for sparse multi-hot vectors. First converts vectors into
|
|
24
29
|
* dense versions (using {@link toDense}), then calls {@link kmeansDense} to
|
|
@@ -34,7 +39,12 @@ export declare const kmeansDense: (k: number, docs: ReadonlyVec[], opts?: Partia
|
|
|
34
39
|
*/
|
|
35
40
|
export declare const kmeansSparse: (k: number, docs: ReadonlyVec[], opts: Partial<KMeansOpts> & {
|
|
36
41
|
dim: number;
|
|
37
|
-
}) =>
|
|
42
|
+
}) => {
|
|
43
|
+
docs: ReadonlyVec[];
|
|
44
|
+
id: number;
|
|
45
|
+
centroid: ReadonlyVec;
|
|
46
|
+
items: number[];
|
|
47
|
+
}[];
|
|
38
48
|
export declare function clusterBounds(docs: ReadonlyVec[]): {
|
|
39
49
|
centroid: ReadonlyVec;
|
|
40
50
|
radius: number;
|
package/cluster.js
CHANGED
|
@@ -11,7 +11,10 @@ import { distSq } from "@thi.ng/vectors/distsq";
|
|
|
11
11
|
import { mean } from "@thi.ng/vectors/mean";
|
|
12
12
|
import { toDense } from "./vec.js";
|
|
13
13
|
const JACCARD_DIST_DENSE = new Untransformed(distJaccard);
|
|
14
|
-
const kmeansDense = (k, docs, opts) => kmeans(k, docs, { maxIter: 100, ...opts })
|
|
14
|
+
const kmeansDense = (k, docs, opts) => kmeans(k, docs, { maxIter: 100, ...opts }).map((cluster) => ({
|
|
15
|
+
...cluster,
|
|
16
|
+
docs: lookup(docs, cluster.items)
|
|
17
|
+
}));
|
|
15
18
|
const kmeansSparse = (k, docs, opts) => kmeansDense(
|
|
16
19
|
k,
|
|
17
20
|
docs.map((x) => toDense(opts.dim, x)),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@thi.ng/text-analysis",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./index.js",
|
|
@@ -141,5 +141,5 @@
|
|
|
141
141
|
"status": "alpha",
|
|
142
142
|
"year": 2021
|
|
143
143
|
},
|
|
144
|
-
"gitHead": "
|
|
144
|
+
"gitHead": "4635a24acc2623894887ca31189fdffda87ff9d3\n"
|
|
145
145
|
}
|