@thi.ng/text-analysis 0.4.19 → 0.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +144 -144
- package/CHANGELOG.md +0 -69
package/package.json
CHANGED
|
@@ -1,145 +1,145 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
}
|
|
2
|
+
"name": "@thi.ng/text-analysis",
|
|
3
|
+
"version": "0.4.23",
|
|
4
|
+
"description": "Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"module": "./index.js",
|
|
7
|
+
"typings": "./index.d.ts",
|
|
8
|
+
"sideEffects": false,
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "https://github.com/thi-ng/umbrella.git"
|
|
12
|
+
},
|
|
13
|
+
"homepage": "https://thi.ng/text-analysis",
|
|
14
|
+
"funding": [
|
|
15
|
+
{
|
|
16
|
+
"type": "github",
|
|
17
|
+
"url": "https://github.com/sponsors/postspectacular"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "patreon",
|
|
21
|
+
"url": "https://patreon.com/thing_umbrella"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"type": "liberapay",
|
|
25
|
+
"url": "https://liberapay.com/thi.ng"
|
|
26
|
+
}
|
|
27
|
+
],
|
|
28
|
+
"author": "Karsten Schmidt (https://thi.ng)",
|
|
29
|
+
"license": "Apache-2.0",
|
|
30
|
+
"scripts": {
|
|
31
|
+
"build": "yarn build:esbuild && yarn build:decl",
|
|
32
|
+
"build:decl": "tsc --declaration --emitDeclarationOnly",
|
|
33
|
+
"build:esbuild": "esbuild --format=esm --platform=neutral --target=es2022 --tsconfig=tsconfig.json --outdir=. src/**/*.ts",
|
|
34
|
+
"clean": "bun ../../tools/src/clean-package.ts",
|
|
35
|
+
"doc": "typedoc --options ../../typedoc.json --out doc src/index.ts",
|
|
36
|
+
"doc:readme": "bun ../../tools/src/module-stats.ts && bun ../../tools/src/readme.ts",
|
|
37
|
+
"pub": "npm publish --access public",
|
|
38
|
+
"test": "bun test",
|
|
39
|
+
"tool:tangle": "../../node_modules/.bin/tangle src/**/*.ts"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"@thi.ng/api": "^8.12.6",
|
|
43
|
+
"@thi.ng/arrays": "^2.13.15",
|
|
44
|
+
"@thi.ng/bidir-index": "^1.3.11",
|
|
45
|
+
"@thi.ng/checks": "^3.7.22",
|
|
46
|
+
"@thi.ng/distance": "^3.0.20",
|
|
47
|
+
"@thi.ng/k-means": "^2.0.18",
|
|
48
|
+
"@thi.ng/strings": "^3.9.26",
|
|
49
|
+
"@thi.ng/transducers": "^9.6.14",
|
|
50
|
+
"@thi.ng/vectors": "^8.6.10"
|
|
51
|
+
},
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"esbuild": "^0.25.11",
|
|
54
|
+
"typedoc": "^0.28.14",
|
|
55
|
+
"typescript": "^5.9.3"
|
|
56
|
+
},
|
|
57
|
+
"keywords": [
|
|
58
|
+
"analysis",
|
|
59
|
+
"centroid",
|
|
60
|
+
"cluster",
|
|
61
|
+
"composition",
|
|
62
|
+
"decode",
|
|
63
|
+
"dense",
|
|
64
|
+
"encode",
|
|
65
|
+
"frequency",
|
|
66
|
+
"functional",
|
|
67
|
+
"histogram",
|
|
68
|
+
"k-means",
|
|
69
|
+
"ngram",
|
|
70
|
+
"pipeline",
|
|
71
|
+
"similarity",
|
|
72
|
+
"sparse",
|
|
73
|
+
"stem",
|
|
74
|
+
"text",
|
|
75
|
+
"tf-idf",
|
|
76
|
+
"tokenizer",
|
|
77
|
+
"transducer",
|
|
78
|
+
"typescript",
|
|
79
|
+
"vocabulary",
|
|
80
|
+
"vector"
|
|
81
|
+
],
|
|
82
|
+
"publishConfig": {
|
|
83
|
+
"access": "public"
|
|
84
|
+
},
|
|
85
|
+
"browser": {
|
|
86
|
+
"process": false,
|
|
87
|
+
"setTimeout": false
|
|
88
|
+
},
|
|
89
|
+
"engines": {
|
|
90
|
+
"node": ">=18"
|
|
91
|
+
},
|
|
92
|
+
"files": [
|
|
93
|
+
"./*.js",
|
|
94
|
+
"./*.d.ts"
|
|
95
|
+
],
|
|
96
|
+
"exports": {
|
|
97
|
+
".": {
|
|
98
|
+
"default": "./index.js"
|
|
99
|
+
},
|
|
100
|
+
"./api": {
|
|
101
|
+
"default": "./api.js"
|
|
102
|
+
},
|
|
103
|
+
"./cluster": {
|
|
104
|
+
"default": "./cluster.js"
|
|
105
|
+
},
|
|
106
|
+
"./frequencies": {
|
|
107
|
+
"default": "./frequencies.js"
|
|
108
|
+
},
|
|
109
|
+
"./ngrams": {
|
|
110
|
+
"default": "./ngrams.js"
|
|
111
|
+
},
|
|
112
|
+
"./replace": {
|
|
113
|
+
"default": "./replace.js"
|
|
114
|
+
},
|
|
115
|
+
"./similarity": {
|
|
116
|
+
"default": "./similarity.js"
|
|
117
|
+
},
|
|
118
|
+
"./stem": {
|
|
119
|
+
"default": "./stem.js"
|
|
120
|
+
},
|
|
121
|
+
"./stop-words": {
|
|
122
|
+
"default": "./stop-words.js"
|
|
123
|
+
},
|
|
124
|
+
"./tf-idf": {
|
|
125
|
+
"default": "./tf-idf.js"
|
|
126
|
+
},
|
|
127
|
+
"./tokenize": {
|
|
128
|
+
"default": "./tokenize.js"
|
|
129
|
+
},
|
|
130
|
+
"./vec": {
|
|
131
|
+
"default": "./vec.js"
|
|
132
|
+
},
|
|
133
|
+
"./vocab": {
|
|
134
|
+
"default": "./vocab.js"
|
|
135
|
+
},
|
|
136
|
+
"./xform": {
|
|
137
|
+
"default": "./xform.js"
|
|
138
|
+
}
|
|
139
|
+
},
|
|
140
|
+
"thi.ng": {
|
|
141
|
+
"status": "alpha",
|
|
142
|
+
"year": 2021
|
|
143
|
+
},
|
|
144
|
+
"gitHead": "136a5e5ef0b69e82329db00d806c3c4e8f1aa063\n"
|
|
145
|
+
}
|
package/CHANGELOG.md
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
# Change Log
|
|
2
|
-
|
|
3
|
-
- **Last updated**: 2025-09-01T16:38:35Z
|
|
4
|
-
- **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
|
|
5
|
-
|
|
6
|
-
All notable changes to this project will be documented in this file.
|
|
7
|
-
Only versions published since **2022-01-01** are listed here.
|
|
8
|
-
Please consult the Git history for older version information.
|
|
9
|
-
See [Conventional Commits](https://conventionalcommits.org/) for commit guidelines.
|
|
10
|
-
|
|
11
|
-
**Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
|
|
12
|
-
and/or version bumps of transitive dependencies.
|
|
13
|
-
|
|
14
|
-
### [0.4.7](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.4.7) (2025-07-20)
|
|
15
|
-
|
|
16
|
-
#### 🩹 Bug fixes
|
|
17
|
-
|
|
18
|
-
- fix [#532](https://github.com/thi-ng/umbrella/issues/532), fix centralTermsVec() ([6e19974](https://github.com/thi-ng/umbrella/commit/6e19974))
|
|
19
|
-
- update non-zero check
|
|
20
|
-
|
|
21
|
-
## [0.4.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.4.0) (2025-06-18)
|
|
22
|
-
|
|
23
|
-
#### 🚀 Features
|
|
24
|
-
|
|
25
|
-
- add `filterDocsFrequency()` ([6ac1f90](https://github.com/thi-ng/umbrella/commit/6ac1f90))
|
|
26
|
-
|
|
27
|
-
#### ⏱ Performance improvements
|
|
28
|
-
|
|
29
|
-
- minor update kmeansDense() ([ebd5618](https://github.com/thi-ng/umbrella/commit/ebd5618))
|
|
30
|
-
- internal use `lookupUnsafe()`
|
|
31
|
-
|
|
32
|
-
### [0.3.1](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.3.1) (2025-06-15)
|
|
33
|
-
|
|
34
|
-
#### 🩹 Bug fixes
|
|
35
|
-
|
|
36
|
-
- update pkg exports ([ea72b9f](https://github.com/thi-ng/umbrella/commit/ea72b9f))
|
|
37
|
-
|
|
38
|
-
## [0.3.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.3.0) (2025-06-15)
|
|
39
|
-
|
|
40
|
-
#### 🚀 Features
|
|
41
|
-
|
|
42
|
-
- update kmeansDense ([d35b6bd](https://github.com/thi-ng/umbrella/commit/d35b6bd))
|
|
43
|
-
- update results to include original `docs` for each cluster
|
|
44
|
-
|
|
45
|
-
## [0.2.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.2.0) (2025-06-14)
|
|
46
|
-
|
|
47
|
-
#### 🚀 Features
|
|
48
|
-
|
|
49
|
-
- add/migrate refactored tf-idf functions ([d311acc](https://github.com/thi-ng/umbrella/commit/d311acc))
|
|
50
|
-
- add/update vocab & vector encoding helpers, restructure ([9e4f60c](https://github.com/thi-ng/umbrella/commit/9e4f60c))
|
|
51
|
-
- add filterDocsIDF() ([f682b58](https://github.com/thi-ng/umbrella/commit/f682b58))
|
|
52
|
-
- add k-mean clustering fns ([3533843](https://github.com/thi-ng/umbrella/commit/3533843))
|
|
53
|
-
|
|
54
|
-
#### ♻️ Refactoring
|
|
55
|
-
|
|
56
|
-
- update imports/exports ([a44be87](https://github.com/thi-ng/umbrella/commit/a44be87))
|
|
57
|
-
|
|
58
|
-
## [0.1.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.1.0) (2025-06-09)
|
|
59
|
-
|
|
60
|
-
#### 🚀 Features
|
|
61
|
-
|
|
62
|
-
- import as new pkg ([89fe9bb](https://github.com/thi-ng/umbrella/commit/89fe9bb))
|
|
63
|
-
- add re-exports from transducers/vectors pkgs
|
|
64
|
-
- add porter stemmer, contractions & stop words from old [@thi.ng/notes](https://github.com/thi-ng/umbrella/tree/main/packages/notes) repo
|
|
65
|
-
- add porter fixtures
|
|
66
|
-
- update defVocab() to use [@thi.ng/bidir-index](https://github.com/thi-ng/umbrella/tree/main/packages/bidir-index)
|
|
67
|
-
- add sparse vector support
|
|
68
|
-
- add/port cosine/jaccard similarities from vectors/sparse pkgs
|
|
69
|
-
- add doc strings & examples
|