@thi.ng/text-analysis 0.4.19 → 0.4.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +144 -144
  2. package/CHANGELOG.md +0 -69
package/package.json CHANGED
@@ -1,145 +1,145 @@
1
1
  {
2
- "name": "@thi.ng/text-analysis",
3
- "version": "0.4.19",
4
- "description": "Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities",
5
- "type": "module",
6
- "module": "./index.js",
7
- "typings": "./index.d.ts",
8
- "sideEffects": false,
9
- "repository": {
10
- "type": "git",
11
- "url": "https://github.com/thi-ng/umbrella.git"
12
- },
13
- "homepage": "https://thi.ng/text-analysis",
14
- "funding": [
15
- {
16
- "type": "github",
17
- "url": "https://github.com/sponsors/postspectacular"
18
- },
19
- {
20
- "type": "patreon",
21
- "url": "https://patreon.com/thing_umbrella"
22
- },
23
- {
24
- "type": "liberapay",
25
- "url": "https://liberapay.com/thi.ng"
26
- }
27
- ],
28
- "author": "Karsten Schmidt (https://thi.ng)",
29
- "license": "Apache-2.0",
30
- "scripts": {
31
- "build": "yarn build:esbuild && yarn build:decl",
32
- "build:decl": "tsc --declaration --emitDeclarationOnly",
33
- "build:esbuild": "esbuild --format=esm --platform=neutral --target=es2022 --tsconfig=tsconfig.json --outdir=. src/**/*.ts",
34
- "clean": "bun ../../tools/src/clean-package.ts",
35
- "doc": "typedoc --options ../../typedoc.json --out doc src/index.ts",
36
- "doc:readme": "bun ../../tools/src/module-stats.ts && bun ../../tools/src/readme.ts",
37
- "pub": "yarn npm publish --access public",
38
- "test": "bun test",
39
- "tool:tangle": "../../node_modules/.bin/tangle src/**/*.ts"
40
- },
41
- "dependencies": {
42
- "@thi.ng/api": "^8.12.2",
43
- "@thi.ng/arrays": "^2.13.11",
44
- "@thi.ng/bidir-index": "^1.3.7",
45
- "@thi.ng/checks": "^3.7.18",
46
- "@thi.ng/distance": "^3.0.16",
47
- "@thi.ng/k-means": "^2.0.14",
48
- "@thi.ng/strings": "^3.9.22",
49
- "@thi.ng/transducers": "^9.6.10",
50
- "@thi.ng/vectors": "^8.6.6"
51
- },
52
- "devDependencies": {
53
- "esbuild": "^0.25.9",
54
- "typedoc": "^0.28.12",
55
- "typescript": "^5.9.2"
56
- },
57
- "keywords": [
58
- "analysis",
59
- "centroid",
60
- "cluster",
61
- "composition",
62
- "decode",
63
- "dense",
64
- "encode",
65
- "frequency",
66
- "functional",
67
- "histogram",
68
- "k-means",
69
- "ngram",
70
- "pipeline",
71
- "similarity",
72
- "sparse",
73
- "stem",
74
- "text",
75
- "tf-idf",
76
- "tokenizer",
77
- "transducer",
78
- "typescript",
79
- "vocabulary",
80
- "vector"
81
- ],
82
- "publishConfig": {
83
- "access": "public"
84
- },
85
- "browser": {
86
- "process": false,
87
- "setTimeout": false
88
- },
89
- "engines": {
90
- "node": ">=18"
91
- },
92
- "files": [
93
- "./*.js",
94
- "./*.d.ts"
95
- ],
96
- "exports": {
97
- ".": {
98
- "default": "./index.js"
99
- },
100
- "./api": {
101
- "default": "./api.js"
102
- },
103
- "./cluster": {
104
- "default": "./cluster.js"
105
- },
106
- "./frequencies": {
107
- "default": "./frequencies.js"
108
- },
109
- "./ngrams": {
110
- "default": "./ngrams.js"
111
- },
112
- "./replace": {
113
- "default": "./replace.js"
114
- },
115
- "./similarity": {
116
- "default": "./similarity.js"
117
- },
118
- "./stem": {
119
- "default": "./stem.js"
120
- },
121
- "./stop-words": {
122
- "default": "./stop-words.js"
123
- },
124
- "./tf-idf": {
125
- "default": "./tf-idf.js"
126
- },
127
- "./tokenize": {
128
- "default": "./tokenize.js"
129
- },
130
- "./vec": {
131
- "default": "./vec.js"
132
- },
133
- "./vocab": {
134
- "default": "./vocab.js"
135
- },
136
- "./xform": {
137
- "default": "./xform.js"
138
- }
139
- },
140
- "thi.ng": {
141
- "status": "alpha",
142
- "year": 2021
143
- },
144
- "gitHead": "b7ede4f099767e0175ea8e09257208f73970b220\n"
145
- }
2
+ "name": "@thi.ng/text-analysis",
3
+ "version": "0.4.23",
4
+ "description": "Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities",
5
+ "type": "module",
6
+ "module": "./index.js",
7
+ "typings": "./index.d.ts",
8
+ "sideEffects": false,
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "https://github.com/thi-ng/umbrella.git"
12
+ },
13
+ "homepage": "https://thi.ng/text-analysis",
14
+ "funding": [
15
+ {
16
+ "type": "github",
17
+ "url": "https://github.com/sponsors/postspectacular"
18
+ },
19
+ {
20
+ "type": "patreon",
21
+ "url": "https://patreon.com/thing_umbrella"
22
+ },
23
+ {
24
+ "type": "liberapay",
25
+ "url": "https://liberapay.com/thi.ng"
26
+ }
27
+ ],
28
+ "author": "Karsten Schmidt (https://thi.ng)",
29
+ "license": "Apache-2.0",
30
+ "scripts": {
31
+ "build": "yarn build:esbuild && yarn build:decl",
32
+ "build:decl": "tsc --declaration --emitDeclarationOnly",
33
+ "build:esbuild": "esbuild --format=esm --platform=neutral --target=es2022 --tsconfig=tsconfig.json --outdir=. src/**/*.ts",
34
+ "clean": "bun ../../tools/src/clean-package.ts",
35
+ "doc": "typedoc --options ../../typedoc.json --out doc src/index.ts",
36
+ "doc:readme": "bun ../../tools/src/module-stats.ts && bun ../../tools/src/readme.ts",
37
+ "pub": "npm publish --access public",
38
+ "test": "bun test",
39
+ "tool:tangle": "../../node_modules/.bin/tangle src/**/*.ts"
40
+ },
41
+ "dependencies": {
42
+ "@thi.ng/api": "^8.12.6",
43
+ "@thi.ng/arrays": "^2.13.15",
44
+ "@thi.ng/bidir-index": "^1.3.11",
45
+ "@thi.ng/checks": "^3.7.22",
46
+ "@thi.ng/distance": "^3.0.20",
47
+ "@thi.ng/k-means": "^2.0.18",
48
+ "@thi.ng/strings": "^3.9.26",
49
+ "@thi.ng/transducers": "^9.6.14",
50
+ "@thi.ng/vectors": "^8.6.10"
51
+ },
52
+ "devDependencies": {
53
+ "esbuild": "^0.25.11",
54
+ "typedoc": "^0.28.14",
55
+ "typescript": "^5.9.3"
56
+ },
57
+ "keywords": [
58
+ "analysis",
59
+ "centroid",
60
+ "cluster",
61
+ "composition",
62
+ "decode",
63
+ "dense",
64
+ "encode",
65
+ "frequency",
66
+ "functional",
67
+ "histogram",
68
+ "k-means",
69
+ "ngram",
70
+ "pipeline",
71
+ "similarity",
72
+ "sparse",
73
+ "stem",
74
+ "text",
75
+ "tf-idf",
76
+ "tokenizer",
77
+ "transducer",
78
+ "typescript",
79
+ "vocabulary",
80
+ "vector"
81
+ ],
82
+ "publishConfig": {
83
+ "access": "public"
84
+ },
85
+ "browser": {
86
+ "process": false,
87
+ "setTimeout": false
88
+ },
89
+ "engines": {
90
+ "node": ">=18"
91
+ },
92
+ "files": [
93
+ "./*.js",
94
+ "./*.d.ts"
95
+ ],
96
+ "exports": {
97
+ ".": {
98
+ "default": "./index.js"
99
+ },
100
+ "./api": {
101
+ "default": "./api.js"
102
+ },
103
+ "./cluster": {
104
+ "default": "./cluster.js"
105
+ },
106
+ "./frequencies": {
107
+ "default": "./frequencies.js"
108
+ },
109
+ "./ngrams": {
110
+ "default": "./ngrams.js"
111
+ },
112
+ "./replace": {
113
+ "default": "./replace.js"
114
+ },
115
+ "./similarity": {
116
+ "default": "./similarity.js"
117
+ },
118
+ "./stem": {
119
+ "default": "./stem.js"
120
+ },
121
+ "./stop-words": {
122
+ "default": "./stop-words.js"
123
+ },
124
+ "./tf-idf": {
125
+ "default": "./tf-idf.js"
126
+ },
127
+ "./tokenize": {
128
+ "default": "./tokenize.js"
129
+ },
130
+ "./vec": {
131
+ "default": "./vec.js"
132
+ },
133
+ "./vocab": {
134
+ "default": "./vocab.js"
135
+ },
136
+ "./xform": {
137
+ "default": "./xform.js"
138
+ }
139
+ },
140
+ "thi.ng": {
141
+ "status": "alpha",
142
+ "year": 2021
143
+ },
144
+ "gitHead": "136a5e5ef0b69e82329db00d806c3c4e8f1aa063\n"
145
+ }
package/CHANGELOG.md DELETED
@@ -1,69 +0,0 @@
1
- # Change Log
2
-
3
- - **Last updated**: 2025-09-01T16:38:35Z
4
- - **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
5
-
6
- All notable changes to this project will be documented in this file.
7
- Only versions published since **2022-01-01** are listed here.
8
- Please consult the Git history for older version information.
9
- See [Conventional Commits](https://conventionalcommits.org/) for commit guidelines.
10
-
11
- **Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
12
- and/or version bumps of transitive dependencies.
13
-
14
- ### [0.4.7](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.4.7) (2025-07-20)
15
-
16
- #### 🩹 Bug fixes
17
-
18
- - fix [#532](https://github.com/thi-ng/umbrella/issues/532), fix centralTermsVec() ([6e19974](https://github.com/thi-ng/umbrella/commit/6e19974))
19
- - update non-zero check
20
-
21
- ## [0.4.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.4.0) (2025-06-18)
22
-
23
- #### 🚀 Features
24
-
25
- - add `filterDocsFrequency()` ([6ac1f90](https://github.com/thi-ng/umbrella/commit/6ac1f90))
26
-
27
- #### ⏱ Performance improvements
28
-
29
- - minor update kmeansDense() ([ebd5618](https://github.com/thi-ng/umbrella/commit/ebd5618))
30
- - internal use `lookupUnsafe()`
31
-
32
- ### [0.3.1](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.3.1) (2025-06-15)
33
-
34
- #### 🩹 Bug fixes
35
-
36
- - update pkg exports ([ea72b9f](https://github.com/thi-ng/umbrella/commit/ea72b9f))
37
-
38
- ## [0.3.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.3.0) (2025-06-15)
39
-
40
- #### 🚀 Features
41
-
42
- - update kmeansDense ([d35b6bd](https://github.com/thi-ng/umbrella/commit/d35b6bd))
43
- - update results to include original `docs` for each cluster
44
-
45
- ## [0.2.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.2.0) (2025-06-14)
46
-
47
- #### 🚀 Features
48
-
49
- - add/migrate refactored tf-idf functions ([d311acc](https://github.com/thi-ng/umbrella/commit/d311acc))
50
- - add/update vocab & vector encoding helpers, restructure ([9e4f60c](https://github.com/thi-ng/umbrella/commit/9e4f60c))
51
- - add filterDocsIDF() ([f682b58](https://github.com/thi-ng/umbrella/commit/f682b58))
52
- - add k-mean clustering fns ([3533843](https://github.com/thi-ng/umbrella/commit/3533843))
53
-
54
- #### ♻️ Refactoring
55
-
56
- - update imports/exports ([a44be87](https://github.com/thi-ng/umbrella/commit/a44be87))
57
-
58
- ## [0.1.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/text-analysis@0.1.0) (2025-06-09)
59
-
60
- #### 🚀 Features
61
-
62
- - import as new pkg ([89fe9bb](https://github.com/thi-ng/umbrella/commit/89fe9bb))
63
- - add re-exports from transducers/vectors pkgs
64
- - add porter stemmer, contractions & stop words from old [@thi.ng/notes](https://github.com/thi-ng/umbrella/tree/main/packages/notes) repo
65
- - add porter fixtures
66
- - update defVocab() to use [@thi.ng/bidir-index](https://github.com/thi-ng/umbrella/tree/main/packages/bidir-index)
67
- - add sparse vector support
68
- - add/port cosine/jaccard similarities from vectors/sparse pkgs
69
- - add doc strings & examples