@sc-voice/tools 2.12.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/text/tfidf-space.mjs +21 -19
- package/src/text/word-vector.mjs +3 -2
package/package.json
CHANGED
package/src/text/tfidf-space.mjs
CHANGED
|
@@ -70,27 +70,24 @@ export class TfidfSpace {
|
|
|
70
70
|
return TfidfSpace.removeNonWords(sAbbr);
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
static idfStandard(
|
|
74
|
-
|
|
75
|
-
let { corpus } = space;
|
|
76
|
-
let wordDocs = corpus.wordDocCount[word] || 0;
|
|
77
|
-
return Math.log((corpus.size + 1) / (wordDocs + 1));
|
|
73
|
+
static idfStandard(nDocs, wdc, idfWeight) {
|
|
74
|
+
return Math.log((nDocs + 1) / (wdc + 1));
|
|
78
75
|
}
|
|
79
76
|
|
|
80
|
-
static idfTunable(
|
|
81
|
-
const msg = 'w7e.
|
|
82
|
-
let { corpus } = space;
|
|
83
|
-
let wordDocs = corpus.wordDocCount[word] || 0;
|
|
77
|
+
static idfTunable(nDocs, wdc, idfWeight) {
|
|
78
|
+
const msg = 'w7e.idfTunable:';
|
|
84
79
|
// NOTE: This is NOT the usual formula
|
|
85
80
|
// Map to [0:ignore..1:important]
|
|
86
|
-
return
|
|
87
|
-
? 1 -
|
|
88
|
-
Math.exp(((wordDocs - corpus.size) / wordDocs) * idfWeight)
|
|
81
|
+
return nDocs
|
|
82
|
+
? 1 - Math.exp(((wdc - nDocs) / wdc) * idfWeight)
|
|
89
83
|
: 1;
|
|
90
84
|
}
|
|
91
85
|
|
|
92
86
|
idf(word, idfWeight = this.idfWeight) {
|
|
93
|
-
|
|
87
|
+
let { corpus } = this;
|
|
88
|
+
let wdc = corpus.wordDocCount[word] || 0;
|
|
89
|
+
let nDocs = corpus.size;
|
|
90
|
+
return this.idfFunction(nDocs, wdc, idfWeight);
|
|
94
91
|
}
|
|
95
92
|
|
|
96
93
|
addCorpusDocument(id, bow) {
|
|
@@ -128,20 +125,20 @@ export class TfidfSpace {
|
|
|
128
125
|
return count ? count / words.length : 0;
|
|
129
126
|
}
|
|
130
127
|
|
|
131
|
-
|
|
132
|
-
const msg = 'w7e.
|
|
128
|
+
tfidfOfBow(bow) {
|
|
129
|
+
const msg = 'w7e.tfidfOfBow:';
|
|
133
130
|
let { corpus, idfWeight } = this;
|
|
134
131
|
|
|
135
132
|
// More efficient implementation of tf * idf
|
|
136
|
-
let
|
|
137
|
-
let nWords = words.
|
|
133
|
+
let words = Object.keys(bow);
|
|
134
|
+
let nWords = words.reduce((a,w)=>a+bow[w],0);
|
|
138
135
|
|
|
139
136
|
let vTfIdf = words.reduce((a, word) => {
|
|
140
137
|
let wd = bow[word] || 0;
|
|
141
138
|
let tf = wd ? wd / nWords : 0;
|
|
142
|
-
let
|
|
139
|
+
let wdc = corpus.wordDocCount[word] || 0;
|
|
143
140
|
let idf = corpus.size
|
|
144
|
-
? 1 - Math.exp(((
|
|
141
|
+
? 1 - Math.exp(((wdc - corpus.size) / wdc) * idfWeight)
|
|
145
142
|
: 1;
|
|
146
143
|
let tfidf = tf * idf;
|
|
147
144
|
if (tfidf) {
|
|
@@ -153,6 +150,11 @@ export class TfidfSpace {
|
|
|
153
150
|
return vTfIdf;
|
|
154
151
|
}
|
|
155
152
|
|
|
153
|
+
tfidf(text) { // TfIdf of words in text w/r to corpus
|
|
154
|
+
let { bow } = this.countWords(text);
|
|
155
|
+
return this.tfidfOfBow(bow);
|
|
156
|
+
}
|
|
157
|
+
|
|
156
158
|
countWords(str) {
|
|
157
159
|
const msg = 'w7e.countWords:';
|
|
158
160
|
if (str == null) {
|
package/src/text/word-vector.mjs
CHANGED
|
@@ -20,10 +20,11 @@ export class WordVector extends Object {
|
|
|
20
20
|
return this.$length;
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
-
toString() {
|
|
23
|
+
toString(opts={}) {
|
|
24
|
+
let { precision=2 } = opts;
|
|
24
25
|
let sv = Object.entries(this).reduce((a, e) => {
|
|
25
26
|
let [k, v] = e;
|
|
26
|
-
let vf = v.toFixed(
|
|
27
|
+
let vf = v.toFixed(precision).replace(/\.0*$/,'');
|
|
27
28
|
a.push(`${k}:${vf}`);
|
|
28
29
|
return a;
|
|
29
30
|
}, []);
|