@sc-voice/tools 2.12.0 → 2.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "2.12.0",
3
+ "version": "2.14.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
@@ -70,27 +70,24 @@ export class TfidfSpace {
70
70
  return TfidfSpace.removeNonWords(sAbbr);
71
71
  }
72
72
 
73
- static idfStandard(space, word) {
74
- const msg = 'w7e.idfStandard:';
75
- let { corpus } = space;
76
- let wordDocs = corpus.wordDocCount[word] || 0;
77
- return Math.log((corpus.size + 1) / (wordDocs + 1));
73
+ static idfStandard(nDocs, wdc, idfWeight) {
74
+ return Math.log((nDocs + 1) / (wdc + 1));
78
75
  }
79
76
 
80
- static idfTunable(space, word, idfWeight) {
81
- const msg = 'w7e.idf:';
82
- let { corpus } = space;
83
- let wordDocs = corpus.wordDocCount[word] || 0;
77
+ static idfTunable(nDocs, wdc, idfWeight) {
78
+ const msg = 'w7e.idfTunable:';
84
79
  // NOTE: This is NOT the usual formula
85
80
  // Map to [0:ignore..1:important]
86
- return corpus.size
87
- ? 1 -
88
- Math.exp(((wordDocs - corpus.size) / wordDocs) * idfWeight)
81
+ return nDocs
82
+ ? 1 - Math.exp(((wdc - nDocs) / wdc) * idfWeight)
89
83
  : 1;
90
84
  }
91
85
 
92
86
  idf(word, idfWeight = this.idfWeight) {
93
- return this.idfFunction(this, word, idfWeight);
87
+ let { corpus } = this;
88
+ let wdc = corpus.wordDocCount[word] || 0;
89
+ let nDocs = corpus.size;
90
+ return this.idfFunction(nDocs, wdc, idfWeight);
94
91
  }
95
92
 
96
93
  addCorpusDocument(id, bow) {
@@ -128,20 +125,20 @@ export class TfidfSpace {
128
125
  return count ? count / words.length : 0;
129
126
  }
130
127
 
131
- tfidf(doc) {
132
- const msg = 'w7e.tfidf:';
128
+ tfidfOfBow(bow) {
129
+ const msg = 'w7e.tfidfOfBow:';
133
130
  let { corpus, idfWeight } = this;
134
131
 
135
132
  // More efficient implementation of tf * idf
136
- let { bow, words } = this.countWords(doc);
137
- let nWords = words.length;
133
+ let words = Object.keys(bow);
134
+ let nWords = words.reduce((a,w)=>a+bow[w],0);
138
135
 
139
136
  let vTfIdf = words.reduce((a, word) => {
140
137
  let wd = bow[word] || 0;
141
138
  let tf = wd ? wd / nWords : 0;
142
- let wc = corpus.wordDocCount[word] || 0;
139
+ let wdc = corpus.wordDocCount[word] || 0;
143
140
  let idf = corpus.size
144
- ? 1 - Math.exp(((wc - corpus.size) / wc) * idfWeight)
141
+ ? 1 - Math.exp(((wdc - corpus.size) / wdc) * idfWeight)
145
142
  : 1;
146
143
  let tfidf = tf * idf;
147
144
  if (tfidf) {
@@ -153,6 +150,11 @@ export class TfidfSpace {
153
150
  return vTfIdf;
154
151
  }
155
152
 
153
+ tfidf(text) { // TfIdf of words in text w/r to corpus
154
+ let { bow } = this.countWords(text);
155
+ return this.tfidfOfBow(bow);
156
+ }
157
+
156
158
  countWords(str) {
157
159
  const msg = 'w7e.countWords:';
158
160
  if (str == null) {
@@ -20,10 +20,11 @@ export class WordVector extends Object {
20
20
  return this.$length;
21
21
  }
22
22
 
23
- toString() {
23
+ toString(opts={}) {
24
+ let { precision=2 } = opts;
24
25
  let sv = Object.entries(this).reduce((a, e) => {
25
26
  let [k, v] = e;
26
- let vf = v.toFixed(2);
27
+ let vf = v.toFixed(precision).replace(/\.0*$/,'');
27
28
  a.push(`${k}:${vf}`);
28
29
  return a;
29
30
  }, []);