@sc-voice/tools 2.11.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "2.11.0",
3
+ "version": "2.13.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
@@ -70,41 +70,37 @@ export class TfidfSpace {
70
70
  return TfidfSpace.removeNonWords(sAbbr);
71
71
  }
72
72
 
73
- static idfStandard(space, word) {
74
- const msg = 'w7e.idfStandard:';
75
- let { corpus } = space;
76
- let wordDocs = corpus.wordDocCount[word] || 0;
77
- return Math.log((corpus.size + 1) / (wordDocs + 1));
73
+ static idfStandard(nDocs, wdc, idfWeight) {
74
+ return Math.log((nDocs + 1) / (wdc + 1));
78
75
  }
79
76
 
80
- static idfTunable(space, word, idfWeight) {
81
- const msg = 'w7e.idf:';
82
- let { corpus } = space;
83
- let wordDocs = corpus.wordDocCount[word] || 0;
77
+ static idfTunable(nDocs, wdc, idfWeight) {
78
+ const msg = 'w7e.idfTunable:';
84
79
  // NOTE: This is NOT the usual formula
85
80
  // Map to [0:ignore..1:important]
86
- return corpus.size
87
- ? 1 -
88
- Math.exp(((wordDocs - corpus.size) / wordDocs) * idfWeight)
81
+ return nDocs
82
+ ? 1 - Math.exp(((wdc - nDocs) / wdc) * idfWeight)
89
83
  : 1;
90
84
  }
91
85
 
92
86
  idf(word, idfWeight = this.idfWeight) {
93
- return this.idfFunction(this, word, idfWeight);
87
+ let { corpus } = this;
88
+ let wdc = corpus.wordDocCount[word] || 0;
89
+ let nDocs = corpus.size;
90
+ return this.idfFunction(nDocs, wdc, idfWeight);
94
91
  }
95
92
 
96
- addCorpusDocument(id, bow, nWords) {
93
+ addCorpusDocument(id, bow) {
97
94
  const msg = 't8w.addCorpusDocument:';
98
95
  let { corpus } = this;
99
96
  if (id == null) {
100
97
  throw new Error(`${msg} id?`);
101
98
  }
102
99
  if (bow == null) {
100
+ // Bag-of-words maps word to wordCount(word,doc)
103
101
  throw new Error(`${msg} bow?`);
104
102
  }
105
- if (nWords == null) {
106
- throw new Error(`${msg} nWords?`);
107
- }
103
+ let nWords = Object.values(bow).reduce((a,v)=>a+v);
108
104
  let docInfo = { id, bow, nWords };
109
105
  corpus.wordDocCount.increment(bow.oneHot());
110
106
  corpus.addDocument(id, docInfo);
@@ -129,20 +125,20 @@ export class TfidfSpace {
129
125
  return count ? count / words.length : 0;
130
126
  }
131
127
 
132
- tfidf(doc) {
133
- const msg = 'w7e.tfidf:';
128
+ tfidfOfBow(bow) {
129
+ const msg = 'w7e.tfidfOfBow:';
134
130
  let { corpus, idfWeight } = this;
135
131
 
136
132
  // More efficient implementation of tf * idf
137
- let { bow, words } = this.countWords(doc);
138
- let nWords = words.length;
133
+ let words = Object.keys(bow);
134
+ let nWords = words.reduce((a,w)=>a+bow[w],0);
139
135
 
140
136
  let vTfIdf = words.reduce((a, word) => {
141
137
  let wd = bow[word] || 0;
142
138
  let tf = wd ? wd / nWords : 0;
143
- let wc = corpus.wordDocCount[word] || 0;
139
+ let wdc = corpus.wordDocCount[word] || 0;
144
140
  let idf = corpus.size
145
- ? 1 - Math.exp(((wc - corpus.size) / wc) * idfWeight)
141
+ ? 1 - Math.exp(((wdc - corpus.size) / wdc) * idfWeight)
146
142
  : 1;
147
143
  let tfidf = tf * idf;
148
144
  if (tfidf) {
@@ -154,6 +150,11 @@ export class TfidfSpace {
154
150
  return vTfIdf;
155
151
  }
156
152
 
153
+ tfidf(text) { // TfIdf of words in text w/r to corpus
154
+ let { bow } = this.countWords(text);
155
+ return this.tfidfOfBow(bow);
156
+ }
157
+
157
158
  countWords(str) {
158
159
  const msg = 'w7e.countWords:';
159
160
  if (str == null) {