@sc-voice/tools 2.17.0 → 2.18.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "2.17.0",
3
+ "version": "2.18.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
@@ -52,9 +52,11 @@ export class TfidfSpace {
52
52
 
53
53
  static removeNonWords(s) {
54
54
  const RE_RESERVED = /[_-]/g; // allowed in bow words
55
+ const RE_LQUOTE = /[“‘«]/g;
55
56
  const RE_PUNCT = /[.,:;$"'“”‘’!?«»\[\]]/g;
56
57
  const RE_SPACE = /\s+/g;
57
58
  return TfidfSpace.removeHtml(s)
59
+ .replace(RE_LQUOTE, '__LQUOTE ')
58
60
  .replace(RE_PUNCT, '')
59
61
  .replace(RE_SPACE, ' ')
60
62
  .trim();
@@ -107,7 +109,7 @@ export class TfidfSpace {
107
109
  // Bag-of-words maps word to wordCount(word,doc)
108
110
  throw new Error(`${msg} bow?`);
109
111
  }
110
- let nWords = Object.values(bow).reduce((a,v)=>a+v);
112
+ let nWords = Object.values(bow).reduce((a, v) => a + v);
111
113
  let docInfo = { id, bow, nWords };
112
114
  corpus.wordDocCount.increment(bow.oneHot());
113
115
  corpus.addDocument(id, docInfo);
@@ -118,7 +120,7 @@ export class TfidfSpace {
118
120
  addDocument(id, doc) {
119
121
  let { corpus } = this;
120
122
  let { bow, words } = this.countWords(doc);
121
-
123
+
122
124
  return this.addCorpusDocument(id, bow, words.length);
123
125
  }
124
126
 
@@ -138,7 +140,7 @@ export class TfidfSpace {
138
140
 
139
141
  // More efficient implementation of tf * idf
140
142
  let words = Object.keys(bow);
141
- let nWords = words.reduce((a,w)=>a+bow[w],0);
143
+ let nWords = words.reduce((a, w) => a + bow[w], 0);
142
144
 
143
145
  let vTfIdf = words.reduce((a, word) => {
144
146
  let wd = bow[word] || 0;
@@ -157,7 +159,8 @@ export class TfidfSpace {
157
159
  return vTfIdf;
158
160
  }
159
161
 
160
- tfidf(text) { // TfIdf of words in text w/r to corpus
162
+ tfidf(text) {
163
+ // TfIdf of words in text w/r to corpus
161
164
  let { bow } = this.countWords(text);
162
165
  return this.tfidfOfBow(bow);
163
166
  }
@@ -20,12 +20,12 @@ export class WordVector extends Object {
20
20
  return this.$length;
21
21
  }
22
22
 
23
- toString(opts={}) {
24
- let { order='value', precision=2 } = opts;
23
+ toString(opts = {}) {
24
+ let { order = 'value', precision = 2 } = opts;
25
25
  let entries = Object.entries(this);
26
26
  switch (order) {
27
27
  case 'key':
28
- entries.sort((a,b)=>{
28
+ entries.sort((a, b) => {
29
29
  let [ka] = a;
30
30
  let [kb] = b;
31
31
  return ka.localeCompare(kb);
@@ -33,16 +33,16 @@ export class WordVector extends Object {
33
33
  break;
34
34
  case 'value':
35
35
  default:
36
- entries.sort((a,b)=>{
36
+ entries.sort((a, b) => {
37
37
  let [ka, va] = a;
38
38
  let [kb, vb] = b;
39
- return (vb-va) || ka.localeCompare(kb);
39
+ return vb - va || ka.localeCompare(kb);
40
40
  });
41
41
  break;
42
42
  }
43
43
  let sv = entries.reduce((a, e) => {
44
44
  let [k, v] = e;
45
- let vf = v.toFixed(precision).replace(/\.0*$/,'');
45
+ let vf = v.toFixed(precision).replace(/\.0*$/, '');
46
46
  a.push(`${k}:${vf}`);
47
47
  return a;
48
48
  }, []);