@sc-voice/tools 2.17.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "2.17.0",
3
+ "version": "2.19.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
package/src/defines.mjs CHANGED
@@ -8,5 +8,7 @@ export const DBG = {
8
8
  DEEPL_MOCK_XLT: 0, // use mock translation
9
9
  DEEPL_TEST_API: 0, // test with live DeepL API ($$$)
10
10
  DEEPL_XLT: 0, // test live translation
11
+ L7C_FETCH_LEGACY: 0,
12
+ L7C_FETCH_LEGACY_SC: 0, // ignore test cache and use SC
11
13
  WORD_MAP_TRANFORMER: 0,
12
14
  };
@@ -37,20 +37,33 @@ export class LegacyDoc {
37
37
  return true;
38
38
  }
39
39
 
40
- static async fetchLegacy(opts = {}) {
41
- const msg = 'L7c.fetch:';
42
- const dbg = DBG.FETCH_LEGACY;
40
+ static legacyUrl(opts={}) {
43
41
  let {
44
- endPoint = 'https://suttacentral.net/api/suttas',
42
+ endPoint = 'https://staging.suttacentral.net/api/suttas',
45
43
  sutta_uid,
46
44
  lang,
47
45
  author,
46
+ } = opts;
47
+
48
+ return [endPoint, sutta_uid, `${author}?lang=${lang}`].join('/');
49
+ }
50
+
51
+ static async fetchLegacy(opts = {}) {
52
+ const msg = 'L7c.fetchLegacy:';
53
+ const dbg = DBG.L7C_FETCH_LEGACY;
54
+ let {
48
55
  maxBuffer = 10 * 1024 * 1024,
56
+ cache,
49
57
  } = opts;
50
- let url = [endPoint, sutta_uid, `${author}?lang=${lang}`].join(
51
- '/',
52
- );
53
- let res = await fetch(url);
58
+ let url = LegacyDoc.legacyUrl(opts);
59
+ let res;
60
+ if (cache) {
61
+ res = cache(url);
62
+ dbg && console.log(msg, '[1]cached', res.ok);
63
+ } else {
64
+ res = await fetch(url, {maxBuffer});
65
+ dbg && console.log(msg, '[2]scapi', res.ok);
66
+ }
54
67
  if (!res.ok) {
55
68
  throw new Error(`${msg} {res.status} ${url}`);
56
69
  }
@@ -52,9 +52,11 @@ export class TfidfSpace {
52
52
 
53
53
  static removeNonWords(s) {
54
54
  const RE_RESERVED = /[_-]/g; // allowed in bow words
55
+ const RE_LQUOTE = /[“‘«]/g;
55
56
  const RE_PUNCT = /[.,:;$"'“”‘’!?«»\[\]]/g;
56
57
  const RE_SPACE = /\s+/g;
57
58
  return TfidfSpace.removeHtml(s)
59
+ .replace(RE_LQUOTE, '__LQUOTE ')
58
60
  .replace(RE_PUNCT, '')
59
61
  .replace(RE_SPACE, ' ')
60
62
  .trim();
@@ -107,7 +109,7 @@ export class TfidfSpace {
107
109
  // Bag-of-words maps word to wordCount(word,doc)
108
110
  throw new Error(`${msg} bow?`);
109
111
  }
110
- let nWords = Object.values(bow).reduce((a,v)=>a+v);
112
+ let nWords = Object.values(bow).reduce((a, v) => a + v);
111
113
  let docInfo = { id, bow, nWords };
112
114
  corpus.wordDocCount.increment(bow.oneHot());
113
115
  corpus.addDocument(id, docInfo);
@@ -118,7 +120,7 @@ export class TfidfSpace {
118
120
  addDocument(id, doc) {
119
121
  let { corpus } = this;
120
122
  let { bow, words } = this.countWords(doc);
121
-
123
+
122
124
  return this.addCorpusDocument(id, bow, words.length);
123
125
  }
124
126
 
@@ -138,7 +140,7 @@ export class TfidfSpace {
138
140
 
139
141
  // More efficient implementation of tf * idf
140
142
  let words = Object.keys(bow);
141
- let nWords = words.reduce((a,w)=>a+bow[w],0);
143
+ let nWords = words.reduce((a, w) => a + bow[w], 0);
142
144
 
143
145
  let vTfIdf = words.reduce((a, word) => {
144
146
  let wd = bow[word] || 0;
@@ -157,7 +159,8 @@ export class TfidfSpace {
157
159
  return vTfIdf;
158
160
  }
159
161
 
160
- tfidf(text) { // TfIdf of words in text w/r to corpus
162
+ tfidf(text) {
163
+ // TfIdf of words in text w/r to corpus
161
164
  let { bow } = this.countWords(text);
162
165
  return this.tfidfOfBow(bow);
163
166
  }
@@ -20,12 +20,12 @@ export class WordVector extends Object {
20
20
  return this.$length;
21
21
  }
22
22
 
23
- toString(opts={}) {
24
- let { order='value', precision=2 } = opts;
23
+ toString(opts = {}) {
24
+ let { order = 'value', precision = 2 } = opts;
25
25
  let entries = Object.entries(this);
26
26
  switch (order) {
27
27
  case 'key':
28
- entries.sort((a,b)=>{
28
+ entries.sort((a, b) => {
29
29
  let [ka] = a;
30
30
  let [kb] = b;
31
31
  return ka.localeCompare(kb);
@@ -33,16 +33,16 @@ export class WordVector extends Object {
33
33
  break;
34
34
  case 'value':
35
35
  default:
36
- entries.sort((a,b)=>{
36
+ entries.sort((a, b) => {
37
37
  let [ka, va] = a;
38
38
  let [kb, vb] = b;
39
- return (vb-va) || ka.localeCompare(kb);
39
+ return vb - va || ka.localeCompare(kb);
40
40
  });
41
41
  break;
42
42
  }
43
43
  let sv = entries.reduce((a, e) => {
44
44
  let [k, v] = e;
45
- let vf = v.toFixed(precision).replace(/\.0*$/,'');
45
+ let vf = v.toFixed(precision).replace(/\.0*$/, '');
46
46
  a.push(`${k}:${vf}`);
47
47
  return a;
48
48
  }, []);