@sc-voice/tools 2.19.0 → 2.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "2.19.0",
3
+ "version": "2.22.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
@@ -39,7 +39,7 @@ export class LegacyDoc {
39
39
 
40
40
  static legacyUrl(opts={}) {
41
41
  let {
42
- endPoint = 'https://staging.suttacentral.net/api/suttas',
42
+ endPoint = 'https://suttacentral.net/api/suttas',
43
43
  sutta_uid,
44
44
  lang,
45
45
  author,
@@ -65,7 +65,7 @@ export class LegacyDoc {
65
65
  dbg && console.log(msg, '[2]scapi', res.ok);
66
66
  }
67
67
  if (!res.ok) {
68
- throw new Error(`${msg} {res.status} ${url}`);
68
+ throw new Error(`${msg} ${res.status} ${url}`);
69
69
  }
70
70
  let json = await res.json();
71
71
  let { translation } = json;
@@ -15,6 +15,7 @@ export class TfidfSpace {
15
15
  idfWeight = GOLDEN_FUDGE, // IDF dampening
16
16
  idfFunction = TfidfSpace.idfTunable,
17
17
  normalizeText,
18
+ leftQuoteToken,
18
19
  } = opts;
19
20
  if (lang == null) {
20
21
  throw new Error(`${msg} lang?`);
@@ -31,7 +32,7 @@ export class TfidfSpace {
31
32
  throw new Error(`${msg} normalizeText?`);
32
33
  }
33
34
  }
34
- Object.defineProperty(this, 'normalizeText', {
35
+ Object.defineProperty(this, '_normalizeText', {
35
36
  value: normalizeText,
36
37
  });
37
38
  Object.defineProperty(this, 'idfFunction', {
@@ -43,6 +44,7 @@ export class TfidfSpace {
43
44
  lang,
44
45
  corpus,
45
46
  idfWeight,
47
+ leftQuoteToken,
46
48
  });
47
49
  }
48
50
 
@@ -50,23 +52,26 @@ export class TfidfSpace {
50
52
  return s.replace(/<[^>]*>/gi, '');
51
53
  }
52
54
 
53
- static removeNonWords(s) {
55
+ static removeNonWords(s, opts={}) {
54
56
  const RE_RESERVED = /[_-]/g; // allowed in bow words
55
57
  const RE_LQUOTE = /[“‘«]/g;
56
58
  const RE_PUNCT = /[.,:;$"'“”‘’!?«»\[\]]/g;
57
59
  const RE_SPACE = /\s+/g;
60
+ let {
61
+ leftQuoteToken = '', // TBD: is this useful?
62
+ } = opts;
58
63
  return TfidfSpace.removeHtml(s)
59
- .replace(RE_LQUOTE, '__LQUOTE ')
64
+ .replace(RE_LQUOTE, leftQuoteToken)
60
65
  .replace(RE_PUNCT, '')
61
66
  .replace(RE_SPACE, ' ')
62
67
  .trim();
63
68
  }
64
69
 
65
- static normalizeEN(s) {
66
- return TfidfSpace.removeNonWords(s.toLowerCase());
70
+ static normalizeEN(s, opts={}) {
71
+ return TfidfSpace.removeNonWords(s.toLowerCase(), opts);
67
72
  }
68
73
 
69
- static normalizeFR(s) {
74
+ static normalizeFR(s, opts={}) {
70
75
  let sAbbr = s
71
76
  .toLowerCase()
72
77
  .replace(/\bd[’']/gi, 'de ')
@@ -76,7 +81,7 @@ export class TfidfSpace {
76
81
  .replace(/\bm[’']/gi, 'm_')
77
82
  .replace(/\bn[’']/gi, 'n_')
78
83
  .replace(/\bc[’']/gi, 'c_');
79
- return TfidfSpace.removeNonWords(sAbbr);
84
+ return TfidfSpace.removeNonWords(sAbbr, opts);
80
85
  }
81
86
 
82
87
  static idfStandard(nDocs, wdc, idfWeight) {
@@ -165,6 +170,10 @@ export class TfidfSpace {
165
170
  return this.tfidfOfBow(bow);
166
171
  }
167
172
 
173
+ normalizeText(str) {
174
+ return this._normalizeText(str, this);
175
+ }
176
+
168
177
  countWords(str) {
169
178
  const msg = 'w7e.countWords:';
170
179
  if (str == null) {
@@ -21,7 +21,13 @@ export class WordVector extends Object {
21
21
  }
22
22
 
23
23
  toString(opts = {}) {
24
- let { order = 'value', precision = 2 } = opts;
24
+ const msg = 'w10r.toString:';
25
+ let { order = 'value', minValue, precision = 2 } = opts;
26
+
27
+ if (minValue == null) {
28
+ minValue = Math.pow(10, -precision) / 2;
29
+ }
30
+
25
31
  let entries = Object.entries(this);
26
32
  switch (order) {
27
33
  case 'key':
@@ -42,8 +48,12 @@ export class WordVector extends Object {
42
48
  }
43
49
  let sv = entries.reduce((a, e) => {
44
50
  let [k, v] = e;
45
- let vf = v.toFixed(precision).replace(/\.0*$/, '');
46
- a.push(`${k}:${vf}`);
51
+ if (minValue <= v) {
52
+ let vf = v.toFixed(precision)
53
+ .replace(/\.0*$/, '')
54
+ .replace(/0\./,'.');
55
+ a.push(`${k}:${vf}`);
56
+ }
47
57
  return a;
48
58
  }, []);
49
59
  return sv.join(',');