@sc-voice/tools 3.0.0 → 3.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sc-voice/tools",
3
- "version": "3.0.0",
3
+ "version": "3.2.0",
4
4
  "description": "Utilities for SC-Voice",
5
5
  "main": "index.mjs",
6
6
  "files": [
package/src/defines.mjs CHANGED
@@ -9,6 +9,6 @@ export const DBG = {
9
9
  DEEPL_TEST_API: 0, // test with live DeepL API ($$$)
10
10
  DEEPL_XLT: 0, // test live translation
11
11
  L7C_FETCH_LEGACY: 0,
12
- L7C_FETCH_LEGACY_SC: 0, // ignore test cache and use SC
12
+ L7C_FETCH_LEGACY_SC: 0, // ignore test cache and use SC
13
13
  WORD_MAP_TRANFORMER: 0,
14
14
  };
@@ -37,7 +37,7 @@ export class LegacyDoc {
37
37
  return true;
38
38
  }
39
39
 
40
- static legacyUrl(opts={}) {
40
+ static legacyUrl(opts = {}) {
41
41
  let {
42
42
  endPoint = 'https://suttacentral.net/api/suttas',
43
43
  sutta_uid,
@@ -51,17 +51,14 @@ export class LegacyDoc {
51
51
  static async fetchLegacy(opts = {}) {
52
52
  const msg = 'L7c.fetchLegacy:';
53
53
  const dbg = DBG.L7C_FETCH_LEGACY;
54
- let {
55
- maxBuffer = 10 * 1024 * 1024,
56
- cache,
57
- } = opts;
54
+ let { maxBuffer = 10 * 1024 * 1024, cache } = opts;
58
55
  let url = LegacyDoc.legacyUrl(opts);
59
56
  let res;
60
57
  if (cache) {
61
58
  res = cache(url);
62
59
  dbg && console.log(msg, '[1]cached', res.ok);
63
60
  } else {
64
- res = await fetch(url, {maxBuffer});
61
+ res = await fetch(url, { maxBuffer });
65
62
  dbg && console.log(msg, '[2]scapi', res.ok);
66
63
  }
67
64
  if (!res.ok) {
@@ -52,7 +52,7 @@ export class TfidfSpace {
52
52
  return s.replace(/<[^>]*>/gi, '');
53
53
  }
54
54
 
55
- static removeNonWords(s, opts={}) {
55
+ static removeNonWords(s, opts = {}) {
56
56
  const RE_RESERVED = /[_-]/g; // allowed in bow words
57
57
  const RE_LQUOTE = /[“‘«]/g;
58
58
  const RE_PUNCT = /[.,:;$"'“”‘’!?«»\[\]]/g;
@@ -67,11 +67,11 @@ export class TfidfSpace {
67
67
  .trim();
68
68
  }
69
69
 
70
- static normalizeEN(s, opts={}) {
70
+ static normalizeEN(s, opts = {}) {
71
71
  return TfidfSpace.removeNonWords(s.toLowerCase(), opts);
72
72
  }
73
73
 
74
- static normalizeFR(s, opts={}) {
74
+ static normalizeFR(s, opts = {}) {
75
75
  let sAbbr = s
76
76
  .toLowerCase()
77
77
  .replace(/\bd[’']/gi, 'de ')
@@ -49,9 +49,10 @@ export class WordVector extends Object {
49
49
  let sv = entries.reduce((a, e) => {
50
50
  let [k, v] = e;
51
51
  if (minValue <= v) {
52
- let vf = v.toFixed(precision)
52
+ let vf = v
53
+ .toFixed(precision)
53
54
  .replace(/\.0*$/, '')
54
- .replace(/0\./,'.');
55
+ .replace(/0\./, '.');
55
56
  a.push(`${k}:${vf}`);
56
57
  }
57
58
  return a;
@@ -59,7 +60,8 @@ export class WordVector extends Object {
59
60
  return sv.join(',');
60
61
  }
61
62
 
62
- norm() { // L2 norm
63
+ norm() {
64
+ // L2 norm
63
65
  let keys = Object.keys(this);
64
66
  if (keys.length === 0) {
65
67
  return 0;
@@ -74,9 +76,10 @@ export class WordVector extends Object {
74
76
  add(vec2) {
75
77
  let keys = Object.keys(vec2);
76
78
  return keys.reduce((a, k) => {
79
+ let v1 = a[k] || 0;
77
80
  let v2 = vec2[k];
78
81
  if (v2) {
79
- a[k] = (a[k] || 0) + v2;
82
+ a[k] = v1 + v2;
80
83
  }
81
84
  return a;
82
85
  }, new WordVector(this));
@@ -85,14 +88,28 @@ export class WordVector extends Object {
85
88
  increment(vec2) {
86
89
  let keys = Object.keys(vec2);
87
90
  return keys.reduce((a, k) => {
91
+ let v1 = a[k] || 0;
88
92
  let v2 = vec2[k];
89
93
  if (v2) {
90
- a[k] = (a[k] || 0) + v2;
94
+ a[k] = v1 + v2;
91
95
  }
92
96
  return a;
93
97
  }, this);
94
98
  }
95
99
 
100
+ multiply(vec2) {
101
+ const msg = 'w8r.multiply:';
102
+ let keys = Object.keys(vec2);
103
+ return keys.reduce((a, k) => {
104
+ let v1 = this[k];
105
+ let v2 = vec2[k];
106
+ if (v1 && v2) {
107
+ a[k] = v1 * v2;
108
+ }
109
+ return a;
110
+ }, new WordVector({}));
111
+ }
112
+
96
113
  dot(vec2) {
97
114
  const msg = 'w8r.dot:';
98
115
  if (vec2 == null) {
@@ -114,7 +131,7 @@ export class WordVector extends Object {
114
131
  }, this);
115
132
  }
116
133
 
117
- hadamardL1(vec2 = {}) {
134
+ hadamardL1(vec2 = {}) {
118
135
  // L1-norm of Hadamard product shows how
119
136
  // the cosine similarity score is apportioned
120
137
  let keys = Object.keys(this);
@@ -134,7 +151,7 @@ export class WordVector extends Object {
134
151
  return hadamard; // empty vector
135
152
  }
136
153
  let n12 = this.norm() * vec2.norm();
137
- return hadamard.scale(1/n12);
154
+ return hadamard.scale(1 / n12);
138
155
  }
139
156
 
140
157
  similar(vec2) {
@@ -157,4 +174,28 @@ export class WordVector extends Object {
157
174
  return a;
158
175
  }, new WordVector());
159
176
  }
177
+
178
+ andOneHot(vec2) {
179
+ return Object.keys(this).reduce((a, k) => {
180
+ if (this[k] && vec2[k]) {
181
+ a[k] = 1;
182
+ }
183
+ return a;
184
+ }, new WordVector());
185
+ }
186
+
187
+ orOneHot(vec2) {
188
+ let result = Object.keys(this).reduce((a, k) => {
189
+ if (this[k] || vec2[k]) {
190
+ a[k] = 1;
191
+ }
192
+ return a;
193
+ }, new WordVector());
194
+ return Object.keys(vec2).reduce((a, k) => {
195
+ if (this[k] || vec2[k]) {
196
+ a[k] = 1;
197
+ }
198
+ return a;
199
+ }, result);
200
+ }
160
201
  } // WordVector