@sc-voice/tools 3.0.0 → 3.2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/defines.mjs +1 -1
- package/src/text/legacy-doc.mjs +3 -6
- package/src/text/tfidf-space.mjs +3 -3
- package/src/text/word-vector.mjs +48 -7
package/package.json
CHANGED
package/src/defines.mjs
CHANGED
|
@@ -9,6 +9,6 @@ export const DBG = {
|
|
|
9
9
|
DEEPL_TEST_API: 0, // test with live DeepL API ($$$)
|
|
10
10
|
DEEPL_XLT: 0, // test live translation
|
|
11
11
|
L7C_FETCH_LEGACY: 0,
|
|
12
|
-
L7C_FETCH_LEGACY_SC: 0, // ignore test cache and use SC
|
|
12
|
+
L7C_FETCH_LEGACY_SC: 0, // ignore test cache and use SC
|
|
13
13
|
WORD_MAP_TRANFORMER: 0,
|
|
14
14
|
};
|
package/src/text/legacy-doc.mjs
CHANGED
|
@@ -37,7 +37,7 @@ export class LegacyDoc {
|
|
|
37
37
|
return true;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
static legacyUrl(opts={}) {
|
|
40
|
+
static legacyUrl(opts = {}) {
|
|
41
41
|
let {
|
|
42
42
|
endPoint = 'https://suttacentral.net/api/suttas',
|
|
43
43
|
sutta_uid,
|
|
@@ -51,17 +51,14 @@ export class LegacyDoc {
|
|
|
51
51
|
static async fetchLegacy(opts = {}) {
|
|
52
52
|
const msg = 'L7c.fetchLegacy:';
|
|
53
53
|
const dbg = DBG.L7C_FETCH_LEGACY;
|
|
54
|
-
let {
|
|
55
|
-
maxBuffer = 10 * 1024 * 1024,
|
|
56
|
-
cache,
|
|
57
|
-
} = opts;
|
|
54
|
+
let { maxBuffer = 10 * 1024 * 1024, cache } = opts;
|
|
58
55
|
let url = LegacyDoc.legacyUrl(opts);
|
|
59
56
|
let res;
|
|
60
57
|
if (cache) {
|
|
61
58
|
res = cache(url);
|
|
62
59
|
dbg && console.log(msg, '[1]cached', res.ok);
|
|
63
60
|
} else {
|
|
64
|
-
res = await fetch(url, {maxBuffer});
|
|
61
|
+
res = await fetch(url, { maxBuffer });
|
|
65
62
|
dbg && console.log(msg, '[2]scapi', res.ok);
|
|
66
63
|
}
|
|
67
64
|
if (!res.ok) {
|
package/src/text/tfidf-space.mjs
CHANGED
|
@@ -52,7 +52,7 @@ export class TfidfSpace {
|
|
|
52
52
|
return s.replace(/<[^>]*>/gi, '');
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
static removeNonWords(s, opts={}) {
|
|
55
|
+
static removeNonWords(s, opts = {}) {
|
|
56
56
|
const RE_RESERVED = /[_-]/g; // allowed in bow words
|
|
57
57
|
const RE_LQUOTE = /[“‘«]/g;
|
|
58
58
|
const RE_PUNCT = /[.,:;$"'“”‘’!?«»\[\]]/g;
|
|
@@ -67,11 +67,11 @@ export class TfidfSpace {
|
|
|
67
67
|
.trim();
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
static normalizeEN(s, opts={}) {
|
|
70
|
+
static normalizeEN(s, opts = {}) {
|
|
71
71
|
return TfidfSpace.removeNonWords(s.toLowerCase(), opts);
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
static normalizeFR(s, opts={}) {
|
|
74
|
+
static normalizeFR(s, opts = {}) {
|
|
75
75
|
let sAbbr = s
|
|
76
76
|
.toLowerCase()
|
|
77
77
|
.replace(/\bd[’']/gi, 'de ')
|
package/src/text/word-vector.mjs
CHANGED
|
@@ -49,9 +49,10 @@ export class WordVector extends Object {
|
|
|
49
49
|
let sv = entries.reduce((a, e) => {
|
|
50
50
|
let [k, v] = e;
|
|
51
51
|
if (minValue <= v) {
|
|
52
|
-
let vf = v
|
|
52
|
+
let vf = v
|
|
53
|
+
.toFixed(precision)
|
|
53
54
|
.replace(/\.0*$/, '')
|
|
54
|
-
.replace(/0\./,'.');
|
|
55
|
+
.replace(/0\./, '.');
|
|
55
56
|
a.push(`${k}:${vf}`);
|
|
56
57
|
}
|
|
57
58
|
return a;
|
|
@@ -59,7 +60,8 @@ export class WordVector extends Object {
|
|
|
59
60
|
return sv.join(',');
|
|
60
61
|
}
|
|
61
62
|
|
|
62
|
-
norm() {
|
|
63
|
+
norm() {
|
|
64
|
+
// L2 norm
|
|
63
65
|
let keys = Object.keys(this);
|
|
64
66
|
if (keys.length === 0) {
|
|
65
67
|
return 0;
|
|
@@ -74,9 +76,10 @@ export class WordVector extends Object {
|
|
|
74
76
|
add(vec2) {
|
|
75
77
|
let keys = Object.keys(vec2);
|
|
76
78
|
return keys.reduce((a, k) => {
|
|
79
|
+
let v1 = a[k] || 0;
|
|
77
80
|
let v2 = vec2[k];
|
|
78
81
|
if (v2) {
|
|
79
|
-
a[k] =
|
|
82
|
+
a[k] = v1 + v2;
|
|
80
83
|
}
|
|
81
84
|
return a;
|
|
82
85
|
}, new WordVector(this));
|
|
@@ -85,14 +88,28 @@ export class WordVector extends Object {
|
|
|
85
88
|
increment(vec2) {
|
|
86
89
|
let keys = Object.keys(vec2);
|
|
87
90
|
return keys.reduce((a, k) => {
|
|
91
|
+
let v1 = a[k] || 0;
|
|
88
92
|
let v2 = vec2[k];
|
|
89
93
|
if (v2) {
|
|
90
|
-
a[k] =
|
|
94
|
+
a[k] = v1 + v2;
|
|
91
95
|
}
|
|
92
96
|
return a;
|
|
93
97
|
}, this);
|
|
94
98
|
}
|
|
95
99
|
|
|
100
|
+
multiply(vec2) {
|
|
101
|
+
const msg = 'w8r.multiply:';
|
|
102
|
+
let keys = Object.keys(vec2);
|
|
103
|
+
return keys.reduce((a, k) => {
|
|
104
|
+
let v1 = this[k];
|
|
105
|
+
let v2 = vec2[k];
|
|
106
|
+
if (v1 && v2) {
|
|
107
|
+
a[k] = v1 * v2;
|
|
108
|
+
}
|
|
109
|
+
return a;
|
|
110
|
+
}, new WordVector({}));
|
|
111
|
+
}
|
|
112
|
+
|
|
96
113
|
dot(vec2) {
|
|
97
114
|
const msg = 'w8r.dot:';
|
|
98
115
|
if (vec2 == null) {
|
|
@@ -114,7 +131,7 @@ export class WordVector extends Object {
|
|
|
114
131
|
}, this);
|
|
115
132
|
}
|
|
116
133
|
|
|
117
|
-
hadamardL1(vec2 = {}) {
|
|
134
|
+
hadamardL1(vec2 = {}) {
|
|
118
135
|
// L1-norm of Hadamard product shows how
|
|
119
136
|
// the cosine similarity score is apportioned
|
|
120
137
|
let keys = Object.keys(this);
|
|
@@ -134,7 +151,7 @@ export class WordVector extends Object {
|
|
|
134
151
|
return hadamard; // empty vector
|
|
135
152
|
}
|
|
136
153
|
let n12 = this.norm() * vec2.norm();
|
|
137
|
-
return hadamard.scale(1/n12);
|
|
154
|
+
return hadamard.scale(1 / n12);
|
|
138
155
|
}
|
|
139
156
|
|
|
140
157
|
similar(vec2) {
|
|
@@ -157,4 +174,28 @@ export class WordVector extends Object {
|
|
|
157
174
|
return a;
|
|
158
175
|
}, new WordVector());
|
|
159
176
|
}
|
|
177
|
+
|
|
178
|
+
andOneHot(vec2) {
|
|
179
|
+
return Object.keys(this).reduce((a, k) => {
|
|
180
|
+
if (this[k] && vec2[k]) {
|
|
181
|
+
a[k] = 1;
|
|
182
|
+
}
|
|
183
|
+
return a;
|
|
184
|
+
}, new WordVector());
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
orOneHot(vec2) {
|
|
188
|
+
let result = Object.keys(this).reduce((a, k) => {
|
|
189
|
+
if (this[k] || vec2[k]) {
|
|
190
|
+
a[k] = 1;
|
|
191
|
+
}
|
|
192
|
+
return a;
|
|
193
|
+
}, new WordVector());
|
|
194
|
+
return Object.keys(vec2).reduce((a, k) => {
|
|
195
|
+
if (this[k] || vec2[k]) {
|
|
196
|
+
a[k] = 1;
|
|
197
|
+
}
|
|
198
|
+
return a;
|
|
199
|
+
}, result);
|
|
200
|
+
}
|
|
160
201
|
} // WordVector
|