@sc-voice/tools 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +70 -0
- package/index.mjs +31 -0
- package/package.json +38 -0
- package/src/defines.mjs +9 -0
- package/src/math/fraction.mjs +94 -0
- package/src/text/aligner.mjs +562 -0
- package/src/text/bilara-path.mjs +84 -0
- package/src/text/ebt-doc.mjs +124 -0
- package/src/text/legacy-doc.mjs +80 -0
- package/src/text/merkle-json.mjs +264 -0
- package/src/text/sutta-central-id.mjs +362 -0
- package/src/text/unicode.mjs +370 -0
- package/src/text/word-space.mjs +213 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { DBG } from '../defines.mjs';
|
|
2
|
+
|
|
3
|
+
/**
 * Reads the component stored under `key` as an OWN property of `vec`.
 * Inherited members (Vector methods, the `length` getter, Object.prototype
 * members such as `constructor`) are ignored, so a key like "similar" or
 * "norm" that is absent from `vec` contributes 0 instead of a function.
 *
 * @param {Object} vec - vector-like object
 * @param {string} key - component name
 * @returns {number} the own value, or 0 when absent or falsy
 */
function vectorComponent(vec, key) {
  const hasOwn = Object.prototype.hasOwnProperty.call(vec, key);
  return hasOwn ? vec[key] || 0 : 0;
}

/**
 * Sparse numeric vector whose components are the instance's own enumerable
 * properties (key => number).
 *
 * Limitations that follow from keys sharing a namespace with class members:
 * - a component named `length` cannot be assigned, because the class defines
 *   `length` as a getter without a setter (assignment throws a TypeError);
 * - a component named like a method (e.g. `similar`) shadows that method on
 *   the instance that stores it.
 */
class Vector extends Object {
  /**
   * @param {Object} [props] - own enumerable properties are copied in as
   *   components
   */
  constructor(props) {
    super();
    Object.assign(this, props);
    // Non-enumerable cache slot for `length`; defineProperty defaults keep
    // it out of Object.keys()/Object.entries().
    Object.defineProperty(this, '$length', {
      writable: true,
    });
  }

  /**
   * Number of components. The count is computed on first access and cached
   * in `$length`; components added after that first access are not
   * reflected.
   *
   * @returns {number}
   */
  get length() {
    if (this.$length == null) {
      this.$length = Object.keys(this).length;
    }
    return this.$length;
  }

  /**
   * @returns {string} comma-separated `key:value` pairs, each value
   *   formatted with two decimals
   */
  toString() {
    return Object.entries(this)
      .map(([k, v]) => `${k}:${v.toFixed(2)}`)
      .join(',');
  }

  /**
   * @returns {number} Euclidean norm of the components (0 when empty)
   */
  norm() {
    const keys = Object.keys(this);
    if (keys.length === 0) {
      return 0;
    }
    const sumSqr = keys.reduce((a, k) => {
      const v = this[k];
      return a + v * v;
    }, 0);
    return Math.sqrt(sumSqr);
  }

  /**
   * Component-wise sum. Neither operand is modified.
   *
   * @param {Object} vec2 - vector or plain object; falsy components are
   *   skipped
   * @returns {Vector} new vector holding this + vec2
   */
  add(vec2) {
    const sum = new Vector(this);
    for (const k of Object.keys(vec2)) {
      const v2 = vec2[k];
      if (v2) {
        // Own-property read: a key such as "norm" must start from 0, not
        // from the inherited method of the same name.
        sum[k] = vectorComponent(sum, k) + v2;
      }
    }
    return sum;
  }

  /**
   * Dot product over the components of this vector.
   *
   * @param {Object} vec2 - vector or plain object
   * @returns {number}
   * @throws {Error} when vec2 is null or undefined
   */
  dot(vec2) {
    const msg = 'V4r.dot:';
    if (vec2 == null) {
      throw new Error(`${msg} vec2?`);
    }
    return Object.keys(this).reduce((a, k) => {
      const v1 = this[k];
      const v2 = vectorComponent(vec2, k);

      return a + v1 * v2;
    }, 0);
  }

  /**
   * Component-wise product restricted to keys for which both operands have
   * a truthy value.
   *
   * @param {Object} vec2 - vector or plain object
   * @returns {Vector} new vector of the products
   */
  intersect(vec2) {
    return Object.keys(this).reduce((a, k) => {
      const v1 = this[k];
      const v2 = vectorComponent(vec2, k);
      if (v1 && v2) {
        a[k] = v1 * v2;
      }

      return a;
    }, new Vector());
  }

  /**
   * Cosine similarity: dot product divided by the product of both norms.
   *
   * @param {Vector} vec2 - must provide norm()
   * @returns {number} similarity, or 0 when either norm is 0
   * @throws {Error} when vec2 is null or undefined
   */
  similar(vec2) {
    const msg = 'V4r.similar:';
    if (vec2 == null) {
      throw new Error(`${msg} vec2?`);
    }
    const d = this.dot(vec2);
    const norm1 = this.norm();
    const norm2 = vec2.norm();
    const den = norm1 * norm2;
    return den ? d / den : 0;
  }
} // Vector
|
|
91
|
+
|
|
92
|
+
/**
 * Turns text into sparse word-count vectors (instances of the file's
 * Vector class) after applying a word replacement map and a
 * language-specific normalizer.
 */
export class WordSpace {
  /**
   * @param {Object} [opts]
   * @param {string} [opts.lang] - 2-letter language code; only 'fr' selects
   *   a built-in normalizer (normalizeFR), anything else uses identity
   * @param {number} [opts.minWord=4] - minimum word length kept by
   *   string2Vector
   * @param {function(string): string} [opts.normalize] - text normalizer;
   *   overrides the one chosen from `lang`
   * @param {function} [opts.normalizeVector=WordSpace.normalizeVector] -
   *   post-processing applied to the raw count vector; pass a falsy
   *   non-undefined value (e.g. null) to skip it
   * @param {Object<string,string>} [opts.wordMap={}] - pattern => replacement;
   *   both sides are lower-cased here
   * @param {Array<{re: RegExp, rep: string}>} [opts.reWordMap] - precompiled
   *   map; when absent it is compiled lazily by applyWordMap
   */
  constructor(opts = {}) {
    let {
      lang, // 2-letter code: fr, en, es, pt
      minWord = 4, // minimum word length
      normalize,
      normalizeVector = WordSpace.normalizeVector,
      wordMap = {}, // word replacement map
      reWordMap,
    } = opts;

    // NOTE(review): keys are later compiled as regular expressions by
    // compileWordMap; lower-casing a pattern also lower-cases escapes such
    // as \B or \S, changing their meaning - confirm no caller relies on
    // upper-case escapes.
    wordMap = Object.keys(wordMap).reduce((a, w) => {
      a[w.toLowerCase()] = wordMap[w].toLowerCase();
      return a;
    }, {});

    if (!normalize) {
      normalize = lang === 'fr' ? WordSpace.normalizeFR : (s) => s;
    }

    Object.assign(this, {
      lang,
      minWord,
      normalize,
      normalizeVector,
      reWordMap,
      wordMap,
    });
  }

  /**
   * Compiles each key of `wordMap` into a case-insensitive, global,
   * multiline, unicode RegExp paired with its replacement.
   *
   * @param {Object<string,string>} wordMap
   * @returns {Array<{re: RegExp, rep: string}>|*} compiled entries, or the
   *   falsy `wordMap` value itself when none was given
   */
  static compileWordMap(wordMap) {
    return (
      wordMap &&
      Object.keys(wordMap).map((pat) => ({
        re: new RegExp(pat, 'iugm'),
        rep: wordMap[pat],
      }))
    );
  }

  /**
   * French text normalizer: drops guillemets, expands the elisions
   * d’/l’/s’ and replaces EVERY `?` and `!` with the markers `$QUESTION`
   * and `$EXCLAMATION`.
   *
   * @param {string} s
   * @returns {string} normalized, trimmed text
   */
  static normalizeFR(s) {
    return (
      s
        .replace(/[«»]/gi, '')
        .replace(/\bd’/gi, 'de ')
        .replace(/\bl’/gi, 'le ')
        .replace(/\bs’/gi, 'se ')
        // replaceAll: String.replace with a string pattern would only touch
        // the first occurrence. `$Q` / `$E` are not special replacement
        // patterns, so the markers are inserted literally.
        .replaceAll('?', '$QUESTION')
        .replaceAll('!', '$EXCLAMATION')
        .trim()
    );
  }

  /**
   * Applies every word-map replacement to `text`, in map order. The
   * compiled map is built on first use and cached on `this.reWordMap`.
   *
   * @param {string} text
   * @returns {string} text with all replacements applied
   */
  applyWordMap(text) {
    const msg = 'W7e.applyWordMap:';
    const dbg = DBG.APPLY_WORD_MAP;
    let { wordMap, reWordMap } = this;
    if (reWordMap == null) {
      reWordMap = WordSpace.compileWordMap(wordMap);
      this.reWordMap = reWordMap;
    }
    dbg && console.log(msg, { text });
    let rslt = text;
    for (let i = 0; i < reWordMap.length; i++) {
      const { re, rep } = reWordMap[i];
      rslt = rslt.replaceAll(re, rep);
      dbg && console.log(msg, { i, rslt, re });
    }
    return rslt;
  }

  /** @returns {Function} the Vector class used by this module */
  static get Vector() {
    return Vector;
  }

  /**
   * Default vector normalizer: maps each component `x` to
   * `1 - exp(-x / tau)`, which stays below 1 for finite positive counts.
   *
   * @param {Object} v - vector of raw values
   * @returns {Vector} new vector with the same keys
   */
  static normalizeVector(v) {
    const tau = 0.618034; // Golden ratio conjugate (1/phi)
    const vNew = new Vector(v);
    Object.entries(v).forEach(([key, value]) => {
      vNew[key] = 1 - Math.exp(-value / tau);
    });

    return vNew;
  }

  /**
   * Converts a string into a word vector: applies the word map and the
   * normalizer, lower-cases, turns hyphens into spaces, strips punctuation,
   * splits on single spaces and adds `scale` for every word of at least
   * `minWord` characters. The result is passed through `normalizeVector`
   * when one is configured.
   *
   * NOTE(review): words share a namespace with Vector members. A word equal
   * to a Vector method name (e.g. "similar") is counted correctly here but
   * becomes an own property shadowing that method on the returned vector,
   * and the word "length" cannot be stored because Vector defines `length`
   * as a getter without a setter - confirm whether inputs can contain them.
   *
   * @param {string} str - text to convert
   * @param {number} [scale=1] - weight added per word occurrence
   * @returns {Vector}
   * @throws {Error} when str is null or undefined
   */
  string2Vector(str, scale = 1) {
    const msg = 'W7e.string2Vector:';
    if (str == null) {
      throw new Error(`${msg} str?`);
    }
    const { normalize, normalizeVector, minWord } = this;
    const sWordMap = this.applyWordMap(str);
    const sNorm = normalize(sWordMap)
      .toLowerCase()
      .trim()
      .replace(/[-]/g, ' ')
      .replace(/[.,_:;"'“”‘’!?]/g, '');
    const words = sNorm.split(' ');
    let v = words.reduce((a, w) => {
      if (w.length >= minWord) {
        // Own-property read: an inherited member with the same name as the
        // word (a Vector method, `constructor`, ...) must not be mistaken
        // for an existing count.
        const seen = Object.prototype.hasOwnProperty.call(a, w);
        a[w] = (seen ? a[w] || 0 : 0) + scale;
      }
      return a;
    }, new Vector());

    if (normalizeVector) {
      v = normalizeVector(v);
    }

    return v;
  }
} // WordSpace
|