@sc-voice/tools 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.mjs +0 -16
- package/package.json +1 -1
- package/src/translate/aligner.mjs +0 -651
- package/src/translate/deepl-adapter.mjs +0 -353
- package/src/translate/dpd-transformer.mjs +0 -17
- package/src/translate/mock-deepl.mjs +0 -351
- package/src/translate/quote-parser.mjs +0 -681
package/index.mjs
CHANGED
|
@@ -31,19 +31,3 @@ export const Graph = {
|
|
|
31
31
|
Sankey,
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
import {
|
|
35
|
-
Aligner, Alignment, AlignmentStatus
|
|
36
|
-
} from './src/translate/aligner.mjs';
|
|
37
|
-
import { DpdTransformer } from './src/translate/dpd-transformer.mjs';
|
|
38
|
-
import { MockDeepL } from './src/translate/mock-deepl.mjs';
|
|
39
|
-
import { DeepLAdapter } from './src/translate/deepl-adapter.mjs';
|
|
40
|
-
import { QuoteParser } from './src/translate/quote-parser.mjs';
|
|
41
|
-
export const Translate = {
|
|
42
|
-
Aligner,
|
|
43
|
-
Alignment,
|
|
44
|
-
AlignmentStatus,
|
|
45
|
-
DeepLAdapter,
|
|
46
|
-
DpdTransformer,
|
|
47
|
-
MockDeepL,
|
|
48
|
-
QuoteParser,
|
|
49
|
-
}
|
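package/package.json
CHANGED
[hunk not captured in this extract; per the file summary above it is a one-line change (+1 -1), presumably the version field 1.5.0 → 2.0.0]
package/src/translate/aligner.mjs
DELETED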
|
@@ -1,651 +0,0 @@
|
|
|
1
|
-
import { DBG } from '../defines.mjs';
|
|
2
|
-
import { Fraction } from '../math/fraction.mjs';
|
|
3
|
-
import { EbtDoc } from '../text/ebt-doc.mjs';
|
|
4
|
-
import { LegacyDoc } from '../text/legacy-doc.mjs';
|
|
5
|
-
import { SuttaCentralId } from '../text/sutta-central-id.mjs';
|
|
6
|
-
import { Unicode } from '../text/unicode.mjs';
|
|
7
|
-
import {
|
|
8
|
-
WordMapTransformer,
|
|
9
|
-
WordSpace,
|
|
10
|
-
} from '../text/word-space.mjs';
|
|
11
|
-
|
|
12
|
-
const STATE_OK = 'ok';
|
|
13
|
-
const STATE_WARN = 'warn';
|
|
14
|
-
const STATE_ERROR = 'error';
|
|
15
|
-
const STATE_DONE = 'done';
|
|
16
|
-
const {
|
|
17
|
-
GREEN_CHECKBOX,
|
|
18
|
-
LEFT_ARROW,
|
|
19
|
-
RIGHT_ARROW,
|
|
20
|
-
CHECKMARK,
|
|
21
|
-
ELLIPSIS,
|
|
22
|
-
WARNING,
|
|
23
|
-
RED_X,
|
|
24
|
-
} = Unicode;
|
|
25
|
-
const {
|
|
26
|
-
BLACK,
|
|
27
|
-
WHITE,
|
|
28
|
-
RED,
|
|
29
|
-
GREEN,
|
|
30
|
-
BLUE,
|
|
31
|
-
CYAN,
|
|
32
|
-
MAGENTA,
|
|
33
|
-
YELLOW,
|
|
34
|
-
NO_COLOR,
|
|
35
|
-
} = Unicode.LINUX_COLOR;
|
|
36
|
-
|
|
37
|
-
let alignmentCtor = false;
|
|
38
|
-
|
|
39
|
-
class PaliTransformer {
|
|
40
|
-
constructor(transformer) {
|
|
41
|
-
let { wordMap } = transformer;
|
|
42
|
-
this.transformer = transformer;
|
|
43
|
-
|
|
44
|
-
let reList;
|
|
45
|
-
let entries = Object.entries(wordMap);
|
|
46
|
-
reList = entries.reduce((a, e) => {
|
|
47
|
-
let [legacyText, paliText] = e;
|
|
48
|
-
if (paliText) {
|
|
49
|
-
a.set(paliText, new RegExp(`\\b${paliText}`, 'gi'));
|
|
50
|
-
}
|
|
51
|
-
return a;
|
|
52
|
-
}, new Map());
|
|
53
|
-
this.reList = reList;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
get wordMap() {
|
|
57
|
-
return this.transformer.wordMap;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
transform(text) {
|
|
61
|
-
const msg = 'P14r.transform';
|
|
62
|
-
const dbg = DBG.PALI_TRANSFORMER;
|
|
63
|
-
let { transformer } = this;
|
|
64
|
-
dbg && console.log(msg, text);
|
|
65
|
-
return transformer.transform(text);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
normalize(text) {
|
|
69
|
-
const msg = 'P14r.normalize';
|
|
70
|
-
const dbg = DBG.PALI_TRANSFORMER;
|
|
71
|
-
let { transformer } = this;
|
|
72
|
-
dbg && console.log(msg, text);
|
|
73
|
-
return transformer.normalize(text);
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
export class DpdTransformer {
|
|
78
|
-
constructor(opts = {}) {
|
|
79
|
-
const msg = 'D12r.ctor:';
|
|
80
|
-
let { dictionary } = opts;
|
|
81
|
-
if (dictionary == null) {
|
|
82
|
-
throw new Error(`${msg} dictionary?`);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
this.dictionary = dictionary;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
transform(text) {
|
|
89
|
-
return text;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
normalize(text) {
|
|
93
|
-
return text;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
export class Aligner {
|
|
98
|
-
constructor(opts = {}) {
|
|
99
|
-
const msg = 'A5r.ctor:';
|
|
100
|
-
let {
|
|
101
|
-
alignMethod = 'alignPali',
|
|
102
|
-
authorAligned, // author of segment aligned document
|
|
103
|
-
authorLegacy, // author of legacy document
|
|
104
|
-
dbgScid,
|
|
105
|
-
groupDecay = 0.5, // group exponential decay
|
|
106
|
-
groupSize = 1, // comparison group size
|
|
107
|
-
lang, // 2-letter ISO language (en, fr, es, pt)
|
|
108
|
-
maxScanSize, // maximum segments to scan for alignment
|
|
109
|
-
minScanSize = 5, // minimum number of segments to scan
|
|
110
|
-
minScore = 0.1, // minimum alignment score
|
|
111
|
-
minWord,
|
|
112
|
-
normalizeVector,
|
|
113
|
-
scvEndpoint = 'https://www.api.sc-voice.net/scv',
|
|
114
|
-
wordSpace,
|
|
115
|
-
} = opts;
|
|
116
|
-
if (wordSpace == null) {
|
|
117
|
-
wordSpace = new WordSpace({
|
|
118
|
-
lang,
|
|
119
|
-
minWord,
|
|
120
|
-
normalizeVector,
|
|
121
|
-
});
|
|
122
|
-
}
|
|
123
|
-
if (alignMethod === 'alignPali') {
|
|
124
|
-
wordSpace.transformer = new PaliTransformer(
|
|
125
|
-
wordSpace.transformer,
|
|
126
|
-
);
|
|
127
|
-
}
|
|
128
|
-
if (lang == null) {
|
|
129
|
-
lang = wordSpace.lang;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
Object.assign(this, {
|
|
133
|
-
alignMethod,
|
|
134
|
-
authorAligned,
|
|
135
|
-
authorLegacy,
|
|
136
|
-
dbgScid,
|
|
137
|
-
groupSize,
|
|
138
|
-
groupDecay,
|
|
139
|
-
lang,
|
|
140
|
-
minScore,
|
|
141
|
-
minScanSize,
|
|
142
|
-
maxScanSize,
|
|
143
|
-
scvEndpoint,
|
|
144
|
-
wordSpace,
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
async fetchMLDoc(scid) {
|
|
149
|
-
const msg = 'Aligner.fetchMLDoc:';
|
|
150
|
-
let { lang, scvEndpoint, authorAligned } = this;
|
|
151
|
-
let url = [
|
|
152
|
-
scvEndpoint,
|
|
153
|
-
'search',
|
|
154
|
-
`${scid}%20-da%20${authorAligned}%20-ml1`,
|
|
155
|
-
lang,
|
|
156
|
-
].join('/');
|
|
157
|
-
try {
|
|
158
|
-
let res = await fetch(url);
|
|
159
|
-
let json = await res.json();
|
|
160
|
-
let mld = json.mlDocs[0];
|
|
161
|
-
return mld;
|
|
162
|
-
} catch (e) {
|
|
163
|
-
console.error(msg, e);
|
|
164
|
-
throw e;
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
createAlignment(opts = {}) {
|
|
169
|
-
const msg = 'A7t.createAlignment:';
|
|
170
|
-
const dbg = DBG.CREATE_ALIGNMENT;
|
|
171
|
-
let {
|
|
172
|
-
dbgScid = this.dbgScid,
|
|
173
|
-
legacyDoc,
|
|
174
|
-
mlDoc,
|
|
175
|
-
minScore = this.minScore,
|
|
176
|
-
minScanSize = this.minScanSize,
|
|
177
|
-
maxScanSize = this.maxScanSize,
|
|
178
|
-
scidsExp,
|
|
179
|
-
} = opts;
|
|
180
|
-
let { lang } = this;
|
|
181
|
-
if (!(legacyDoc instanceof LegacyDoc)) {
|
|
182
|
-
throw new Error(`${msg} legacyDoc?`);
|
|
183
|
-
}
|
|
184
|
-
if (mlDoc == null) {
|
|
185
|
-
throw new Error(`${msg} mlDoc?`);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
let { author, author_uid, lines, footer } = legacyDoc;
|
|
189
|
-
let nLines = lines.length;
|
|
190
|
-
let lineCursor = new Fraction(0, nLines, 'lines');
|
|
191
|
-
let scids = Object.keys(mlDoc.segMap);
|
|
192
|
-
let nSegs = scids.length;
|
|
193
|
-
scids.sort(SuttaCentralId.compareLow);
|
|
194
|
-
let segCursor = new Fraction(0, nSegs, 'segs');
|
|
195
|
-
if (nSegs < nLines) {
|
|
196
|
-
throw new Error(`${msg} nSegs:${nSegs} < nLines:${nLines}?`);
|
|
197
|
-
}
|
|
198
|
-
if (maxScanSize == null) {
|
|
199
|
-
maxScanSize = Math.ceil(Math.max(1, (nSegs - nLines) * 0.8));
|
|
200
|
-
}
|
|
201
|
-
if (minScanSize < 1) {
|
|
202
|
-
throw new Error(`${msg} minScanSize? ${minScanSize} `);
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
let { sutta_uid: suid, docAuthor, bilaraPaths } = mlDoc;
|
|
206
|
-
let bilaraPath = bilaraPaths.reduce((a, p) => {
|
|
207
|
-
if (p.includes(docAuthor)) {
|
|
208
|
-
a = p.replaceAll(docAuthor, author_uid);
|
|
209
|
-
}
|
|
210
|
-
return a;
|
|
211
|
-
});
|
|
212
|
-
let docOpts = {
|
|
213
|
-
suid,
|
|
214
|
-
lang,
|
|
215
|
-
author,
|
|
216
|
-
author_uid,
|
|
217
|
-
bilaraPath,
|
|
218
|
-
footer,
|
|
219
|
-
};
|
|
220
|
-
|
|
221
|
-
const optsAlignment = {
|
|
222
|
-
aligner: this,
|
|
223
|
-
dbgScid,
|
|
224
|
-
ebtDoc: EbtDoc.create(docOpts),
|
|
225
|
-
legacyDoc,
|
|
226
|
-
lineCursor,
|
|
227
|
-
mlDoc,
|
|
228
|
-
minScore,
|
|
229
|
-
minScanSize,
|
|
230
|
-
maxScanSize,
|
|
231
|
-
scids,
|
|
232
|
-
scidsExp,
|
|
233
|
-
segCursor,
|
|
234
|
-
vMLDoc: this.mlDocVectors(mlDoc),
|
|
235
|
-
};
|
|
236
|
-
alignmentCtor = true;
|
|
237
|
-
let alignment = new Alignment(optsAlignment);
|
|
238
|
-
alignmentCtor = false;
|
|
239
|
-
|
|
240
|
-
return alignment;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
mlDocVectors(mld) {
|
|
244
|
-
const msg = 'Aligner.mlDocVectors';
|
|
245
|
-
const dbg = DBG.ML_DOC_VECTORS;
|
|
246
|
-
let { alignMethod, groupDecay, groupSize, wordSpace } = this;
|
|
247
|
-
let { wordMap } = wordSpace.transformer;
|
|
248
|
-
let { segMap, lang } = mld;
|
|
249
|
-
let segs = Object.entries(segMap);
|
|
250
|
-
let iLastSeg = segs.length - 1;
|
|
251
|
-
let reList;
|
|
252
|
-
|
|
253
|
-
if (alignMethod === 'alignPali') {
|
|
254
|
-
let entries = Object.entries(wordMap);
|
|
255
|
-
reList = entries.reduce((a, e) => {
|
|
256
|
-
let [legacyText, paliText] = e;
|
|
257
|
-
if (paliText) {
|
|
258
|
-
a.set(paliText, new RegExp(`\\b${paliText}`, 'gi'));
|
|
259
|
-
}
|
|
260
|
-
return a;
|
|
261
|
-
}, new Map());
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
let vectorMap = {};
|
|
265
|
-
let segGroup = [];
|
|
266
|
-
for (let i = segs.length; i-- > 0; ) {
|
|
267
|
-
let [scid, seg] = segs[i];
|
|
268
|
-
let vGroup = new WordSpace.Vector();
|
|
269
|
-
|
|
270
|
-
let { pli } = seg;
|
|
271
|
-
let segData = seg[lang] || '';
|
|
272
|
-
switch (alignMethod) {
|
|
273
|
-
case 'alignPali':
|
|
274
|
-
{
|
|
275
|
-
// for aligning Pali, we add all Pali words that
|
|
276
|
-
// occur in the Pali for a segment to the
|
|
277
|
-
// vector input text
|
|
278
|
-
let pliWords = [];
|
|
279
|
-
reList.forEach((re, paliText, map) => {
|
|
280
|
-
let nMatch = pli.match(re)?.length || 0;
|
|
281
|
-
if (nMatch) {
|
|
282
|
-
for (let i = 0; i < nMatch; i++) {
|
|
283
|
-
pliWords.push(paliText);
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
});
|
|
287
|
-
if (pliWords.length) {
|
|
288
|
-
segData += ' ' + pliWords.join(' ');
|
|
289
|
-
dbg === scid &&
|
|
290
|
-
console.log(msg, 'segData', scid, segData);
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
break;
|
|
294
|
-
case 'DPD':
|
|
295
|
-
break;
|
|
296
|
-
}
|
|
297
|
-
segGroup.unshift(segData);
|
|
298
|
-
if (segGroup.length > groupSize) {
|
|
299
|
-
segGroup.pop();
|
|
300
|
-
}
|
|
301
|
-
let scale = 1;
|
|
302
|
-
vGroup = segGroup.reduce((a, seg, i) => {
|
|
303
|
-
let vScale = wordSpace.string2Vector(segData, scale);
|
|
304
|
-
scale *= groupDecay;
|
|
305
|
-
return a.add(vScale);
|
|
306
|
-
}, vGroup);
|
|
307
|
-
vectorMap[scid] = vGroup;
|
|
308
|
-
}
|
|
309
|
-
return vectorMap;
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
export class Alignment {
|
|
314
|
-
constructor(opts = {}) {
|
|
315
|
-
const msg = 'A7t.ctor:';
|
|
316
|
-
if (!alignmentCtor) {
|
|
317
|
-
throw new Error(`${msg} createAlignment()?`);
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
Object.assign(this, opts);
|
|
321
|
-
|
|
322
|
-
Object.defineProperty(this, 'lang', {
|
|
323
|
-
get: () => this.aligner.lang,
|
|
324
|
-
});
|
|
325
|
-
Object.defineProperty(this, 'state', {
|
|
326
|
-
get: () => this.status.state,
|
|
327
|
-
});
|
|
328
|
-
Object.defineProperty(this, 'wordSpace', {
|
|
329
|
-
get: () => this.aligner.wordSpace,
|
|
330
|
-
});
|
|
331
|
-
Object.defineProperty(this, 'status', {
|
|
332
|
-
get: () => {
|
|
333
|
-
let { legacyDoc, history } = this;
|
|
334
|
-
if (history.length === 0) {
|
|
335
|
-
let { uid, lang, author_uid } = legacyDoc;
|
|
336
|
-
let text = `${uid}/${lang}/${author_uid} unaligned`;
|
|
337
|
-
return new AlignmentStatus(this, { text });
|
|
338
|
-
}
|
|
339
|
-
return history.at(-1);
|
|
340
|
-
},
|
|
341
|
-
});
|
|
342
|
-
|
|
343
|
-
this.history = [];
|
|
344
|
-
let { legacyDoc } = this;
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
pushStatus(opts) {
|
|
348
|
-
let status = new AlignmentStatus(this, opts);
|
|
349
|
-
this.history.push(status);
|
|
350
|
-
return status;
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
alignLine(legacyText, opts = {}) {
|
|
354
|
-
const msg = 'A7t.alignLine:';
|
|
355
|
-
const dbg = DBG.ALIGN_LINE;
|
|
356
|
-
if (typeof opts !== 'object') {
|
|
357
|
-
throw new Error(`${msg} opts?`);
|
|
358
|
-
}
|
|
359
|
-
let { dbgScid = this.dbgScid } = opts;
|
|
360
|
-
// biome-ignore format:
|
|
361
|
-
let { ebtDoc, legacyDoc, lineCursor, maxScanSize, minScanSize,
|
|
362
|
-
minScore, mlDoc, scids, segCursor, vMLDoc, wordSpace,
|
|
363
|
-
} = this;
|
|
364
|
-
let vLegacy = wordSpace.string2Vector(legacyText);
|
|
365
|
-
let scoreMax = 0;
|
|
366
|
-
let segMap = mlDoc.segMap;
|
|
367
|
-
let scoreId;
|
|
368
|
-
let scanning = (i) =>
|
|
369
|
-
i < maxScanSize && (i < minScanSize || scoreMax < minScore);
|
|
370
|
-
for (let i = 0; scanning(i); i++) {
|
|
371
|
-
let scid = scids[segCursor.numerator + i];
|
|
372
|
-
if (scid == null) {
|
|
373
|
-
console.log(error, '[1]scid?', segCursor.toString());
|
|
374
|
-
break;
|
|
375
|
-
}
|
|
376
|
-
let vSeg = vMLDoc[scid];
|
|
377
|
-
if (vSeg == null) {
|
|
378
|
-
throw new Error(`${msg}scid[${scid}]? ${vMLDoc.length}`);
|
|
379
|
-
}
|
|
380
|
-
let score = vLegacy.similar(vSeg);
|
|
381
|
-
if (minScanSize <= i) {
|
|
382
|
-
// Scan exceeded minScanSize. We might be lost.
|
|
383
|
-
// Or maybe we got lucky and translator omitted many segments.
|
|
384
|
-
// For example, MN8 42 segments are skipped for Môhan
|
|
385
|
-
if (score) {
|
|
386
|
-
let percent = (score * 100).toFixed(0);
|
|
387
|
-
let linePos = `line ${lineCursor.n + 1}`;
|
|
388
|
-
this.pushStatus({
|
|
389
|
-
state: STATE_WARN,
|
|
390
|
-
text: `SCAN+${i}`,
|
|
391
|
-
score,
|
|
392
|
-
scid,
|
|
393
|
-
legacyText,
|
|
394
|
-
});
|
|
395
|
-
dbg && console.log(msg, this.status.summary);
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
// biome-ignore format:
|
|
399
|
-
if (dbg > 1 && scid === dbgScid) {
|
|
400
|
-
let seg = mlDoc?.segMap[scid] || {};
|
|
401
|
-
let intersection = vLegacy.intersect(vSeg).toString();
|
|
402
|
-
let { pli } = seg;
|
|
403
|
-
console.log(msg, 'dbgScid', {
|
|
404
|
-
legacyText, vLegacy: vLegacy.toString(),
|
|
405
|
-
seg, vSeg: vSeg.toString(),
|
|
406
|
-
score, intersection,
|
|
407
|
-
});
|
|
408
|
-
}
|
|
409
|
-
if (scoreMax < score) {
|
|
410
|
-
scoreMax = score;
|
|
411
|
-
scoreId = scid;
|
|
412
|
-
if (dbg > 1 && dbgScid) {
|
|
413
|
-
let cmp = SuttaCentralId.compareLow(scoreId, dbgScid);
|
|
414
|
-
let intersection = vLegacy.intersect(vSeg).toString();
|
|
415
|
-
// biome-ignore format:
|
|
416
|
-
if (cmp <= 0) {
|
|
417
|
-
console.log(msg, `scoreMax-${dbgScid}`,
|
|
418
|
-
{ scoreId, scoreMax, intersection, });
|
|
419
|
-
} else {
|
|
420
|
-
let segExp = segMap && segMap[dbgScid];
|
|
421
|
-
console.log( msg, `scoreMax-${dbgScid}-MISMATCH?`,
|
|
422
|
-
segCursor.toString(),
|
|
423
|
-
lineCursor.toString(),
|
|
424
|
-
{ scoreId, segExp, legacyText, scoreMax, intersection},
|
|
425
|
-
);
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
}
|
|
429
|
-
} // for
|
|
430
|
-
|
|
431
|
-
let vSeg = vMLDoc[scoreId];
|
|
432
|
-
let intersection = vLegacy.intersect(vSeg);
|
|
433
|
-
|
|
434
|
-
if (scoreId == null || scoreMax < minScore) {
|
|
435
|
-
let iEnd =
|
|
436
|
-
Math.min(scids.length, segCursor.numerator + maxScanSize) - 1;
|
|
437
|
-
let lastId = scids[iEnd];
|
|
438
|
-
let scanned = iEnd - segCursor.numerator + 1;
|
|
439
|
-
// biome-ignore format:
|
|
440
|
-
this.pushStatus({
|
|
441
|
-
state: STATE_ERROR,
|
|
442
|
-
text: `${maxScanSize} UNMATCHED`,
|
|
443
|
-
legacyText,
|
|
444
|
-
score: scoreMax,
|
|
445
|
-
scid: scoreId,
|
|
446
|
-
intersection,
|
|
447
|
-
vLegacy,
|
|
448
|
-
vSeg,
|
|
449
|
-
});
|
|
450
|
-
dbg && console.log(msg, this.status.summary);
|
|
451
|
-
return undefined;
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
// STATE_OK: Current line matches current segment
|
|
455
|
-
ebtDoc.segMap[scoreId] = legacyText;
|
|
456
|
-
|
|
457
|
-
lineCursor.increment();
|
|
458
|
-
let iFound = scids.indexOf(scoreId);
|
|
459
|
-
if (iFound >= 0) {
|
|
460
|
-
segCursor.numerator = iFound + 1;
|
|
461
|
-
} else {
|
|
462
|
-
dbg &&
|
|
463
|
-
console.error(msg, `${ERROR} iFound?`, {
|
|
464
|
-
lineCursor,
|
|
465
|
-
scoreId,
|
|
466
|
-
});
|
|
467
|
-
}
|
|
468
|
-
let status = this.pushStatus({
|
|
469
|
-
score: scoreMax,
|
|
470
|
-
scid: scoreId,
|
|
471
|
-
intersection,
|
|
472
|
-
legacyText,
|
|
473
|
-
vLegacy,
|
|
474
|
-
vSeg,
|
|
475
|
-
iLine: lineCursor.n,
|
|
476
|
-
});
|
|
477
|
-
dbg && console.log(msg, status.summary);
|
|
478
|
-
if (lineCursor.value === 1) {
|
|
479
|
-
let { uid, lang, author_uid } = this.legacyDoc;
|
|
480
|
-
let lineCur = lineCursor.toString();
|
|
481
|
-
status = this.pushStatus({
|
|
482
|
-
state: STATE_DONE,
|
|
483
|
-
text: `${uid}/${lang}/${author_uid} aligned ${lineCur}`,
|
|
484
|
-
context: lineCursor.toString(),
|
|
485
|
-
});
|
|
486
|
-
dbg && console.log(msg, this.status.summary);
|
|
487
|
-
}
|
|
488
|
-
|
|
489
|
-
return status;
|
|
490
|
-
} // alignLine
|
|
491
|
-
|
|
492
|
-
alignAll() {
|
|
493
|
-
const msg = 'A7t.alignAll:';
|
|
494
|
-
let dbg = DBG.ALIGN_ALL;
|
|
495
|
-
//biome-ignore format:
|
|
496
|
-
let {
|
|
497
|
-
aligner, ebtDoc, legacyDoc, lineCursor, maxScanSize, minScanSize,
|
|
498
|
-
mlDoc, scidsExp, segCursor, vMLDoc,
|
|
499
|
-
} = this;
|
|
500
|
-
let { lang, alignMethod, wordSpace } = aligner;
|
|
501
|
-
let { segMap } = mlDoc;
|
|
502
|
-
let scids = Object.keys(segMap);
|
|
503
|
-
scids.sort(SuttaCentralId.compareLow);
|
|
504
|
-
let { lines } = legacyDoc;
|
|
505
|
-
let rPrev;
|
|
506
|
-
let iEnd = lines.length - 1;
|
|
507
|
-
|
|
508
|
-
while (lineCursor.difference < 0) {
|
|
509
|
-
let line = lines[lineCursor.numerator];
|
|
510
|
-
let curScid = scids[segCursor.numerator];
|
|
511
|
-
let dbgScid = scidsExp?.[lineCursor.numerator];
|
|
512
|
-
let r = this.alignLine(line, { dbgScid });
|
|
513
|
-
rPrev = r;
|
|
514
|
-
// biome-ignore format:
|
|
515
|
-
if (r == null) {
|
|
516
|
-
let { vSeg, vLegacy, intersection } = this.status;
|
|
517
|
-
dbg && console.log(msg, 'UNMATCHED',
|
|
518
|
-
lineCursor.toString(),
|
|
519
|
-
segCursor.toString(),
|
|
520
|
-
{ curScid, line, minScanSize, maxScanSize, vSeg, vLegacy, intersection },
|
|
521
|
-
);
|
|
522
|
-
return null;
|
|
523
|
-
}
|
|
524
|
-
}
|
|
525
|
-
|
|
526
|
-
return ebtDoc;
|
|
527
|
-
} // alignAll
|
|
528
|
-
} // class Alignment
|
|
529
|
-
|
|
530
|
-
export class AlignmentStatus {
|
|
531
|
-
constructor(alignment, opts = {}) {
|
|
532
|
-
let { lineCursor, segCursor } = alignment;
|
|
533
|
-
let {
|
|
534
|
-
text,
|
|
535
|
-
scid,
|
|
536
|
-
state = STATE_OK,
|
|
537
|
-
score,
|
|
538
|
-
intersection,
|
|
539
|
-
legacyText,
|
|
540
|
-
vLegacy,
|
|
541
|
-
vSeg,
|
|
542
|
-
iLine = lineCursor.n + 1,
|
|
543
|
-
} = opts;
|
|
544
|
-
|
|
545
|
-
Object.assign(this, {
|
|
546
|
-
iLine,
|
|
547
|
-
intersection: intersection?.toString(),
|
|
548
|
-
legacyText,
|
|
549
|
-
lineCursor: lineCursor && new Fraction(lineCursor),
|
|
550
|
-
text,
|
|
551
|
-
scid,
|
|
552
|
-
score,
|
|
553
|
-
segCursor: segCursor && new Fraction(segCursor),
|
|
554
|
-
state,
|
|
555
|
-
vLegacy: vLegacy?.toString(),
|
|
556
|
-
vSeg: vSeg?.toString(),
|
|
557
|
-
});
|
|
558
|
-
|
|
559
|
-
Object.defineProperty(this, 'alignment', {
|
|
560
|
-
value: alignment,
|
|
561
|
-
});
|
|
562
|
-
Object.defineProperty(this, 'scorePercent', {
|
|
563
|
-
get: () =>
|
|
564
|
-
this.score == null
|
|
565
|
-
? '--%'
|
|
566
|
-
: `${(100 * this.score)?.toFixed(0)}%`,
|
|
567
|
-
});
|
|
568
|
-
Object.defineProperty(this, 'lineCur', {
|
|
569
|
-
get: () => this?.lineCursor?.toString(),
|
|
570
|
-
});
|
|
571
|
-
Object.defineProperty(this, 'segCur', {
|
|
572
|
-
get: () => this?.segCursor?.toString(),
|
|
573
|
-
});
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
static get STATE_ERROR() {
|
|
577
|
-
return STATE_ERROR;
|
|
578
|
-
}
|
|
579
|
-
static get STATE_DONE() {
|
|
580
|
-
return STATE_DONE;
|
|
581
|
-
}
|
|
582
|
-
static get STATE_OK() {
|
|
583
|
-
return STATE_OK;
|
|
584
|
-
}
|
|
585
|
-
static get STATE_WARN() {
|
|
586
|
-
return STATE_WARN;
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
get summary() {
|
|
590
|
-
let {
|
|
591
|
-
alignment,
|
|
592
|
-
state,
|
|
593
|
-
text,
|
|
594
|
-
scid,
|
|
595
|
-
scorePercent,
|
|
596
|
-
lineCur,
|
|
597
|
-
lineCursor,
|
|
598
|
-
segCur,
|
|
599
|
-
segCursor,
|
|
600
|
-
score,
|
|
601
|
-
legacyText = '',
|
|
602
|
-
iLine,
|
|
603
|
-
} = this;
|
|
604
|
-
|
|
605
|
-
let status = [];
|
|
606
|
-
let symbol;
|
|
607
|
-
let color = NO_COLOR;
|
|
608
|
-
let context = legacyText ? `${iLine}:` + legacyText : '';
|
|
609
|
-
let { minScore } = alignment;
|
|
610
|
-
let CTX_LEN = 25;
|
|
611
|
-
switch (state) {
|
|
612
|
-
case STATE_ERROR:
|
|
613
|
-
symbol = RED_X;
|
|
614
|
-
color = RED;
|
|
615
|
-
break;
|
|
616
|
-
case STATE_WARN:
|
|
617
|
-
color = YELLOW;
|
|
618
|
-
symbol = WARNING + ' ';
|
|
619
|
-
context = context.substring(0, CTX_LEN) + ELLIPSIS;
|
|
620
|
-
break;
|
|
621
|
-
case STATE_DONE:
|
|
622
|
-
symbol = CHECKMARK + ' ';
|
|
623
|
-
color = WHITE;
|
|
624
|
-
break;
|
|
625
|
-
case STATE_OK:
|
|
626
|
-
symbol = CHECKMARK;
|
|
627
|
-
context = context.substring(0, CTX_LEN) + ELLIPSIS;
|
|
628
|
-
color = NO_COLOR;
|
|
629
|
-
break;
|
|
630
|
-
default:
|
|
631
|
-
symbol = RED_X;
|
|
632
|
-
text = `UNKNOWN STATE ${state}`;
|
|
633
|
-
color = RED;
|
|
634
|
-
break;
|
|
635
|
-
}
|
|
636
|
-
status.push(color + symbol);
|
|
637
|
-
status.push(text);
|
|
638
|
-
if (score) {
|
|
639
|
-
status.push(scid);
|
|
640
|
-
status.push(`segs[${segCursor.n}]`);
|
|
641
|
-
status.push(
|
|
642
|
-
score < minScore
|
|
643
|
-
? RED + LEFT_ARROW + scorePercent + RIGHT_ARROW + color
|
|
644
|
-
: GREEN + LEFT_ARROW + scorePercent + RIGHT_ARROW + color,
|
|
645
|
-
);
|
|
646
|
-
}
|
|
647
|
-
context && status.push(context + NO_COLOR);
|
|
648
|
-
|
|
649
|
-
return status.join(' ');
|
|
650
|
-
}
|
|
651
|
-
} // AlignmentStatus
|