@sc-voice/tools 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.mjs +21 -6
- package/package.json +2 -1
- package/src/defines.mjs +6 -1
- package/src/graph/sankey.mjs +56 -0
- package/src/math/fraction.mjs +8 -0
- package/src/text/ebt-doc.mjs +6 -2
- package/src/text/legacy-doc.mjs +29 -2
- package/src/text/word-space.mjs +83 -54
- package/src/{text → translate}/aligner.mjs +127 -38
- package/src/translate/deepl-adapter.mjs +353 -0
- package/src/translate/dpd-transformer.mjs +17 -0
- package/src/translate/mock-deepl.mjs +351 -0
- package/src/translate/quote-parser.mjs +681 -0
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import { DBG } from '../defines.mjs';
|
|
2
2
|
import { Fraction } from '../math/fraction.mjs';
|
|
3
|
-
import { EbtDoc } from '
|
|
4
|
-
import { LegacyDoc } from '
|
|
5
|
-
import { SuttaCentralId } from '
|
|
6
|
-
import { Unicode } from '
|
|
7
|
-
import {
|
|
3
|
+
import { EbtDoc } from '../text/ebt-doc.mjs';
|
|
4
|
+
import { LegacyDoc } from '../text/legacy-doc.mjs';
|
|
5
|
+
import { SuttaCentralId } from '../text/sutta-central-id.mjs';
|
|
6
|
+
import { Unicode } from '../text/unicode.mjs';
|
|
7
|
+
import {
|
|
8
|
+
WordMapTransformer,
|
|
9
|
+
WordSpace,
|
|
10
|
+
} from '../text/word-space.mjs';
|
|
8
11
|
|
|
9
12
|
const STATE_OK = 'ok';
|
|
10
13
|
const STATE_WARN = 'warn';
|
|
@@ -33,13 +36,72 @@ const {
|
|
|
33
36
|
|
|
34
37
|
let alignmentCtor = false;
|
|
35
38
|
|
|
39
|
+
/**
 * Wraps another transformer and additionally indexes the non-empty
 * values of that transformer's word map as regular expressions.
 *
 * transform() and normalize() delegate to the wrapped transformer
 * unchanged, apart from optional debug logging controlled by
 * DBG.PALI_TRANSFORMER.
 */
class PaliTransformer {
  /**
   * @param {object} transformer - wrapped transformer. This class reads
   *   its `wordMap` property and calls its `transform(text)` and
   *   `normalize(text)` methods.
   */
  constructor(transformer) {
    const { wordMap } = transformer;
    this.transformer = transformer;

    // Map: word-map value => /\b<value>/gi. Empty values are skipped.
    const reList = new Map();
    for (const paliText of Object.values(wordMap)) {
      if (!paliText) {
        continue;
      }
      // Escape regex metacharacters so the value is matched literally;
      // interpolating it raw would mis-match (".") or throw ("(", "[").
      const literal = String(paliText).replace(
        /[.*+?^${}()|[\]\\]/g,
        '\\$&',
      );
      reList.set(paliText, new RegExp(`\\b${literal}`, 'gi'));
    }
    this.reList = reList;
  }

  /** Word map of the wrapped transformer. */
  get wordMap() {
    return this.transformer.wordMap;
  }

  /**
   * Delegates to the wrapped transformer's transform().
   * @param {string} text
   * @returns {*} whatever the wrapped transformer returns
   */
  transform(text) {
    const msg = 'P14r.transform';
    const dbg = DBG.PALI_TRANSFORMER;
    dbg && console.log(msg, text);
    return this.transformer.transform(text);
  }

  /**
   * Delegates to the wrapped transformer's normalize().
   * @param {string} text
   * @returns {*} whatever the wrapped transformer returns
   */
  normalize(text) {
    const msg = 'P14r.normalize';
    const dbg = DBG.PALI_TRANSFORMER;
    dbg && console.log(msg, text);
    return this.transformer.normalize(text);
  }
}
|
|
76
|
+
|
|
77
|
+
/**
 * Transformer that holds a dictionary and currently passes text
 * through unchanged in both transform() and normalize().
 */
export class DpdTransformer {
  /**
   * @param {object} [opts]
   * @param {*} opts.dictionary - required; stored as `this.dictionary`.
   * @throws {Error} when `dictionary` is null or undefined (including
   *   when `opts` itself is null)
   */
  constructor(opts = {}) {
    const msg = 'D12r.ctor:';
    // `opts ?? {}` lets an explicit null reach the intended error below
    // instead of failing inside the destructuring.
    const { dictionary } = opts ?? {};
    if (dictionary == null) {
      throw new Error(`${msg} dictionary?`);
    }

    this.dictionary = dictionary;
  }

  /**
   * @param {*} text
   * @returns {*} `text`, unchanged
   */
  transform(text) {
    return text;
  }

  /**
   * @param {*} text
   * @returns {*} `text`, unchanged
   */
  normalize(text) {
    return text;
  }
}
|
|
96
|
+
|
|
36
97
|
export class Aligner {
|
|
37
98
|
constructor(opts = {}) {
|
|
38
|
-
const msg = '
|
|
99
|
+
const msg = 'A5r.ctor:';
|
|
39
100
|
let {
|
|
40
|
-
|
|
101
|
+
alignMethod = 'alignPali',
|
|
41
102
|
authorAligned, // author of segment aligned document
|
|
42
103
|
authorLegacy, // author of legacy document
|
|
104
|
+
dbgScid,
|
|
43
105
|
groupDecay = 0.5, // group exponential decay
|
|
44
106
|
groupSize = 1, // comparison group size
|
|
45
107
|
lang, // 2-letter ISO language (en, fr, es, pt)
|
|
@@ -52,16 +114,26 @@ export class Aligner {
|
|
|
52
114
|
wordSpace,
|
|
53
115
|
} = opts;
|
|
54
116
|
if (wordSpace == null) {
|
|
55
|
-
wordSpace = new WordSpace({
|
|
117
|
+
wordSpace = new WordSpace({
|
|
118
|
+
lang,
|
|
119
|
+
minWord,
|
|
120
|
+
normalizeVector,
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
if (alignMethod === 'alignPali') {
|
|
124
|
+
wordSpace.transformer = new PaliTransformer(
|
|
125
|
+
wordSpace.transformer,
|
|
126
|
+
);
|
|
56
127
|
}
|
|
57
128
|
if (lang == null) {
|
|
58
129
|
lang = wordSpace.lang;
|
|
59
130
|
}
|
|
60
131
|
|
|
61
132
|
Object.assign(this, {
|
|
62
|
-
|
|
133
|
+
alignMethod,
|
|
63
134
|
authorAligned,
|
|
64
135
|
authorLegacy,
|
|
136
|
+
dbgScid,
|
|
65
137
|
groupSize,
|
|
66
138
|
groupDecay,
|
|
67
139
|
lang,
|
|
@@ -97,6 +169,7 @@ export class Aligner {
|
|
|
97
169
|
const msg = 'A7t.createAlignment:';
|
|
98
170
|
const dbg = DBG.CREATE_ALIGNMENT;
|
|
99
171
|
let {
|
|
172
|
+
dbgScid = this.dbgScid,
|
|
100
173
|
legacyDoc,
|
|
101
174
|
mlDoc,
|
|
102
175
|
minScore = this.minScore,
|
|
@@ -112,7 +185,8 @@ export class Aligner {
|
|
|
112
185
|
throw new Error(`${msg} mlDoc?`);
|
|
113
186
|
}
|
|
114
187
|
|
|
115
|
-
let
|
|
188
|
+
let { author, author_uid, lines, footer } = legacyDoc;
|
|
189
|
+
let nLines = lines.length;
|
|
116
190
|
let lineCursor = new Fraction(0, nLines, 'lines');
|
|
117
191
|
let scids = Object.keys(mlDoc.segMap);
|
|
118
192
|
let nSegs = scids.length;
|
|
@@ -128,18 +202,25 @@ export class Aligner {
|
|
|
128
202
|
throw new Error(`${msg} minScanSize? ${minScanSize} `);
|
|
129
203
|
}
|
|
130
204
|
|
|
131
|
-
let { sutta_uid:suid, docAuthor, bilaraPaths } = mlDoc;
|
|
132
|
-
let
|
|
133
|
-
let bilaraPath = bilaraPaths.reduce((a,p)=>{
|
|
205
|
+
let { sutta_uid: suid, docAuthor, bilaraPaths } = mlDoc;
|
|
206
|
+
let bilaraPath = bilaraPaths.reduce((a, p) => {
|
|
134
207
|
if (p.includes(docAuthor)) {
|
|
135
208
|
a = p.replaceAll(docAuthor, author_uid);
|
|
136
209
|
}
|
|
137
210
|
return a;
|
|
138
211
|
});
|
|
139
|
-
let docOpts = {
|
|
212
|
+
let docOpts = {
|
|
213
|
+
suid,
|
|
214
|
+
lang,
|
|
215
|
+
author,
|
|
216
|
+
author_uid,
|
|
217
|
+
bilaraPath,
|
|
218
|
+
footer,
|
|
219
|
+
};
|
|
140
220
|
|
|
141
221
|
const optsAlignment = {
|
|
142
222
|
aligner: this,
|
|
223
|
+
dbgScid,
|
|
143
224
|
ebtDoc: EbtDoc.create(docOpts),
|
|
144
225
|
legacyDoc,
|
|
145
226
|
lineCursor,
|
|
@@ -162,14 +243,14 @@ export class Aligner {
|
|
|
162
243
|
mlDocVectors(mld) {
|
|
163
244
|
const msg = 'Aligner.mlDocVectors';
|
|
164
245
|
const dbg = DBG.ML_DOC_VECTORS;
|
|
165
|
-
let {
|
|
166
|
-
let { wordMap } = wordSpace;
|
|
246
|
+
let { alignMethod, groupDecay, groupSize, wordSpace } = this;
|
|
247
|
+
let { wordMap } = wordSpace.transformer;
|
|
167
248
|
let { segMap, lang } = mld;
|
|
168
249
|
let segs = Object.entries(segMap);
|
|
169
250
|
let iLastSeg = segs.length - 1;
|
|
170
251
|
let reList;
|
|
171
252
|
|
|
172
|
-
if (alignPali) {
|
|
253
|
+
if (alignMethod === 'alignPali') {
|
|
173
254
|
let entries = Object.entries(wordMap);
|
|
174
255
|
reList = entries.reduce((a, e) => {
|
|
175
256
|
let [legacyText, paliText] = e;
|
|
@@ -184,26 +265,34 @@ export class Aligner {
|
|
|
184
265
|
let segGroup = [];
|
|
185
266
|
for (let i = segs.length; i-- > 0; ) {
|
|
186
267
|
let [scid, seg] = segs[i];
|
|
268
|
+
let vGroup = new WordSpace.Vector();
|
|
269
|
+
|
|
187
270
|
let { pli } = seg;
|
|
188
271
|
let segData = seg[lang] || '';
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
272
|
+
switch (alignMethod) {
|
|
273
|
+
case 'alignPali':
|
|
274
|
+
{
|
|
275
|
+
// for aligning Pali, we add all Pali words that
|
|
276
|
+
// occur in the Pali for a segment to the
|
|
277
|
+
// vector input text
|
|
278
|
+
let pliWords = [];
|
|
279
|
+
reList.forEach((re, paliText, map) => {
|
|
280
|
+
let nMatch = pli.match(re)?.length || 0;
|
|
281
|
+
if (nMatch) {
|
|
282
|
+
for (let i = 0; i < nMatch; i++) {
|
|
283
|
+
pliWords.push(paliText);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
});
|
|
287
|
+
if (pliWords.length) {
|
|
288
|
+
segData += ' ' + pliWords.join(' ');
|
|
289
|
+
dbg === scid &&
|
|
290
|
+
console.log(msg, 'segData', scid, segData);
|
|
200
291
|
}
|
|
201
292
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
dbg === scid && console.log(msg, 'segData', scid, segData);
|
|
206
|
-
}
|
|
293
|
+
break;
|
|
294
|
+
case 'DPD':
|
|
295
|
+
break;
|
|
207
296
|
}
|
|
208
297
|
segGroup.unshift(segData);
|
|
209
298
|
if (segGroup.length > groupSize) {
|
|
@@ -267,7 +356,7 @@ export class Alignment {
|
|
|
267
356
|
if (typeof opts !== 'object') {
|
|
268
357
|
throw new Error(`${msg} opts?`);
|
|
269
358
|
}
|
|
270
|
-
let { dbgScid } = opts;
|
|
359
|
+
let { dbgScid = this.dbgScid } = opts;
|
|
271
360
|
// biome-ignore format:
|
|
272
361
|
let { ebtDoc, legacyDoc, lineCursor, maxScanSize, minScanSize,
|
|
273
362
|
minScore, mlDoc, scids, segCursor, vMLDoc, wordSpace,
|
|
@@ -281,6 +370,7 @@ export class Alignment {
|
|
|
281
370
|
for (let i = 0; scanning(i); i++) {
|
|
282
371
|
let scid = scids[segCursor.numerator + i];
|
|
283
372
|
if (scid == null) {
|
|
373
|
+
console.log(error, '[1]scid?', segCursor.toString());
|
|
284
374
|
break;
|
|
285
375
|
}
|
|
286
376
|
let vSeg = vMLDoc[scid];
|
|
@@ -407,7 +497,7 @@ export class Alignment {
|
|
|
407
497
|
aligner, ebtDoc, legacyDoc, lineCursor, maxScanSize, minScanSize,
|
|
408
498
|
mlDoc, scidsExp, segCursor, vMLDoc,
|
|
409
499
|
} = this;
|
|
410
|
-
let { lang,
|
|
500
|
+
let { lang, alignMethod, wordSpace } = aligner;
|
|
411
501
|
let { segMap } = mlDoc;
|
|
412
502
|
let scids = Object.keys(segMap);
|
|
413
503
|
scids.sort(SuttaCentralId.compareLow);
|
|
@@ -417,7 +507,6 @@ export class Alignment {
|
|
|
417
507
|
|
|
418
508
|
while (lineCursor.difference < 0) {
|
|
419
509
|
let line = lines[lineCursor.numerator];
|
|
420
|
-
dbg > 1 && console.log(msg, lineCursor.toString(), line);
|
|
421
510
|
let curScid = scids[segCursor.numerator];
|
|
422
511
|
let dbgScid = scidsExp?.[lineCursor.numerator];
|
|
423
512
|
let r = this.alignLine(line, { dbgScid });
|
|
@@ -425,12 +514,12 @@ export class Alignment {
|
|
|
425
514
|
// biome-ignore format:
|
|
426
515
|
if (r == null) {
|
|
427
516
|
let { vSeg, vLegacy, intersection } = this.status;
|
|
428
|
-
dbg && console.log(
|
|
517
|
+
dbg && console.log(msg, 'UNMATCHED',
|
|
429
518
|
lineCursor.toString(),
|
|
430
519
|
segCursor.toString(),
|
|
431
520
|
{ curScid, line, minScanSize, maxScanSize, vSeg, vLegacy, intersection },
|
|
432
521
|
);
|
|
433
|
-
|
|
522
|
+
return null;
|
|
434
523
|
}
|
|
435
524
|
}
|
|
436
525
|
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
import * as deepl from 'deepl-node';
|
|
2
|
+
import { DBG } from '../defines.mjs';
|
|
3
|
+
import { MockDeepL } from './mock-deepl.mjs';
|
|
4
|
+
|
|
5
|
+
const EMPTY_TEXT = '911911911';
|
|
6
|
+
const TRANSLATE_OPTS = {
|
|
7
|
+
tag_handling: 'xml',
|
|
8
|
+
formality: 'more',
|
|
9
|
+
};
|
|
10
|
+
const DST_AUTHOR = 'no-author';
|
|
11
|
+
|
|
12
|
+
let mockApi = DBG.MOCK_DEEPL;
|
|
13
|
+
|
|
14
|
+
/**
 * Adapter around a DeepL translator object.
 *
 * Instances are meant to be built with the async factory
 * DeepLAdapter.create(), which resolves the translator, the glossary
 * and the DeepL language codes before calling the constructor.
 */
export class DeepLAdapter {
  #authKey;

  /**
   * Builds an adapter from fully resolved options.
   *
   * `srcLang`/`dstLang` default to 'en'/'pt-pt' (see srcDstLangs());
   * every other required option must be supplied by the caller.
   *
   * @param {object} [opts] - see create() for the individual options
   * @throws {Error} 'use DeepLAdapter.create() N' where N is the 1-based
   *   position of the first missing option in this order: authKey,
   *   dstLang2, glossaryName, initialized, sourceLang, targetLang,
   *   srcLang2, translateOpts, translator
   */
  constructor(opts = {}) {
    const {
      authKey,
      glossary,
      glossaryName,
      initialized,
      sourceLang, // deepl lang
      targetLang, // deepl lang
      translateOpts,
      translator,
      dstLang,
      dstLang2, // bilara-data lang
      srcLang2, // bilara-data lang
      srcLang,
    } = DeepLAdapter.srcDstLangs(opts);

    const emsg = 'use DeepLAdapter.create()';
    // Order matters: the index of the first missing value is reported.
    const required = [
      authKey,
      dstLang2,
      glossaryName,
      initialized,
      sourceLang,
      targetLang,
      srcLang2,
      translateOpts,
      translator,
    ];
    required.forEach((value, i) => {
      if (value == null) {
        throw new Error(`${emsg} ${i + 1}`);
      }
    });

    this.#authKey = authKey;

    Object.assign(this, {
      dstLang,
      dstLang2,
      glossary,
      glossaryName,
      initialized,
      srcLang,
      srcLang2,
      sourceLang,
      targetLang,
      // private JSON copy so later caller-side mutation has no effect
      translateOpts: JSON.parse(JSON.stringify(translateOpts)),
      translator,
    });
  }

  /**
   * Normalizes source/destination language options.
   *
   * @param {object} [opts]
   * @param {string} [opts.srcLang='en']
   * @param {string} [opts.dstLang='pt-pt']
   * @returns {object} copy of `opts` with lower-cased `srcLang`/`dstLang`
   *   plus `srcLang2`/`dstLang2`, the part before the first '-'
   */
  static srcDstLangs(opts = {}) {
    const { srcLang = 'en', dstLang = 'pt-pt' } = opts;
    const srcLower = srcLang.toLowerCase();
    const dstLower = dstLang.toLowerCase();

    return {
      ...opts,
      srcLang: srcLower,
      srcLang2: srcLower.split('-')[0],
      dstLang: dstLower,
      dstLang2: dstLower.split('-')[0],
    };
  }

  /**
   * Maps a language code to the code passed to the translator.
   * Only 'pt' is rewritten (to 'pt-PT'); anything else is returned as is.
   *
   * @param {string} lang
   * @returns {string}
   */
  static deeplLang(lang) {
    return lang === 'pt' ? 'pt-PT' : lang;
  }

  /**
   * Computes the glossary name for a language pair and author.
   *
   * @param {object} [opts]
   * @param {string} [opts.srcLang]
   * @param {string} [opts.dstLang]
   * @param {string} [opts.dstAuthor=DST_AUTHOR]
   * @returns {string} lower-cased `D10r_<srcLang2>_<dstLang2>_<dstAuthor>`
   */
  static glossaryName(opts = {}) {
    const msg = 'D10r.glossaryName()';
    const dbg = DBG.GLOSSARY;
    const { dstAuthor = DST_AUTHOR } = opts;
    const { dstLang2, srcLang2 } = DeepLAdapter.srcDstLangs(opts);
    const name =
      `D10r_${srcLang2}_${dstLang2}_${dstAuthor}`.toLowerCase();
    dbg && console.log(msg, name);
    return name;
  }

  /**
   * Async factory: resolves translator, glossary and language codes,
   * then constructs the adapter.
   *
   * @param {object} [opts]
   * @param {string} opts.authKey - required
   * @param {string} [opts.srcLang='en']
   * @param {string} [opts.dstLang='pt-pt']
   * @param {string} [opts.dstAuthor=DST_AUTHOR]
   * @param {string} [opts.sourceLang] - defaults to deeplLang(srcLang)
   * @param {string} [opts.targetLang] - defaults to deeplLang(dstLang)
   * @param {object} [opts.translateOpts=TRANSLATE_OPTS]
   * @param {boolean} [opts.updateGlossary=false] - when true, the
   *   glossary is re-uploaded via uploadGlossary()
   * @param {*} [opts.glossaryEntries] - forwarded to uploadGlossary()
   *   when `updateGlossary` is true
   * @param {object} [opts.translator] - when absent, a
   *   MockDeepL.Translator or deepl.Translator is created, depending on
   *   the module-level mock flag
   * @returns {Promise<DeepLAdapter>}
   * @throws {Error} when `authKey` is missing
   */
  static async create(opts = {}) {
    const msg = 'D10r.create()';
    const dbg = DBG.GLOSSARY;
    let {
      authKey,
      srcLang,
      srcLang2,
      dstLang,
      dstLang2,
      dstAuthor = DST_AUTHOR,
      glossaryEntries,
      sourceLang,
      targetLang,
      translateOpts = TRANSLATE_OPTS,
      updateGlossary = false,
      translator,
    } = DeepLAdapter.srcDstLangs(opts);
    // Never print the secret key, even in debug output.
    dbg &&
      console.log(msg, '[1]opts', {
        ...opts,
        authKey: authKey == null ? authKey : '***',
      });
    if (authKey == null) {
      throw new Error(`${msg} authKey?`);
    }
    sourceLang = sourceLang || DeepLAdapter.deeplLang(srcLang);
    targetLang = targetLang || DeepLAdapter.deeplLang(dstLang);
    if (translator == null) {
      dbg && console.log(msg, '[2]new deepl.Translator()');
      translator = mockApi
        ? new MockDeepL.Translator(authKey)
        : new deepl.Translator(authKey);
    }

    const glossaryName = DeepLAdapter.glossaryName({
      srcLang,
      dstLang,
      dstAuthor,
    });
    const glossaries = await translator.listGlossaries();
    // If several glossaries share the name, the last one listed wins.
    let glossary = glossaries.reduce(
      (found, g) => (g.name === glossaryName ? g : found),
      null,
    );
    if (updateGlossary) {
      console.warn(msg, '[3]updateGlossary', glossaryName);
      dbg && console.log(msg, '[4]uploadGlossary');
      glossary = await DeepLAdapter.uploadGlossary({
        srcLang,
        dstLang,
        dstAuthor,
        translator,
        glossaries,
        // uploadGlossary() rejects calls without entries, so they
        // must be passed through from the caller.
        glossaryEntries,
      });
    }
    if (glossary) {
      const { glossaryId, name } = glossary;
      dbg &&
        console.warn(
          msg,
          '[5]using glossary',
          name,
          glossaryId && glossaryId.substring(0, 8),
        );
    } else {
      dbg && console.log(msg, '[6]no glossary');
    }
    // Always work on a copy: the glossary is attached below and must
    // not leak into the shared TRANSLATE_OPTS defaults.
    translateOpts = JSON.parse(
      JSON.stringify(translateOpts || TRANSLATE_OPTS),
    );
    if (glossary) {
      translateOpts.glossary = glossary;
    }
    const initialized = true;

    const ctorOpts = {
      authKey,
      dstLang,
      dstLang2,
      glossary,
      glossaryName,
      initialized,
      srcLang,
      srcLang2,
      sourceLang,
      targetLang,
      translateOpts,
      translator,
    };
    dbg &&
      console.log(msg, '[7]ctor', {
        sourceLang,
        targetLang,
        glossaryName,
      });
    return new DeepLAdapter(ctorOpts);
  }

  /**
   * Sets the module-level flag that makes create() build a
   * MockDeepL.Translator instead of a deepl.Translator.
   *
   * @param {*} value - truthy selects the mock
   */
  static setMockApi(value) {
    mockApi = value;
  }

  /**
   * Converts input into a deepl.GlossaryEntries instance.
   *
   * @param {deepl.GlossaryEntries|string|object} strObj
   *   - a GlossaryEntries instance is returned as is;
   *   - a string is parsed as newline-separated `key|value` lines
   *     (both sides trimmed, blank lines ignored);
   *   - any other object is passed through as the entries record.
   * @returns {deepl.GlossaryEntries}
   * @throws {Error} on a line with only a key or only a value, or when
   *   `strObj` is neither a string nor an object
   */
  static asGlossaryEntries(strObj) {
    const msg = 'd12r.asToGlossaryEntries:';
    const dbg = DBG.KVG_TO_GLOSSARY_ENTRIES;

    if (strObj instanceof deepl.GlossaryEntries) {
      return strObj;
    }
    let entries;

    if (typeof strObj === 'string') {
      // assume kvg string
      // Accumulate into a plain object: entries are a key/value record.
      entries = {};
      for (const kv of strObj.split('\n')) {
        let [key, value] = kv.split(/\|/);
        if (key && !value) {
          throw new Error(`${msg} [1]no value for key:${key}`);
        }
        if (!key && value) {
          throw new Error(`${msg} [2]no key for value:${value}`);
        }
        if (!key && !value) {
          continue; // ignore empty lines
        }
        key = key.trim();
        value = value.trim();
        entries[key] = value;
        dbg > 1 && console.log(msg, '[3]', { key, value });
      }
    } else if (typeof strObj === 'object') {
      entries = strObj;
    } else {
      throw new Error(`${msg} string or object?`);
    }

    return new deepl.GlossaryEntries({ entries });
  }

  /**
   * Replaces the glossary for a language pair/author: deletes every
   * existing glossary with the computed name, then creates a new one.
   *
   * @param {object} [opts]
   * @param {string} [opts.srcLang]
   * @param {string} [opts.dstLang]
   * @param {string} [opts.dstAuthor]
   * @param {object} opts.translator - required; its listGlossaries(),
   *   deleteGlossary() and createGlossary() methods are called
   * @param {Array} [opts.glossaries] - existing glossaries; fetched from
   *   the translator when omitted
   * @param {*} opts.glossaryEntries - required; passed to
   *   translator.createGlossary()
   * @returns {Promise<object>} result of translator.createGlossary()
   * @throws {Error} when `glossaryEntries` or `translator` is missing
   */
  static async uploadGlossary(opts = {}) {
    const msg = 'D10r.uploadGlossary()';
    const dbg = DBG.GLOSSARY;
    let {
      srcLang,
      dstLang,
      dstAuthor,
      translator,
      glossaries,
      glossaryEntries,
    } = DeepLAdapter.srcDstLangs(opts);
    if (glossaryEntries == null) {
      throw new Error(`${msg} glossaryEntries?`);
    }
    if (translator == null) {
      throw new Error(`${msg} translator?`);
    }
    const nEntries = Object.keys(glossaryEntries).length;
    const glossaryName = DeepLAdapter.glossaryName({
      srcLang,
      dstLang,
      dstAuthor,
    });

    if (glossaries == null) {
      glossaries = await translator.listGlossaries();
    }
    // Deletions run one at a time, in listing order.
    for (const g of glossaries) {
      if (g.name === glossaryName) {
        dbg && console.log(msg, '[1]deleting', g.glossaryId);
        await translator.deleteGlossary(g.glossaryId);
      }
    }

    const sourceLang = DeepLAdapter.deeplLang(srcLang);
    const targetLang = DeepLAdapter.deeplLang(dstLang);
    const glossary = await translator.createGlossary(
      glossaryName,
      sourceLang,
      targetLang,
      glossaryEntries,
    );
    const { glossaryId } = glossary;
    dbg &&
      console.log(msg, '[6]createGlossary', {
        glossaryName,
        sourceLang,
        targetLang,
        glossaryId,
        nEntries,
      });

    return glossary;
  }

  /**
   * Deletes a glossary through the adapter's translator.
   *
   * @param {string} id - glossary id passed to translator.deleteGlossary()
   * @returns {Promise<void>}
   */
  async deleteGlossary(id) {
    const msg = 'd12r.deleteGlossary:';
    const dbg = DBG.GLOSSARY;
    const { translator } = this;
    dbg && console.log(msg, '[1]deleting', id);
    await translator.deleteGlossary(id);
    dbg > 1 && console.log(msg, '[2]deleted', id);
  }

  /**
   * @returns {Promise<Array>} result of translator.listGlossaries()
   */
  async listGlossaries() {
    return await this.translator.listGlossaries();
  }

  /**
   * Translates an array of texts with the adapter's translator.
   *
   * Falsy entries are sent as the EMPTY_TEXT placeholder and mapped back
   * to '' when the translator returns the placeholder unchanged.
   *
   * @param {string[]} texts
   * @returns {Promise<string[]>} translated texts, in input order
   */
  async translate(texts) {
    const msg = 'D10r.translate()';
    const dbg = DBG.DEEPL_XLT;
    const dbgv = dbg && DBG.VERBOSE;
    const { translator, srcLang, dstLang, translateOpts } = this;

    // Prefer the language codes resolved at construction time so that
    // explicit sourceLang/targetLang overrides are honored.
    const sourceLang = this.sourceLang || DeepLAdapter.deeplLang(srcLang);
    const targetLang = this.targetLang || DeepLAdapter.deeplLang(dstLang);
    const outgoing = texts.map((t) => t || EMPTY_TEXT);
    dbgv && console.log(msg, '[1]translateOpts', translateOpts);
    const results = await translator.translateText(
      outgoing,
      sourceLang,
      targetLang,
      translateOpts,
    );
    if (dbg) {
      results.forEach((result, i) => {
        console.log(
          msg,
          `\n[${i}<] `,
          `${outgoing[i]}$`,
          `\n[${i}>] `,
          `${result?.text}$`,
        );
      });
    }

    return results.map((r) => (r.text === EMPTY_TEXT ? '' : r.text));
  }
} // DeepLAdapter
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Transformer that holds a dictionary and currently passes text
 * through unchanged.
 */
export class DpdTransformer {
  /**
   * @param {object} [opts]
   * @param {*} opts.dictionary - required; stored as `this.dictionary`.
   * @throws {Error} when `dictionary` is null or undefined (including
   *   when `opts` itself is null)
   */
  constructor(opts = {}) {
    const msg = 'D14r.ctor:';
    // `opts ?? {}` lets an explicit null reach the intended error below
    // instead of failing inside the destructuring.
    const { dictionary } = opts ?? {};
    if (dictionary == null) {
      throw new Error(`${msg} dictionary?`);
    }

    Object.assign(this, {
      dictionary,
    });
  }

  /**
   * @param {*} text
   * @returns {*} `text`, unchanged
   */
  transform(text) {
    return text;
  }

  /**
   * Pass-through counterpart of transform(), provided so that wrappers
   * which call `normalize()` on their transformer can use this class.
   *
   * @param {*} text
   * @returns {*} `text`, unchanged
   */
  normalize(text) {
    return text;
  }
}
|