@diplodoc/cli 4.13.2 → 4.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +105 -83
- package/build/index.js.map +3 -3
- package/package.json +2 -5
- package/src/cmd/translate/index.ts +131 -92
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"author": "Yandex Data UI Team <data-ui@yandex-team.ru>",
|
|
4
4
|
"description": "Make documentation using yfm-docs in Markdown and HTML formats",
|
|
5
5
|
"license": "MIT",
|
|
6
|
-
"version": "4.13.
|
|
6
|
+
"version": "4.13.3",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
9
9
|
"url": "git@github.com:diplodoc-platform/cli.git"
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"@aws-sdk/client-s3": "^3.369.0",
|
|
34
34
|
"@diplodoc/client": "^2.0.3",
|
|
35
35
|
"@diplodoc/latex-extension": "^1.0.3",
|
|
36
|
-
"@diplodoc/markdown-translation": "^
|
|
36
|
+
"@diplodoc/markdown-translation": "^2.0.0-alpha-2",
|
|
37
37
|
"@diplodoc/mermaid-extension": "^1.2.1",
|
|
38
38
|
"@diplodoc/openapi-extension": "^1.4.13",
|
|
39
39
|
"@diplodoc/transform": "^4.8.2",
|
|
@@ -42,13 +42,10 @@
|
|
|
42
42
|
"ajv": "^8.11.0",
|
|
43
43
|
"async": "^3.2.4",
|
|
44
44
|
"chalk": "4.1.2",
|
|
45
|
-
"fast-xml-parser": "^4.0.11",
|
|
46
45
|
"glob": "^8.0.3",
|
|
47
46
|
"highlight.js": "^11.7.0",
|
|
48
47
|
"html-escaper": "^3.0.3",
|
|
49
|
-
"http-status-codes": "^2.2.0",
|
|
50
48
|
"js-yaml": "4.1.0",
|
|
51
|
-
"json-stringify-safe": "^5.0.1",
|
|
52
49
|
"mime-types": "2.1.35",
|
|
53
50
|
"minimatch": "^9.0.3",
|
|
54
51
|
"node-html-parser": "^6.1.5",
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import glob from 'glob';
|
|
2
|
+
import {ok} from 'assert';
|
|
1
3
|
import {asyncify, eachLimit, retry} from 'async';
|
|
2
4
|
|
|
3
|
-
import {dirname, join, resolve} from 'path';
|
|
5
|
+
import {basename, dirname, join, resolve} from 'path';
|
|
4
6
|
import {mkdir, readFile, writeFile} from 'fs/promises';
|
|
5
|
-
import {XMLParser} from 'fast-xml-parser';
|
|
6
7
|
|
|
7
8
|
import {Session} from '@yandex-cloud/nodejs-sdk/dist/session';
|
|
8
9
|
import {TranslationServiceClient} from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/service_clients';
|
|
@@ -11,15 +12,17 @@ import {
|
|
|
11
12
|
TranslateRequest,
|
|
12
13
|
} from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/ai/translate/v2/translation_service';
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
// @ts-ignore
|
|
16
|
+
import {compose, extract} from '@diplodoc/markdown-translation';
|
|
15
17
|
|
|
16
18
|
import {ArgvService} from '../../services';
|
|
17
19
|
import {getYandexOAuthToken} from '../../packages/credentials';
|
|
18
|
-
import {
|
|
20
|
+
import {logger} from '../../utils';
|
|
19
21
|
|
|
20
22
|
import {Arguments, Argv} from 'yargs';
|
|
21
23
|
|
|
22
|
-
import {YandexCloudTranslateGlossaryPair} from '../../models';
|
|
24
|
+
// import {YandexCloudTranslateGlossaryPair} from '../../models';
|
|
25
|
+
import {argvValidator} from '../../validator';
|
|
23
26
|
|
|
24
27
|
const command = 'translate';
|
|
25
28
|
|
|
@@ -32,14 +35,16 @@ const translate = {
|
|
|
32
35
|
builder,
|
|
33
36
|
};
|
|
34
37
|
|
|
35
|
-
const MD_GLOB = '**/*.md';
|
|
36
38
|
const REQUESTS_LIMIT = 20;
|
|
37
39
|
const BYTES_LIMIT = 10000;
|
|
38
40
|
const RETRY_LIMIT = 3;
|
|
39
|
-
const MTRANS_LOCALE = 'MTRANS';
|
|
40
41
|
|
|
41
42
|
function builder<T>(argv: Argv<T>) {
|
|
42
43
|
return argv
|
|
44
|
+
.option('folder-id', {
|
|
45
|
+
describe: 'folder id',
|
|
46
|
+
type: 'string',
|
|
47
|
+
})
|
|
43
48
|
.option('source-language', {
|
|
44
49
|
alias: 'sl',
|
|
45
50
|
describe: 'source language code',
|
|
@@ -50,10 +55,11 @@ function builder<T>(argv: Argv<T>) {
|
|
|
50
55
|
describe: 'target language code',
|
|
51
56
|
type: 'string',
|
|
52
57
|
})
|
|
53
|
-
.
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
58
|
+
.check(argvValidator);
|
|
59
|
+
// .demandOption(
|
|
60
|
+
// ['source-language', 'target-language'],
|
|
61
|
+
// 'command requires to specify source and target languages',
|
|
62
|
+
// );
|
|
57
63
|
}
|
|
58
64
|
|
|
59
65
|
class TranslatorError extends Error {
|
|
@@ -66,34 +72,62 @@ class TranslatorError extends Error {
|
|
|
66
72
|
}
|
|
67
73
|
}
|
|
68
74
|
|
|
75
|
+
type TranslateConfig = {
|
|
76
|
+
input: string;
|
|
77
|
+
output?: string;
|
|
78
|
+
sourceLanguage: string;
|
|
79
|
+
targetLanguage: string;
|
|
80
|
+
folderId?: string;
|
|
81
|
+
glossary?: string;
|
|
82
|
+
include?: string[];
|
|
83
|
+
exclude?: string[];
|
|
84
|
+
};
|
|
85
|
+
|
|
69
86
|
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
|
|
70
87
|
async function handler(args: Arguments<any>) {
|
|
71
88
|
ArgvService.init({
|
|
89
|
+
...(args.translate || {}),
|
|
72
90
|
...args,
|
|
73
91
|
});
|
|
74
92
|
|
|
75
93
|
const {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
94
|
+
folderId,
|
|
95
|
+
// yandexCloudTranslateGlossaryPairs,
|
|
96
|
+
sourceLanguage,
|
|
97
|
+
targetLanguage,
|
|
98
|
+
exclude = [],
|
|
99
|
+
} = ArgvService.getConfig() as unknown as TranslateConfig;
|
|
100
|
+
|
|
101
|
+
let {input, output, include = []} = ArgvService.getConfig() as unknown as TranslateConfig;
|
|
83
102
|
|
|
84
103
|
logger.info(
|
|
85
104
|
input,
|
|
86
105
|
`translating documentation from ${sourceLanguage} to ${targetLanguage} language`,
|
|
87
106
|
);
|
|
88
107
|
|
|
108
|
+
output = output || input;
|
|
109
|
+
|
|
110
|
+
ok(input, 'Required param input is not configured');
|
|
111
|
+
ok(sourceLanguage, 'Required param sourceLanguage is not configured');
|
|
112
|
+
ok(targetLanguage, 'Required param targetLanguage is not configured');
|
|
113
|
+
|
|
89
114
|
try {
|
|
90
|
-
|
|
115
|
+
if (input.endsWith('.md')) {
|
|
116
|
+
include = [basename(input)];
|
|
117
|
+
input = dirname(input);
|
|
118
|
+
} else if (!include.length) {
|
|
119
|
+
include.push('**/*');
|
|
120
|
+
}
|
|
91
121
|
|
|
92
|
-
(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
122
|
+
const files = ([] as string[]).concat(
|
|
123
|
+
...include.map((match) =>
|
|
124
|
+
glob.sync(match, {
|
|
125
|
+
cwd: join(input, sourceLanguage),
|
|
126
|
+
ignore: exclude,
|
|
127
|
+
}),
|
|
128
|
+
),
|
|
129
|
+
);
|
|
130
|
+
const found = [...new Set(files)];
|
|
97
131
|
|
|
98
132
|
const oauthToken = await getYandexOAuthToken();
|
|
99
133
|
|
|
@@ -102,8 +136,8 @@ async function handler(args: Arguments<any>) {
|
|
|
102
136
|
output,
|
|
103
137
|
sourceLanguage,
|
|
104
138
|
targetLanguage,
|
|
105
|
-
yandexCloudTranslateGlossaryPairs,
|
|
106
|
-
folderId
|
|
139
|
+
// yandexCloudTranslateGlossaryPairs,
|
|
140
|
+
folderId,
|
|
107
141
|
oauthToken,
|
|
108
142
|
};
|
|
109
143
|
|
|
@@ -128,12 +162,12 @@ async function handler(args: Arguments<any>) {
|
|
|
128
162
|
|
|
129
163
|
export type TranslatorParams = {
|
|
130
164
|
oauthToken: string;
|
|
131
|
-
folderId: string;
|
|
165
|
+
folderId: string | undefined;
|
|
132
166
|
input: string;
|
|
133
167
|
output: string;
|
|
134
168
|
sourceLanguage: string;
|
|
135
169
|
targetLanguage: string;
|
|
136
|
-
yandexCloudTranslateGlossaryPairs: YandexCloudTranslateGlossaryPair[];
|
|
170
|
+
// yandexCloudTranslateGlossaryPairs: YandexCloudTranslateGlossaryPair[];
|
|
137
171
|
};
|
|
138
172
|
|
|
139
173
|
function translator(params: TranslatorParams) {
|
|
@@ -144,53 +178,80 @@ function translator(params: TranslatorParams) {
|
|
|
144
178
|
output,
|
|
145
179
|
sourceLanguage,
|
|
146
180
|
targetLanguage,
|
|
147
|
-
yandexCloudTranslateGlossaryPairs,
|
|
181
|
+
// yandexCloudTranslateGlossaryPairs,
|
|
148
182
|
} = params;
|
|
149
183
|
|
|
184
|
+
const tmap = new Map<string, Defer>();
|
|
150
185
|
const session = new Session({oauthToken});
|
|
151
186
|
const client = session.client(TranslationServiceClient);
|
|
152
|
-
const request = (texts: string[]) => () =>
|
|
153
|
-
client
|
|
187
|
+
const request = (texts: string[]) => () => {
|
|
188
|
+
return client
|
|
154
189
|
.translate(
|
|
155
190
|
TranslateRequest.fromPartial({
|
|
156
191
|
texts,
|
|
157
192
|
folderId,
|
|
158
193
|
sourceLanguageCode: sourceLanguage,
|
|
159
194
|
targetLanguageCode: targetLanguage,
|
|
160
|
-
glossaryConfig: {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
},
|
|
165
|
-
format: Format.
|
|
195
|
+
// glossaryConfig: {
|
|
196
|
+
// glossaryData: {
|
|
197
|
+
// glossaryPairs: yandexCloudTranslateGlossaryPairs,
|
|
198
|
+
// },
|
|
199
|
+
// },
|
|
200
|
+
format: Format.HTML,
|
|
166
201
|
}),
|
|
167
202
|
)
|
|
168
|
-
.then((results) =>
|
|
203
|
+
.then((results) => {
|
|
204
|
+
return results.translations.map(({text}, index) => {
|
|
205
|
+
const defer = tmap.get(texts[index]);
|
|
206
|
+
if (defer) {
|
|
207
|
+
defer.resolve([text]);
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
return text;
|
|
211
|
+
});
|
|
212
|
+
});
|
|
213
|
+
};
|
|
169
214
|
|
|
170
215
|
return async (mdPath: string) => {
|
|
216
|
+
if (!mdPath.endsWith('.md')) {
|
|
217
|
+
return;
|
|
218
|
+
}
|
|
219
|
+
|
|
171
220
|
try {
|
|
172
221
|
logger.info(mdPath, 'translating');
|
|
173
222
|
|
|
174
|
-
const
|
|
223
|
+
const inputPath = resolve(input, sourceLanguage, mdPath);
|
|
224
|
+
const outputPath = resolve(output, targetLanguage, mdPath);
|
|
225
|
+
const md = await readFile(inputPath, {encoding: 'utf-8'});
|
|
226
|
+
|
|
227
|
+
await mkdir(dirname(outputPath), {recursive: true});
|
|
228
|
+
|
|
229
|
+
if (!md) {
|
|
230
|
+
await writeFile(outputPath, md);
|
|
231
|
+
return;
|
|
232
|
+
}
|
|
175
233
|
|
|
176
|
-
const {
|
|
234
|
+
const {units, skeleton} = extract({
|
|
177
235
|
source: {
|
|
178
236
|
language: sourceLanguage,
|
|
179
|
-
locale:
|
|
237
|
+
locale: 'RU',
|
|
180
238
|
},
|
|
181
239
|
target: {
|
|
182
240
|
language: targetLanguage,
|
|
183
|
-
locale:
|
|
241
|
+
locale: 'US',
|
|
184
242
|
},
|
|
185
243
|
markdown: md,
|
|
186
244
|
markdownPath: mdPath,
|
|
187
245
|
skeletonPath: '',
|
|
188
246
|
});
|
|
189
247
|
|
|
190
|
-
|
|
248
|
+
if (!units.length) {
|
|
249
|
+
await writeFile(outputPath, md);
|
|
250
|
+
return;
|
|
251
|
+
}
|
|
191
252
|
|
|
192
253
|
const parts = await Promise.all(
|
|
193
|
-
|
|
254
|
+
(units as string[]).reduce(
|
|
194
255
|
(
|
|
195
256
|
{
|
|
196
257
|
promises,
|
|
@@ -213,7 +274,14 @@ function translator(params: TranslatorParams) {
|
|
|
213
274
|
return {promises, buffer, bufferSize};
|
|
214
275
|
}
|
|
215
276
|
|
|
216
|
-
|
|
277
|
+
const defer = tmap.get(text);
|
|
278
|
+
if (defer) {
|
|
279
|
+
console.log('SKIPPED', text);
|
|
280
|
+
promises.push(defer.promise);
|
|
281
|
+
return {promises, buffer, bufferSize};
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
if (bufferSize + text.length > BYTES_LIMIT) {
|
|
217
285
|
promises.push(backoff(request(buffer)));
|
|
218
286
|
buffer = [];
|
|
219
287
|
bufferSize = 0;
|
|
@@ -221,6 +289,11 @@ function translator(params: TranslatorParams) {
|
|
|
221
289
|
|
|
222
290
|
buffer.push(text);
|
|
223
291
|
bufferSize += text.length;
|
|
292
|
+
tmap.set(text, new Defer());
|
|
293
|
+
|
|
294
|
+
if (index === units.length - 1) {
|
|
295
|
+
promises.push(backoff(request(buffer)));
|
|
296
|
+
}
|
|
224
297
|
|
|
225
298
|
return {promises, buffer, bufferSize};
|
|
226
299
|
},
|
|
@@ -234,21 +307,12 @@ function translator(params: TranslatorParams) {
|
|
|
234
307
|
|
|
235
308
|
const translations = ([] as string[]).concat(...parts);
|
|
236
309
|
|
|
237
|
-
const
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
sources: texts,
|
|
241
|
-
targets: translations,
|
|
242
|
-
});
|
|
243
|
-
|
|
244
|
-
const composed = await markdownTranslation.compose({
|
|
245
|
-
xlf: translatedXLIFF,
|
|
310
|
+
const composed = await compose({
|
|
311
|
+
useSource: true,
|
|
312
|
+
units: translations,
|
|
246
313
|
skeleton,
|
|
247
314
|
});
|
|
248
315
|
|
|
249
|
-
const outputPath = mdPath.replace(input, output);
|
|
250
|
-
|
|
251
|
-
await mkdir(dirname(outputPath), {recursive: true});
|
|
252
316
|
await writeFile(outputPath, composed);
|
|
253
317
|
|
|
254
318
|
logger.info(outputPath, 'finished translating');
|
|
@@ -273,44 +337,19 @@ function backoff(action: () => Promise<string[]>): Promise<string[]> {
|
|
|
273
337
|
);
|
|
274
338
|
}
|
|
275
339
|
|
|
276
|
-
|
|
277
|
-
|
|
340
|
+
class Defer {
|
|
341
|
+
resolve!: (text: string[]) => void;
|
|
278
342
|
|
|
279
|
-
|
|
343
|
+
reject!: (error: any) => void;
|
|
280
344
|
|
|
281
|
-
|
|
282
|
-
? inputs.map(({source}: {source: string}) => source)
|
|
283
|
-
: [inputs.source];
|
|
284
|
-
}
|
|
345
|
+
promise: Promise<string[]>;
|
|
285
346
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
function createXLIFFDocument(params: CreateXLIFFDocumentParams) {
|
|
294
|
-
const {sourceLanguage, targetLanguage, sources, targets} = params;
|
|
295
|
-
|
|
296
|
-
const unit = (text: string, i: number): string => `
|
|
297
|
-
<trans-unit id="${i + 1}">
|
|
298
|
-
<source xml:lang="${sourceLanguage}">${sources[i]}</source>
|
|
299
|
-
<target xml:lang="${targetLanguage}">${text}</target>
|
|
300
|
-
</trans-unit>`;
|
|
301
|
-
|
|
302
|
-
const doc = `
|
|
303
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
304
|
-
<xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">
|
|
305
|
-
<file original="" source-language="${sourceLanguage}" target-language="${targetLanguage}">
|
|
306
|
-
<header>
|
|
307
|
-
<skl><external-file href="" /></skl>
|
|
308
|
-
</header>
|
|
309
|
-
<body>${targets.map(unit)}</body>
|
|
310
|
-
</file>
|
|
311
|
-
</xliff>`;
|
|
312
|
-
|
|
313
|
-
return doc;
|
|
347
|
+
constructor() {
|
|
348
|
+
this.promise = new Promise((resolve, reject) => {
|
|
349
|
+
this.resolve = resolve;
|
|
350
|
+
this.reject = reject;
|
|
351
|
+
});
|
|
352
|
+
}
|
|
314
353
|
}
|
|
315
354
|
|
|
316
355
|
export {translate};
|