@diplodoc/cli 4.13.2 → 4.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "author": "Yandex Data UI Team <data-ui@yandex-team.ru>",
4
4
  "description": "Make documentation using yfm-docs in Markdown and HTML formats",
5
5
  "license": "MIT",
6
- "version": "4.13.2",
6
+ "version": "4.13.3",
7
7
  "repository": {
8
8
  "type": "git",
9
9
  "url": "git@github.com:diplodoc-platform/cli.git"
@@ -33,7 +33,7 @@
33
33
  "@aws-sdk/client-s3": "^3.369.0",
34
34
  "@diplodoc/client": "^2.0.3",
35
35
  "@diplodoc/latex-extension": "^1.0.3",
36
- "@diplodoc/markdown-translation": "^1.1.0",
36
+ "@diplodoc/markdown-translation": "^2.0.0-alpha-2",
37
37
  "@diplodoc/mermaid-extension": "^1.2.1",
38
38
  "@diplodoc/openapi-extension": "^1.4.13",
39
39
  "@diplodoc/transform": "^4.8.2",
@@ -42,13 +42,10 @@
42
42
  "ajv": "^8.11.0",
43
43
  "async": "^3.2.4",
44
44
  "chalk": "4.1.2",
45
- "fast-xml-parser": "^4.0.11",
46
45
  "glob": "^8.0.3",
47
46
  "highlight.js": "^11.7.0",
48
47
  "html-escaper": "^3.0.3",
49
- "http-status-codes": "^2.2.0",
50
48
  "js-yaml": "4.1.0",
51
- "json-stringify-safe": "^5.0.1",
52
49
  "mime-types": "2.1.35",
53
50
  "minimatch": "^9.0.3",
54
51
  "node-html-parser": "^6.1.5",
@@ -1,8 +1,9 @@
1
+ import glob from 'glob';
2
+ import {ok} from 'assert';
1
3
  import {asyncify, eachLimit, retry} from 'async';
2
4
 
3
- import {dirname, join, resolve} from 'path';
5
+ import {basename, dirname, join, resolve} from 'path';
4
6
  import {mkdir, readFile, writeFile} from 'fs/promises';
5
- import {XMLParser} from 'fast-xml-parser';
6
7
 
7
8
  import {Session} from '@yandex-cloud/nodejs-sdk/dist/session';
8
9
  import {TranslationServiceClient} from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/service_clients';
@@ -11,15 +12,17 @@ import {
11
12
  TranslateRequest,
12
13
  } from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/ai/translate/v2/translation_service';
13
14
 
14
- import markdownTranslation from '@diplodoc/markdown-translation';
15
+ // @ts-ignore
16
+ import {compose, extract} from '@diplodoc/markdown-translation';
15
17
 
16
18
  import {ArgvService} from '../../services';
17
19
  import {getYandexOAuthToken} from '../../packages/credentials';
18
- import {glob, logger} from '../../utils';
20
+ import {logger} from '../../utils';
19
21
 
20
22
  import {Arguments, Argv} from 'yargs';
21
23
 
22
- import {YandexCloudTranslateGlossaryPair} from '../../models';
24
+ // import {YandexCloudTranslateGlossaryPair} from '../../models';
25
+ import {argvValidator} from '../../validator';
23
26
 
24
27
  const command = 'translate';
25
28
 
@@ -32,14 +35,16 @@ const translate = {
32
35
  builder,
33
36
  };
34
37
 
35
- const MD_GLOB = '**/*.md';
36
38
  const REQUESTS_LIMIT = 20;
37
39
  const BYTES_LIMIT = 10000;
38
40
  const RETRY_LIMIT = 3;
39
- const MTRANS_LOCALE = 'MTRANS';
40
41
 
41
42
  function builder<T>(argv: Argv<T>) {
42
43
  return argv
44
+ .option('folder-id', {
45
+ describe: 'folder id',
46
+ type: 'string',
47
+ })
43
48
  .option('source-language', {
44
49
  alias: 'sl',
45
50
  describe: 'source language code',
@@ -50,10 +55,11 @@ function builder<T>(argv: Argv<T>) {
50
55
  describe: 'target language code',
51
56
  type: 'string',
52
57
  })
53
- .demandOption(
54
- ['source-language', 'target-language'],
55
- 'command requires to specify source and target languages',
56
- );
58
+ .check(argvValidator);
59
+ // .demandOption(
60
+ // ['source-language', 'target-language'],
61
+ // 'command requires to specify source and target languages',
62
+ // );
57
63
  }
58
64
 
59
65
  class TranslatorError extends Error {
@@ -66,34 +72,62 @@ class TranslatorError extends Error {
66
72
  }
67
73
  }
68
74
 
75
+ type TranslateConfig = {
76
+ input: string;
77
+ output?: string;
78
+ sourceLanguage: string;
79
+ targetLanguage: string;
80
+ folderId?: string;
81
+ glossary?: string;
82
+ include?: string[];
83
+ exclude?: string[];
84
+ };
85
+
69
86
  /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
70
87
  async function handler(args: Arguments<any>) {
71
88
  ArgvService.init({
89
+ ...(args.translate || {}),
72
90
  ...args,
73
91
  });
74
92
 
75
93
  const {
76
- input,
77
- output,
78
- yandexCloudTranslateFolderId,
79
- yandexCloudTranslateGlossaryPairs,
80
- sl: sourceLanguage,
81
- tl: targetLanguage,
82
- } = args;
94
+ folderId,
95
+ // yandexCloudTranslateGlossaryPairs,
96
+ sourceLanguage,
97
+ targetLanguage,
98
+ exclude = [],
99
+ } = ArgvService.getConfig() as unknown as TranslateConfig;
100
+
101
+ let {input, output, include = []} = ArgvService.getConfig() as unknown as TranslateConfig;
83
102
 
84
103
  logger.info(
85
104
  input,
86
105
  `translating documentation from ${sourceLanguage} to ${targetLanguage} language`,
87
106
  );
88
107
 
108
+ output = output || input;
109
+
110
+ ok(input, 'Required param input is not configured');
111
+ ok(sourceLanguage, 'Required param sourceLanguage is not configured');
112
+ ok(targetLanguage, 'Required param targetLanguage is not configured');
113
+
89
114
  try {
90
- let found = [];
115
+ if (input.endsWith('.md')) {
116
+ include = [basename(input)];
117
+ input = dirname(input);
118
+ } else if (!include.length) {
119
+ include.push('**/*');
120
+ }
91
121
 
92
- ({
93
- state: {found},
94
- } = await glob(join(input, MD_GLOB), {
95
- nosort: true,
96
- }));
122
+ const files = ([] as string[]).concat(
123
+ ...include.map((match) =>
124
+ glob.sync(match, {
125
+ cwd: join(input, sourceLanguage),
126
+ ignore: exclude,
127
+ }),
128
+ ),
129
+ );
130
+ const found = [...new Set(files)];
97
131
 
98
132
  const oauthToken = await getYandexOAuthToken();
99
133
 
@@ -102,8 +136,8 @@ async function handler(args: Arguments<any>) {
102
136
  output,
103
137
  sourceLanguage,
104
138
  targetLanguage,
105
- yandexCloudTranslateGlossaryPairs,
106
- folderId: yandexCloudTranslateFolderId,
139
+ // yandexCloudTranslateGlossaryPairs,
140
+ folderId,
107
141
  oauthToken,
108
142
  };
109
143
 
@@ -128,12 +162,12 @@ async function handler(args: Arguments<any>) {
128
162
 
129
163
  export type TranslatorParams = {
130
164
  oauthToken: string;
131
- folderId: string;
165
+ folderId: string | undefined;
132
166
  input: string;
133
167
  output: string;
134
168
  sourceLanguage: string;
135
169
  targetLanguage: string;
136
- yandexCloudTranslateGlossaryPairs: YandexCloudTranslateGlossaryPair[];
170
+ // yandexCloudTranslateGlossaryPairs: YandexCloudTranslateGlossaryPair[];
137
171
  };
138
172
 
139
173
  function translator(params: TranslatorParams) {
@@ -144,53 +178,80 @@ function translator(params: TranslatorParams) {
144
178
  output,
145
179
  sourceLanguage,
146
180
  targetLanguage,
147
- yandexCloudTranslateGlossaryPairs,
181
+ // yandexCloudTranslateGlossaryPairs,
148
182
  } = params;
149
183
 
184
+ const tmap = new Map<string, Defer>();
150
185
  const session = new Session({oauthToken});
151
186
  const client = session.client(TranslationServiceClient);
152
- const request = (texts: string[]) => () =>
153
- client
187
+ const request = (texts: string[]) => () => {
188
+ return client
154
189
  .translate(
155
190
  TranslateRequest.fromPartial({
156
191
  texts,
157
192
  folderId,
158
193
  sourceLanguageCode: sourceLanguage,
159
194
  targetLanguageCode: targetLanguage,
160
- glossaryConfig: {
161
- glossaryData: {
162
- glossaryPairs: yandexCloudTranslateGlossaryPairs,
163
- },
164
- },
165
- format: Format.PLAIN_TEXT,
195
+ // glossaryConfig: {
196
+ // glossaryData: {
197
+ // glossaryPairs: yandexCloudTranslateGlossaryPairs,
198
+ // },
199
+ // },
200
+ format: Format.HTML,
166
201
  }),
167
202
  )
168
- .then((results) => results.translations.map(({text}) => text));
203
+ .then((results) => {
204
+ return results.translations.map(({text}, index) => {
205
+ const defer = tmap.get(texts[index]);
206
+ if (defer) {
207
+ defer.resolve([text]);
208
+ }
209
+
210
+ return text;
211
+ });
212
+ });
213
+ };
169
214
 
170
215
  return async (mdPath: string) => {
216
+ if (!mdPath.endsWith('.md')) {
217
+ return;
218
+ }
219
+
171
220
  try {
172
221
  logger.info(mdPath, 'translating');
173
222
 
174
- const md = await readFile(resolve(mdPath), {encoding: 'utf-8'});
223
+ const inputPath = resolve(input, sourceLanguage, mdPath);
224
+ const outputPath = resolve(output, targetLanguage, mdPath);
225
+ const md = await readFile(inputPath, {encoding: 'utf-8'});
226
+
227
+ await mkdir(dirname(outputPath), {recursive: true});
228
+
229
+ if (!md) {
230
+ await writeFile(outputPath, md);
231
+ return;
232
+ }
175
233
 
176
- const {xlf, skeleton} = markdownTranslation.extract({
234
+ const {units, skeleton} = extract({
177
235
  source: {
178
236
  language: sourceLanguage,
179
- locale: sourceLanguage.toUpperCase(),
237
+ locale: 'RU',
180
238
  },
181
239
  target: {
182
240
  language: targetLanguage,
183
- locale: targetLanguage.toUpperCase(),
241
+ locale: 'US',
184
242
  },
185
243
  markdown: md,
186
244
  markdownPath: mdPath,
187
245
  skeletonPath: '',
188
246
  });
189
247
 
190
- const texts = parseSourcesFromXLIFF(xlf);
248
+ if (!units.length) {
249
+ await writeFile(outputPath, md);
250
+ return;
251
+ }
191
252
 
192
253
  const parts = await Promise.all(
193
- texts.reduce(
254
+ (units as string[]).reduce(
194
255
  (
195
256
  {
196
257
  promises,
@@ -213,7 +274,14 @@ function translator(params: TranslatorParams) {
213
274
  return {promises, buffer, bufferSize};
214
275
  }
215
276
 
216
- if (bufferSize + text.length > BYTES_LIMIT || index === texts.length - 1) {
277
+ const defer = tmap.get(text);
278
+ if (defer) {
279
+ console.log('SKIPPED', text);
280
+ promises.push(defer.promise);
281
+ return {promises, buffer, bufferSize};
282
+ }
283
+
284
+ if (bufferSize + text.length > BYTES_LIMIT) {
217
285
  promises.push(backoff(request(buffer)));
218
286
  buffer = [];
219
287
  bufferSize = 0;
@@ -221,6 +289,11 @@ function translator(params: TranslatorParams) {
221
289
 
222
290
  buffer.push(text);
223
291
  bufferSize += text.length;
292
+ tmap.set(text, new Defer());
293
+
294
+ if (index === units.length - 1) {
295
+ promises.push(backoff(request(buffer)));
296
+ }
224
297
 
225
298
  return {promises, buffer, bufferSize};
226
299
  },
@@ -234,21 +307,12 @@ function translator(params: TranslatorParams) {
234
307
 
235
308
  const translations = ([] as string[]).concat(...parts);
236
309
 
237
- const translatedXLIFF = createXLIFFDocument({
238
- sourceLanguage: sourceLanguage + '-' + MTRANS_LOCALE,
239
- targetLanguage: targetLanguage + '-' + MTRANS_LOCALE,
240
- sources: texts,
241
- targets: translations,
242
- });
243
-
244
- const composed = await markdownTranslation.compose({
245
- xlf: translatedXLIFF,
310
+ const composed = await compose({
311
+ useSource: true,
312
+ units: translations,
246
313
  skeleton,
247
314
  });
248
315
 
249
- const outputPath = mdPath.replace(input, output);
250
-
251
- await mkdir(dirname(outputPath), {recursive: true});
252
316
  await writeFile(outputPath, composed);
253
317
 
254
318
  logger.info(outputPath, 'finished translating');
@@ -273,44 +337,19 @@ function backoff(action: () => Promise<string[]>): Promise<string[]> {
273
337
  );
274
338
  }
275
339
 
276
- function parseSourcesFromXLIFF(xliff: string): string[] {
277
- const parser = new XMLParser();
340
+ class Defer {
341
+ resolve!: (text: string[]) => void;
278
342
 
279
- const inputs = parser.parse(xliff)?.xliff?.file?.body['trans-unit'] ?? [];
343
+ reject!: (error: any) => void;
280
344
 
281
- return Array.isArray(inputs)
282
- ? inputs.map(({source}: {source: string}) => source)
283
- : [inputs.source];
284
- }
345
+ promise: Promise<string[]>;
285
346
 
286
- export type CreateXLIFFDocumentParams = {
287
- sourceLanguage: string;
288
- targetLanguage: string;
289
- sources: string[];
290
- targets: string[];
291
- };
292
-
293
- function createXLIFFDocument(params: CreateXLIFFDocumentParams) {
294
- const {sourceLanguage, targetLanguage, sources, targets} = params;
295
-
296
- const unit = (text: string, i: number): string => `
297
- <trans-unit id="${i + 1}">
298
- <source xml:lang="${sourceLanguage}">${sources[i]}</source>
299
- <target xml:lang="${targetLanguage}">${text}</target>
300
- </trans-unit>`;
301
-
302
- const doc = `
303
- <?xml version="1.0" encoding="UTF-8"?>
304
- <xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">
305
- <file original="" source-language="${sourceLanguage}" target-language="${targetLanguage}">
306
- <header>
307
- <skl><external-file href="" /></skl>
308
- </header>
309
- <body>${targets.map(unit)}</body>
310
- </file>
311
- </xliff>`;
312
-
313
- return doc;
347
+ constructor() {
348
+ this.promise = new Promise((resolve, reject) => {
349
+ this.resolve = resolve;
350
+ this.reject = reject;
351
+ });
352
+ }
314
353
  }
315
354
 
316
355
  export {translate};