@diplodoc/cli 4.13.6 → 4.13.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@ import {Arguments} from 'yargs';
2
2
  import {ArgvService} from '../../services';
3
3
  import {logger} from '../../utils';
4
4
  import {ok} from 'assert';
5
- import {basename, dirname, join, resolve} from 'path';
6
- import glob from 'glob';
7
- import {getYandexOAuthToken} from '../../packages/credentials';
5
+ import {dirname, extname, resolve} from 'path';
6
+ import {mkdir} from 'fs/promises';
7
+ import {AuthInfo, getYandexAuth} from './yandex/auth';
8
8
  import {asyncify, eachLimit, retry} from 'async';
9
9
  import {Session} from '@yandex-cloud/nodejs-sdk/dist/session';
10
10
  import {TranslationServiceClient} from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/service_clients';
@@ -12,10 +12,18 @@ import {
12
12
  TranslateRequest_Format as Format,
13
13
  TranslateRequest,
14
14
  } from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/ai/translate/v2/translation_service';
15
- import {mkdir, readFile, writeFile} from 'fs/promises';
16
15
 
17
- // @ts-ignore
18
- import {compose, extract} from '@diplodoc/markdown-translation';
16
+ import {
17
+ Defer,
18
+ TranslateParams,
19
+ bytes,
20
+ compose,
21
+ dumpFile,
22
+ extract,
23
+ flat,
24
+ loadFile,
25
+ normalizeParams,
26
+ } from './utils';
19
27
 
20
28
  const REQUESTS_LIMIT = 20;
21
29
  const BYTES_LIMIT = 10000;
@@ -31,96 +39,75 @@ class TranslatorError extends Error {
31
39
  }
32
40
  }
33
41
 
34
- type TranslateConfig = {
35
- input: string;
36
- output?: string;
37
- sourceLanguage: string;
38
- targetLanguage: string;
39
- folderId?: string;
40
- glossary?: string;
41
- include?: string[];
42
- exclude?: string[];
43
- };
42
+ class RequestError extends Error {
43
+ code: string;
44
+
45
+ constructor(error: Error) {
46
+ super(error?.message || String(error));
47
+ this.code = 'REQUEST_ERROR';
48
+ }
49
+ }
44
50
 
45
51
  export async function handler(args: Arguments<any>) {
46
- ArgvService.init({
52
+ const params = normalizeParams({
47
53
  ...(args.translate || {}),
48
54
  ...args,
49
55
  });
50
56
 
51
- const {
52
- folderId,
53
- // yandexCloudTranslateGlossaryPairs,
54
- sourceLanguage,
55
- targetLanguage,
56
- exclude = [],
57
- } = ArgvService.getConfig() as unknown as TranslateConfig;
58
-
59
- let {input, output, include = []} = ArgvService.getConfig() as unknown as TranslateConfig;
57
+ ArgvService.init(params);
60
58
 
61
- logger.info(
62
- input,
63
- `translating documentation from ${sourceLanguage} to ${targetLanguage} language`,
64
- );
65
-
66
- output = output || input;
59
+ const {input, output, auth, folder, source, targets, files, dryRun} =
60
+ ArgvService.getConfig() as unknown as TranslateParams;
67
61
 
68
- ok(input, 'Required param input is not configured');
69
- ok(sourceLanguage, 'Required param sourceLanguage is not configured');
70
- ok(targetLanguage, 'Required param targetLanguage is not configured');
62
+ ok(auth, 'Required param auth is not configured');
63
+ ok(folder, 'Required param folder is not configured');
64
+ ok(source, `Required param source is not configured`);
65
+ ok(targets.length, `Required param target is not configured`);
71
66
 
72
67
  try {
73
- if (input.endsWith('.md')) {
74
- include = [basename(input)];
75
- input = dirname(input);
76
- } else if (!include.length) {
77
- include.push('**/*');
78
- }
79
-
80
- const files = ([] as string[]).concat(
81
- ...include.map((match) =>
82
- glob.sync(match, {
83
- cwd: join(input, sourceLanguage),
84
- ignore: exclude,
68
+ const authInfo = getYandexAuth(auth);
69
+
70
+ for (const target of targets) {
71
+ const translatorParams = {
72
+ input,
73
+ output,
74
+ sourceLanguage: source[0],
75
+ targetLanguage: target[0],
76
+ // yandexCloudTranslateGlossaryPairs,
77
+ folderId: folder,
78
+ auth: authInfo,
79
+ dryRun,
80
+ };
81
+
82
+ const cache = new Map<string, Defer>();
83
+ const request = requester(translatorParams, cache);
84
+ const split = splitter(request, cache);
85
+ const translate = translator(translatorParams, split);
86
+
87
+ await eachLimit(
88
+ files,
89
+ REQUESTS_LIMIT,
90
+ asyncify(async function (file: string) {
91
+ try {
92
+ await translate(file);
93
+ } catch (error: any) {
94
+ logger.error(file, error.message);
95
+ }
85
96
  }),
86
- ),
87
- );
88
- const found = [...new Set(files)];
89
-
90
- const oauthToken = await getYandexOAuthToken();
91
-
92
- const translatorParams = {
93
- input,
94
- output,
95
- sourceLanguage,
96
- targetLanguage,
97
- // yandexCloudTranslateGlossaryPairs,
98
- folderId,
99
- oauthToken,
100
- };
101
-
102
- const translateFn = translator(translatorParams);
97
+ );
103
98
 
104
- await eachLimit(found, REQUESTS_LIMIT, asyncify(translateFn));
105
- } catch (err) {
106
- if (err instanceof Error || err instanceof TranslatorError) {
107
- const message = err.message;
99
+ console.log('PROCESSED', `bytes: ${request.stat.bytes} chunks: ${request.stat.chunks}`);
100
+ }
101
+ } catch (error: any) {
102
+ const message = error.message;
108
103
 
109
- const file = err instanceof TranslatorError ? err.path : '';
104
+ const file = error instanceof TranslatorError ? error.path : '';
110
105
 
111
- logger.error(file, message);
112
- }
106
+ logger.error(file, message);
113
107
  }
114
-
115
- logger.info(
116
- output,
117
- `translated documentation from ${sourceLanguage} to ${targetLanguage} language`,
118
- );
119
108
  }
120
109
 
121
- export type TranslatorParams = {
122
- oauthToken: string;
123
- folderId: string | undefined;
110
+ type TranslatorParams = {
124
111
  input: string;
125
112
  output: string;
126
113
  sourceLanguage: string;
@@ -128,157 +115,161 @@ export type TranslatorParams = {
128
115
  // yandexCloudTranslateGlossaryPairs: YandexCloudTranslateGlossaryPair[];
129
116
  };
130
117
 
131
- function translator(params: TranslatorParams) {
132
- const {
133
- oauthToken,
134
- folderId,
135
- input,
136
- output,
137
- sourceLanguage,
138
- targetLanguage,
139
- // yandexCloudTranslateGlossaryPairs,
140
- } = params;
141
-
142
- const tmap = new Map<string, Defer>();
143
- const session = new Session({oauthToken});
118
+ type RequesterParams = {
119
+ auth: AuthInfo;
120
+ folderId: string | undefined;
121
+ sourceLanguage: string;
122
+ targetLanguage: string;
123
+ dryRun: boolean;
124
+ };
125
+
126
+ type Request = {
127
+ (texts: string[]): () => Promise<string[]>;
128
+ stat: {
129
+ bytes: number;
130
+ chunks: number;
131
+ };
132
+ };
133
+
134
+ type Split = (path: string, texts: string[]) => Promise<string[]>[];
135
+
136
+ type Cache = Map<string, Defer>;
137
+
138
+ function requester(params: RequesterParams, cache: Cache) {
139
+ const {auth, folderId, sourceLanguage, targetLanguage, dryRun} = params;
140
+ const session = new Session(auth);
144
141
  const client = session.client(TranslationServiceClient);
145
- const request = (texts: string[]) => () => {
146
- return client
147
- .translate(
148
- TranslateRequest.fromPartial({
149
- texts,
150
- folderId,
151
- sourceLanguageCode: sourceLanguage,
152
- targetLanguageCode: targetLanguage,
153
- // glossaryConfig: {
154
- // glossaryData: {
155
- // glossaryPairs: yandexCloudTranslateGlossaryPairs,
156
- // },
157
- // },
158
- format: Format.HTML,
159
- }),
160
- )
161
- .then((results) => {
162
- return results.translations.map(({text}, index) => {
163
- const defer = tmap.get(texts[index]);
164
- if (defer) {
165
- defer.resolve([text]);
166
- }
142
+ const resolve = (text: string, index: number, texts: string[]) => {
143
+ const defer = cache.get(texts[index]);
144
+ if (defer) {
145
+ defer.resolve([text]);
146
+ }
147
+ return text;
148
+ };
149
+
150
+ const request = function request(texts: string[]) {
151
+ request.stat.bytes += bytes(texts);
152
+ request.stat.chunks++;
167
153
 
168
- return text;
154
+ return async function () {
155
+ if (dryRun) {
156
+ return texts.map(resolve);
157
+ }
158
+
159
+ return client
160
+ .translate(
161
+ TranslateRequest.fromPartial({
162
+ texts,
163
+ folderId,
164
+ sourceLanguageCode: sourceLanguage,
165
+ targetLanguageCode: targetLanguage,
166
+ // glossaryConfig: {
167
+ // glossaryData: {
168
+ // glossaryPairs: yandexCloudTranslateGlossaryPairs,
169
+ // },
170
+ // },
171
+ format: Format.HTML,
172
+ }),
173
+ )
174
+ .then((results) => {
175
+ return results.translations.map(({text}, index) => {
176
+ return resolve(text, index, texts);
177
+ });
178
+ })
179
+ .catch((error) => {
180
+ console.error(error);
181
+ throw new RequestError(error);
169
182
  });
170
- });
183
+ };
171
184
  };
172
185
 
173
- return async (mdPath: string) => {
174
- if (!mdPath.endsWith('.md')) {
186
+ request.stat = {
187
+ bytes: 0,
188
+ chunks: 0,
189
+ };
190
+
191
+ return request;
192
+ }
193
+
194
+ function translator(params: TranslatorParams, split: Split) {
195
+ const {input, output, sourceLanguage, targetLanguage} = params;
196
+
197
+ return async (path: string) => {
198
+ const ext = extname(path);
199
+ if (!['.yaml', '.json', '.md'].includes(ext)) {
175
200
  return;
176
201
  }
177
202
 
178
- try {
179
- logger.info(mdPath, 'translating');
203
+ const inputPath = resolve(input, path);
204
+ const outputPath = resolve(output, path);
205
+ const content = await loadFile(inputPath);
180
206
 
181
- const inputPath = resolve(input, sourceLanguage, mdPath);
182
- const outputPath = resolve(output, targetLanguage, mdPath);
183
- const md = await readFile(inputPath, {encoding: 'utf-8'});
207
+ await mkdir(dirname(outputPath), {recursive: true});
184
208
 
185
- await mkdir(dirname(outputPath), {recursive: true});
209
+ if (!content) {
210
+ await dumpFile(outputPath, content);
211
+ return;
212
+ }
186
213
 
187
- if (!md) {
188
- await writeFile(outputPath, md);
189
- return;
190
- }
214
+ const {units, skeleton} = extract(content, {
215
+ source: {
216
+ language: sourceLanguage,
217
+ locale: 'RU',
218
+ },
219
+ target: {
220
+ language: targetLanguage,
221
+ locale: 'US',
222
+ },
223
+ });
191
224
 
192
- const {units, skeleton} = extract({
193
- source: {
194
- language: sourceLanguage,
195
- locale: 'RU',
196
- },
197
- target: {
198
- language: targetLanguage,
199
- locale: 'US',
200
- },
201
- markdown: md,
202
- markdownPath: mdPath,
203
- skeletonPath: '',
204
- });
205
-
206
- if (!units.length) {
207
- await writeFile(outputPath, md);
208
- return;
209
- }
225
+ if (!units.length) {
226
+ await dumpFile(outputPath, content);
227
+ return;
228
+ }
210
229
 
211
- const parts = await Promise.all(
212
- (units as string[]).reduce(
213
- (
214
- {
215
- promises,
216
- buffer,
217
- bufferSize,
218
- }: {
219
- promises: Promise<string[]>[];
220
- buffer: string[];
221
- bufferSize: number;
222
- },
223
- text,
224
- index,
225
- ) => {
226
- if (text.length >= BYTES_LIMIT) {
227
- logger.warn(
228
- mdPath,
229
- 'Skip document part for translation. Part is too big.',
230
- );
231
- promises.push(Promise.resolve([text]));
232
- return {promises, buffer, bufferSize};
233
- }
234
-
235
- const defer = tmap.get(text);
236
- if (defer) {
237
- console.log('SKIPPED', text);
238
- promises.push(defer.promise);
239
- return {promises, buffer, bufferSize};
240
- }
241
-
242
- if (bufferSize + text.length > BYTES_LIMIT) {
243
- promises.push(backoff(request(buffer)));
244
- buffer = [];
245
- bufferSize = 0;
246
- }
247
-
248
- buffer.push(text);
249
- bufferSize += text.length;
250
- tmap.set(text, new Defer());
251
-
252
- if (index === units.length - 1) {
253
- promises.push(backoff(request(buffer)));
254
- }
255
-
256
- return {promises, buffer, bufferSize};
257
- },
258
- {
259
- promises: [],
260
- buffer: [],
261
- bufferSize: 0,
262
- },
263
- ).promises,
264
- );
230
+ const parts = flat(await Promise.all(split(path, units)));
231
+ const composed = compose(skeleton, parts, {useSource: true});
265
232
 
266
- const translations = ([] as string[]).concat(...parts);
233
+ await dumpFile(outputPath, composed);
234
+ };
235
+ }
267
236
 
268
- const composed = await compose({
269
- useSource: true,
270
- units: translations,
271
- skeleton,
272
- });
237
+ function splitter(request: Request, cache: Cache): Split {
238
+ return function (path: string, texts: string[]) {
239
+ const promises: Promise<string[]>[] = [];
240
+ let buffer: string[] = [];
241
+ let bufferSize = 0;
273
242
 
274
- await writeFile(outputPath, composed);
243
+ const release = () => {
244
+ promises.push(backoff(request(buffer)));
245
+ buffer = [];
246
+ bufferSize = 0;
247
+ };
275
248
 
276
- logger.info(outputPath, 'finished translating');
277
- } catch (err) {
278
- if (err instanceof Error) {
279
- throw new TranslatorError(err.toString(), mdPath);
249
+ for (const text of texts) {
250
+ const defer = cache.get(text);
251
+
252
+ if (defer) {
253
+ promises.push(defer.promise);
254
+ } else if (text.length >= BYTES_LIMIT) {
255
+ logger.warn(path, 'Skip document part for translation. Part is too big.');
256
+ promises.push(Promise.resolve([text]));
257
+ } else {
258
+ if (bufferSize + text.length > BYTES_LIMIT) {
259
+ release();
260
+ }
261
+
262
+ buffer.push(text);
263
+ bufferSize += text.length;
264
+ cache.set(text, new Defer());
280
265
  }
281
266
  }
267
+
268
+ if (bufferSize) {
269
+ promises.push(backoff(request(buffer)));
270
+ }
271
+
272
+ return promises;
282
273
  };
283
274
  }
284
275
 
@@ -286,26 +277,8 @@ function backoff(action: () => Promise<string[]>): Promise<string[]> {
286
277
  return retry(
287
278
  {
288
279
  times: RETRY_LIMIT,
289
- interval: (count: number) => {
290
- // eslint-disable-next-line no-bitwise
291
- return (1 << count) * 1000;
292
- },
280
+ interval: (count: number) => Math.pow(2, count) * 1000,
293
281
  },
294
282
  asyncify(action),
295
283
  );
296
284
  }
297
-
298
- class Defer {
299
- resolve!: (text: string[]) => void;
300
-
301
- reject!: (error: any) => void;
302
-
303
- promise: Promise<string[]>;
304
-
305
- constructor() {
306
- this.promise = new Promise((resolve, reject) => {
307
- this.resolve = resolve;
308
- this.reject = reject;
309
- });
310
- }
311
- }
@@ -1,5 +1,5 @@
1
- import {extract} from '../xliff/extract';
2
- import {compose} from '../xliff/compose';
1
+ import {extract} from './extract';
2
+ import {compose} from './compose';
3
3
  import {handler} from './handler';
4
4
 
5
5
  import {Argv} from 'yargs';
@@ -18,32 +18,55 @@ const translate = {
18
18
  };
19
19
 
20
20
  function builder<T>(argv: Argv<T>) {
21
- return argv
22
- .command(extract)
23
- .command(compose)
24
- .option('folder-id', {
25
- describe: 'folder id',
26
- type: 'string',
27
- })
28
- .option('source-language', {
29
- alias: 'sl',
30
- describe: 'source language code',
31
- type: 'string',
32
- })
33
- .option('target-language', {
34
- alias: 'tl',
35
- describe: 'target language code',
36
- type: 'string',
37
- })
38
- .option('include', {
39
- describe: 'relative to input globs to include in processing',
40
- type: 'string',
41
- })
42
- .option('exclude', {
43
- describe: 'relative to input globs to exclude from processing',
44
- type: 'string',
45
- })
46
- .check(argvValidator);
21
+ return (
22
+ argv
23
+ // @ts-ignore
24
+ .command(extract)
25
+ // @ts-ignore
26
+ .command(compose)
27
+ .option('input', {
28
+ alias: 'i',
29
+ describe: 'input folder with markdown files',
30
+ type: 'string',
31
+ default: process.cwd(),
32
+ })
33
+ .option('output', {
34
+ alias: 'o',
35
+ describe: 'output folder to store xliff and skeleton files',
36
+ type: 'string',
37
+ })
38
+ .option('auth', {
39
+ describe: 'auth credentials path',
40
+ type: 'string',
41
+ })
42
+ .option('folder', {
43
+ describe: 'folder',
44
+ type: 'string',
45
+ })
46
+ .option('source', {
47
+ alias: ['sl', 'sll', 'source-language', 'source-language-locale'],
48
+ describe: 'source language and locale',
49
+ type: 'string',
50
+ })
51
+ .option('target', {
52
+ alias: ['tl', 'tll', 'target-language', 'target-language-locale'],
53
+ describe: 'target language and locale',
54
+ type: 'string',
55
+ })
56
+ .option('include', {
57
+ describe: 'relative to input globs to include in processing',
58
+ type: 'string',
59
+ })
60
+ .option('exclude', {
61
+ describe: 'relative to input globs to exclude from processing',
62
+ type: 'string',
63
+ })
64
+ .option('dry-run', {
65
+ describe: 'check command usage',
66
+ type: 'boolean',
67
+ })
68
+ .check(argvValidator)
69
+ );
47
70
  }
48
71
 
49
72
  export {translate};
@@ -0,0 +1,52 @@
1
+ import {JSONValue, resolveRefs} from '@diplodoc/translation';
2
+ import {extname} from 'path';
3
+ import {dump, load} from 'js-yaml';
4
+ import {readFile, writeFile} from 'fs/promises';
5
+
6
+ function parseFile(text: string, path: string): JSONValue | string {
7
+ if (typeof text !== 'string') {
8
+ return text;
9
+ }
10
+
11
+ switch (extname(path)) {
12
+ case '.yaml':
13
+ return load(text) as object;
14
+ case '.json':
15
+ return JSON.parse(text);
16
+ default:
17
+ return text;
18
+ }
19
+ }
20
+
21
+ function stringifyFile(content: JSONValue | string, path: string): string {
22
+ if (typeof content === 'string') {
23
+ return content;
24
+ }
25
+
26
+ switch (extname(path)) {
27
+ case '.yaml':
28
+ return dump(content);
29
+ case '.json':
30
+ return JSON.stringify(content);
31
+ default:
32
+ return content as unknown as string;
33
+ }
34
+ }
35
+
36
+ export async function loadFile<T = string | JSONValue>(path: string): Promise<T> {
37
+ const text = await readFile(path, 'utf8');
38
+
39
+ let content = parseFile(text, path);
40
+
41
+ if (content && typeof content === 'object') {
42
+ content = await resolveRefs(content, path, parseFile);
43
+ }
44
+
45
+ return content as T;
46
+ }
47
+
48
+ export async function dumpFile(path: string, content: string | JSONValue) {
49
+ const text = stringifyFile(content, path);
50
+
51
+ await writeFile(path, text, 'utf8');
52
+ }