@diplodoc/cli 4.13.6 → 4.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,23 @@
1
+ import axios from 'axios';
1
2
  import {Arguments} from 'yargs';
2
3
  import {ArgvService} from '../../services';
3
4
  import {logger} from '../../utils';
4
5
  import {ok} from 'assert';
5
- import {basename, dirname, join, resolve} from 'path';
6
- import glob from 'glob';
7
- import {getYandexOAuthToken} from '../../packages/credentials';
6
+ import {dirname, extname, join, resolve} from 'path';
7
+ import {mkdir} from 'fs/promises';
8
+ import {getYandexAuth} from './yandex/auth';
8
9
  import {asyncify, eachLimit, retry} from 'async';
9
- import {Session} from '@yandex-cloud/nodejs-sdk/dist/session';
10
- import {TranslationServiceClient} from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/service_clients';
11
- import {
12
- TranslateRequest_Format as Format,
13
- TranslateRequest,
14
- } from '@yandex-cloud/nodejs-sdk/dist/generated/yandex/cloud/ai/translate/v2/translation_service';
15
- import {mkdir, readFile, writeFile} from 'fs/promises';
16
10
 
17
- // @ts-ignore
18
- import {compose, extract} from '@diplodoc/markdown-translation';
11
+ import {
12
+ Defer,
13
+ TranslateParams,
14
+ bytes,
15
+ compose,
16
+ dumpFile,
17
+ extract,
18
+ loadFile,
19
+ normalizeParams,
20
+ } from './utils';
19
21
 
20
22
  const REQUESTS_LIMIT = 20;
21
23
  const BYTES_LIMIT = 10000;
@@ -31,96 +33,75 @@ class TranslatorError extends Error {
31
33
  }
32
34
  }
33
35
 
34
- type TranslateConfig = {
35
- input: string;
36
- output?: string;
37
- sourceLanguage: string;
38
- targetLanguage: string;
39
- folderId?: string;
40
- glossary?: string;
41
- include?: string[];
42
- exclude?: string[];
43
- };
36
+ class RequestError extends Error {
37
+ code: string;
38
+
39
+ constructor(error: Error) {
40
+ super(error?.message || String(error));
41
+ this.code = 'REQUEST_ERROR';
42
+ }
43
+ }
44
44
 
45
45
  export async function handler(args: Arguments<any>) {
46
- ArgvService.init({
46
+ const params = normalizeParams({
47
47
  ...(args.translate || {}),
48
48
  ...args,
49
49
  });
50
50
 
51
- const {
52
- folderId,
53
- // yandexCloudTranslateGlossaryPairs,
54
- sourceLanguage,
55
- targetLanguage,
56
- exclude = [],
57
- } = ArgvService.getConfig() as unknown as TranslateConfig;
51
+ ArgvService.init(params);
58
52
 
59
- let {input, output, include = []} = ArgvService.getConfig() as unknown as TranslateConfig;
60
-
61
- logger.info(
62
- input,
63
- `translating documentation from ${sourceLanguage} to ${targetLanguage} language`,
64
- );
53
+ const {input, output, auth, folder, source, targets, files, dryRun} =
54
+ ArgvService.getConfig() as unknown as TranslateParams;
65
55
 
66
- output = output || input;
67
-
68
- ok(input, 'Required param input is not configured');
69
- ok(sourceLanguage, 'Required param sourceLanguage is not configured');
70
- ok(targetLanguage, 'Required param targetLanguage is not configured');
56
+ ok(auth, 'Required param auth is not configured');
57
+ ok(folder, 'Required param folder is not configured');
58
+ ok(source, `Required param source is not configured`);
59
+ ok(targets.length, `Required param target is not configured`);
71
60
 
72
61
  try {
73
- if (input.endsWith('.md')) {
74
- include = [basename(input)];
75
- input = dirname(input);
76
- } else if (!include.length) {
77
- include.push('**/*');
78
- }
79
-
80
- const files = ([] as string[]).concat(
81
- ...include.map((match) =>
82
- glob.sync(match, {
83
- cwd: join(input, sourceLanguage),
84
- ignore: exclude,
62
+ const authInfo = getYandexAuth(auth);
63
+
64
+ for (const target of targets) {
65
+ const translatorParams = {
66
+ input,
67
+ output,
68
+ sourceLanguage: source[0],
69
+ targetLanguage: target[0],
70
+ // yandexCloudTranslateGlossaryPairs,
71
+ folderId: folder,
72
+ auth: authInfo,
73
+ dryRun,
74
+ };
75
+
76
+ const cache = new Map<string, Defer>();
77
+ const request = requester(translatorParams, cache);
78
+ const split = splitter(request, cache);
79
+ const translate = translator(translatorParams, split);
80
+
81
+ await eachLimit(
82
+ files,
83
+ REQUESTS_LIMIT,
84
+ asyncify(async function (file: string) {
85
+ try {
86
+ await translate(file);
87
+ } catch (error: any) {
88
+ logger.error(file, error.message);
89
+ }
85
90
  }),
86
- ),
87
- );
88
- const found = [...new Set(files)];
89
-
90
- const oauthToken = await getYandexOAuthToken();
91
-
92
- const translatorParams = {
93
- input,
94
- output,
95
- sourceLanguage,
96
- targetLanguage,
97
- // yandexCloudTranslateGlossaryPairs,
98
- folderId,
99
- oauthToken,
100
- };
101
-
102
- const translateFn = translator(translatorParams);
91
+ );
103
92
 
104
- await eachLimit(found, REQUESTS_LIMIT, asyncify(translateFn));
105
- } catch (err) {
106
- if (err instanceof Error || err instanceof TranslatorError) {
107
- const message = err.message;
93
+ console.log('PROCESSED', `bytes: ${request.stat.bytes} chunks: ${request.stat.chunks}`);
94
+ }
95
+ } catch (error: any) {
96
+ const message = error.message;
108
97
 
109
- const file = err instanceof TranslatorError ? err.path : '';
98
+ const file = error instanceof TranslatorError ? error.path : '';
110
99
 
111
- logger.error(file, message);
112
- }
100
+ logger.error(file, message);
113
101
  }
114
-
115
- logger.info(
116
- output,
117
- `translated documentation from ${sourceLanguage} to ${targetLanguage} language`,
118
- );
119
102
  }
120
103
 
121
- export type TranslatorParams = {
122
- oauthToken: string;
123
- folderId: string | undefined;
104
+ type TranslatorParams = {
124
105
  input: string;
125
106
  output: string;
126
107
  sourceLanguage: string;
@@ -128,157 +109,176 @@ export type TranslatorParams = {
128
109
  // yandexCloudTranslateGlossaryPairs: YandexCloudTranslateGlossaryPair[];
129
110
  };
130
111
 
131
- function translator(params: TranslatorParams) {
132
- const {
133
- oauthToken,
134
- folderId,
135
- input,
136
- output,
137
- sourceLanguage,
138
- targetLanguage,
139
- // yandexCloudTranslateGlossaryPairs,
140
- } = params;
141
-
142
- const tmap = new Map<string, Defer>();
143
- const session = new Session({oauthToken});
144
- const client = session.client(TranslationServiceClient);
145
- const request = (texts: string[]) => () => {
146
- return client
147
- .translate(
148
- TranslateRequest.fromPartial({
149
- texts,
112
+ type RequesterParams = {
113
+ auth: string;
114
+ folderId: string | undefined;
115
+ sourceLanguage: string;
116
+ targetLanguage: string;
117
+ dryRun: boolean;
118
+ };
119
+
120
+ type Request = {
121
+ (texts: string[]): () => Promise<string[]>;
122
+ stat: {
123
+ bytes: number;
124
+ chunks: number;
125
+ };
126
+ };
127
+
128
+ type Split = (path: string, texts: string[]) => Promise<string>[];
129
+
130
+ type Cache = Map<string, Defer>;
131
+
132
+ type Translations = {
133
+ translations: {
134
+ text: string;
135
+ }[];
136
+ };
137
+
138
+ function requester(params: RequesterParams, cache: Cache) {
139
+ const {auth, folderId, sourceLanguage, targetLanguage, dryRun} = params;
140
+ const resolve = (text: string, index: number, texts: string[]) => {
141
+ const defer = cache.get(texts[index]);
142
+ if (defer) {
143
+ defer.resolve(text);
144
+ }
145
+ return text;
146
+ };
147
+
148
+ const request = function request(texts: string[]) {
149
+ request.stat.bytes += bytes(texts);
150
+ request.stat.chunks++;
151
+
152
+ return async function () {
153
+ if (dryRun) {
154
+ return texts.map(resolve);
155
+ }
156
+
157
+ return axios({
158
+ method: 'POST',
159
+ url: 'https://translate.api.cloud.yandex.net/translate/v2/translate',
160
+ headers: {
161
+ 'Content-Type': 'application/json',
162
+ Authorization: auth,
163
+ },
164
+ data: {
150
165
  folderId,
166
+ texts,
151
167
  sourceLanguageCode: sourceLanguage,
152
168
  targetLanguageCode: targetLanguage,
153
- // glossaryConfig: {
154
- // glossaryData: {
155
- // glossaryPairs: yandexCloudTranslateGlossaryPairs,
156
- // },
157
- // },
158
- format: Format.HTML,
159
- }),
160
- )
161
- .then((results) => {
162
- return results.translations.map(({text}, index) => {
163
- const defer = tmap.get(texts[index]);
164
- if (defer) {
165
- defer.resolve([text]);
169
+ format: 'HTML',
170
+ },
171
+ })
172
+ .then(({data, status}) => {
173
+ if (status === 200) {
174
+ return data;
175
+ } else {
176
+ throw new Error(data.message);
166
177
  }
167
-
168
- return text;
178
+ })
179
+ .then((result: Translations) => {
180
+ return result.translations.map(({text}, index) => {
181
+ return resolve(text, index, texts);
182
+ });
183
+ })
184
+ .catch((error) => {
185
+ console.error(error);
186
+ throw new RequestError(error);
169
187
  });
170
- });
188
+ };
189
+ };
190
+
191
+ request.stat = {
192
+ bytes: 0,
193
+ chunks: 0,
171
194
  };
172
195
 
173
- return async (mdPath: string) => {
174
- if (!mdPath.endsWith('.md')) {
196
+ return request;
197
+ }
198
+
199
+ function translator(params: TranslatorParams, split: Split) {
200
+ const {input, output, sourceLanguage, targetLanguage} = params;
201
+ const inputRoot = resolve(input);
202
+ const outputRoot = resolve(output);
203
+
204
+ return async (path: string) => {
205
+ const ext = extname(path);
206
+ if (!['.yaml', '.json', '.md'].includes(ext)) {
175
207
  return;
176
208
  }
177
209
 
178
- try {
179
- logger.info(mdPath, 'translating');
210
+ const inputPath = join(inputRoot, path);
211
+ const outputPath = join(outputRoot, path.replace(sourceLanguage, targetLanguage));
212
+ const content = await loadFile(inputPath);
180
213
 
181
- const inputPath = resolve(input, sourceLanguage, mdPath);
182
- const outputPath = resolve(output, targetLanguage, mdPath);
183
- const md = await readFile(inputPath, {encoding: 'utf-8'});
214
+ await mkdir(dirname(outputPath), {recursive: true});
184
215
 
185
- await mkdir(dirname(outputPath), {recursive: true});
216
+ if (!content) {
217
+ await dumpFile(outputPath, content);
218
+ return;
219
+ }
186
220
 
187
- if (!md) {
188
- await writeFile(outputPath, md);
189
- return;
190
- }
221
+ const {units, skeleton} = extract(content, {
222
+ source: {
223
+ language: sourceLanguage,
224
+ locale: 'RU',
225
+ },
226
+ target: {
227
+ language: targetLanguage,
228
+ locale: 'US',
229
+ },
230
+ });
191
231
 
192
- const {units, skeleton} = extract({
193
- source: {
194
- language: sourceLanguage,
195
- locale: 'RU',
196
- },
197
- target: {
198
- language: targetLanguage,
199
- locale: 'US',
200
- },
201
- markdown: md,
202
- markdownPath: mdPath,
203
- skeletonPath: '',
204
- });
205
-
206
- if (!units.length) {
207
- await writeFile(outputPath, md);
208
- return;
209
- }
232
+ if (!units.length) {
233
+ await dumpFile(outputPath, content);
234
+ return;
235
+ }
210
236
 
211
- const parts = await Promise.all(
212
- (units as string[]).reduce(
213
- (
214
- {
215
- promises,
216
- buffer,
217
- bufferSize,
218
- }: {
219
- promises: Promise<string[]>[];
220
- buffer: string[];
221
- bufferSize: number;
222
- },
223
- text,
224
- index,
225
- ) => {
226
- if (text.length >= BYTES_LIMIT) {
227
- logger.warn(
228
- mdPath,
229
- 'Skip document part for translation. Part is too big.',
230
- );
231
- promises.push(Promise.resolve([text]));
232
- return {promises, buffer, bufferSize};
233
- }
234
-
235
- const defer = tmap.get(text);
236
- if (defer) {
237
- console.log('SKIPPED', text);
238
- promises.push(defer.promise);
239
- return {promises, buffer, bufferSize};
240
- }
241
-
242
- if (bufferSize + text.length > BYTES_LIMIT) {
243
- promises.push(backoff(request(buffer)));
244
- buffer = [];
245
- bufferSize = 0;
246
- }
247
-
248
- buffer.push(text);
249
- bufferSize += text.length;
250
- tmap.set(text, new Defer());
251
-
252
- if (index === units.length - 1) {
253
- promises.push(backoff(request(buffer)));
254
- }
255
-
256
- return {promises, buffer, bufferSize};
257
- },
258
- {
259
- promises: [],
260
- buffer: [],
261
- bufferSize: 0,
262
- },
263
- ).promises,
264
- );
237
+ const parts = await Promise.all(split(path, units));
238
+ const composed = compose(skeleton, parts, {useSource: true});
239
+
240
+ await dumpFile(outputPath, composed);
241
+ };
242
+ }
265
243
 
266
- const translations = ([] as string[]).concat(...parts);
244
+ function splitter(request: Request, cache: Cache): Split {
245
+ return function (path: string, texts: string[]) {
246
+ const promises: Promise<string>[] = [];
247
+ let buffer: string[] = [];
248
+ let bufferSize = 0;
267
249
 
268
- const composed = await compose({
269
- useSource: true,
270
- units: translations,
271
- skeleton,
272
- });
250
+ const release = () => {
251
+ backoff(request(buffer));
252
+ buffer = [];
253
+ bufferSize = 0;
254
+ };
273
255
 
274
- await writeFile(outputPath, composed);
256
+ for (const text of texts) {
257
+ if (text.length >= BYTES_LIMIT) {
258
+ logger.warn(path, 'Skip document part for translation. Part is too big.');
259
+ promises.push(Promise.resolve(text));
260
+ } else {
261
+ const defer = cache.get(text) || new Defer();
262
+ promises.push(defer.promise);
263
+
264
+ if (!cache.get(text)) {
265
+ if (bufferSize + text.length > BYTES_LIMIT) {
266
+ release();
267
+ }
275
268
 
276
- logger.info(outputPath, 'finished translating');
277
- } catch (err) {
278
- if (err instanceof Error) {
279
- throw new TranslatorError(err.toString(), mdPath);
269
+ buffer.push(text);
270
+ bufferSize += text.length;
271
+ }
272
+
273
+ cache.set(text, defer);
280
274
  }
281
275
  }
276
+
277
+ if (bufferSize) {
278
+ release();
279
+ }
280
+
281
+ return promises;
282
282
  };
283
283
  }
284
284
 
@@ -286,26 +286,8 @@ function backoff(action: () => Promise<string[]>): Promise<string[]> {
286
286
  return retry(
287
287
  {
288
288
  times: RETRY_LIMIT,
289
- interval: (count: number) => {
290
- // eslint-disable-next-line no-bitwise
291
- return (1 << count) * 1000;
292
- },
289
+ interval: (count: number) => Math.pow(2, count) * 1000,
293
290
  },
294
291
  asyncify(action),
295
292
  );
296
293
  }
297
-
298
- class Defer {
299
- resolve!: (text: string[]) => void;
300
-
301
- reject!: (error: any) => void;
302
-
303
- promise: Promise<string[]>;
304
-
305
- constructor() {
306
- this.promise = new Promise((resolve, reject) => {
307
- this.resolve = resolve;
308
- this.reject = reject;
309
- });
310
- }
311
- }
@@ -1,5 +1,5 @@
1
- import {extract} from '../xliff/extract';
2
- import {compose} from '../xliff/compose';
1
+ import {extract} from './extract';
2
+ import {compose} from './compose';
3
3
  import {handler} from './handler';
4
4
 
5
5
  import {Argv} from 'yargs';
@@ -18,32 +18,55 @@ const translate = {
18
18
  };
19
19
 
20
20
  function builder<T>(argv: Argv<T>) {
21
- return argv
22
- .command(extract)
23
- .command(compose)
24
- .option('folder-id', {
25
- describe: 'folder id',
26
- type: 'string',
27
- })
28
- .option('source-language', {
29
- alias: 'sl',
30
- describe: 'source language code',
31
- type: 'string',
32
- })
33
- .option('target-language', {
34
- alias: 'tl',
35
- describe: 'target language code',
36
- type: 'string',
37
- })
38
- .option('include', {
39
- describe: 'relative to input globs to include in processing',
40
- type: 'string',
41
- })
42
- .option('exclude', {
43
- describe: 'relative to input globs to exclude from processing',
44
- type: 'string',
45
- })
46
- .check(argvValidator);
21
+ return (
22
+ argv
23
+ // @ts-ignore
24
+ .command(extract)
25
+ // @ts-ignore
26
+ .command(compose)
27
+ .option('input', {
28
+ alias: 'i',
29
+ describe: 'input folder with markdown files',
30
+ type: 'string',
31
+ default: process.cwd(),
32
+ })
33
+ .option('output', {
34
+ alias: 'o',
35
+ describe: 'output folder to store xliff and skeleton files',
36
+ type: 'string',
37
+ })
38
+ .option('auth', {
39
+ describe: 'auth credentials path',
40
+ type: 'string',
41
+ })
42
+ .option('folder', {
43
+ describe: 'folder',
44
+ type: 'string',
45
+ })
46
+ .option('source', {
47
+ alias: ['sl', 'sll', 'source-language', 'source-language-locale'],
48
+ describe: 'source language and locale',
49
+ type: 'string',
50
+ })
51
+ .option('target', {
52
+ alias: ['tl', 'tll', 'target-language', 'target-language-locale'],
53
+ describe: 'target language and locale',
54
+ type: 'string',
55
+ })
56
+ .option('include', {
57
+ describe: 'relative to input globs to include in processing',
58
+ type: 'string',
59
+ })
60
+ .option('exclude', {
61
+ describe: 'relative to input globs to exclude from processing',
62
+ type: 'string',
63
+ })
64
+ .option('dry-run', {
65
+ describe: 'check command usage',
66
+ type: 'boolean',
67
+ })
68
+ .check(argvValidator)
69
+ );
47
70
  }
48
71
 
49
72
  export {translate};
@@ -0,0 +1,52 @@
1
+ import {JSONValue, resolveRefs} from '@diplodoc/translation';
2
+ import {extname} from 'path';
3
+ import {dump, load} from 'js-yaml';
4
+ import {readFile, writeFile} from 'fs/promises';
5
+
6
+ function parseFile(text: string, path: string): JSONValue | string {
7
+ if (typeof text !== 'string') {
8
+ return text;
9
+ }
10
+
11
+ switch (extname(path)) {
12
+ case '.yaml':
13
+ return load(text) as object;
14
+ case '.json':
15
+ return JSON.parse(text);
16
+ default:
17
+ return text;
18
+ }
19
+ }
20
+
21
+ function stringifyFile(content: JSONValue | string, path: string): string {
22
+ if (typeof content === 'string') {
23
+ return content;
24
+ }
25
+
26
+ switch (extname(path)) {
27
+ case '.yaml':
28
+ return dump(content);
29
+ case '.json':
30
+ return JSON.stringify(content);
31
+ default:
32
+ return content as unknown as string;
33
+ }
34
+ }
35
+
36
+ export async function loadFile<T = string | JSONValue>(path: string): Promise<T> {
37
+ const text = await readFile(path, 'utf8');
38
+
39
+ let content = parseFile(text, path);
40
+
41
+ if (content && typeof content === 'object') {
42
+ content = await resolveRefs(content, path, parseFile);
43
+ }
44
+
45
+ return content as T;
46
+ }
47
+
48
+ export async function dumpFile(path: string, content: string | JSONValue) {
49
+ const text = stringifyFile(content, path);
50
+
51
+ await writeFile(path, text, 'utf8');
52
+ }