@yxw007/translate 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,7 @@
1
- // translate v0.1.5 Copyright (c) 2024 Potter<aa4790139@gmail.com> and contributors
1
+ // translate v0.2.0 Copyright (c) 2025 Potter<aa4790139@gmail.com> and contributors
2
+ import 'fs/promises';
3
+ import 'fs';
4
+ import 'path';
2
5
  import { TranslateClient, TranslateTextCommand } from '@aws-sdk/client-translate';
3
6
 
4
7
  class TranslationError extends Error {
@@ -113,6 +116,9 @@ function useLogger(name = "") {
113
116
  };
114
117
  }
115
118
 
119
+ function sleep(ms) { // Returns a promise that resolves (with no value) once a setTimeout of `ms` fires.
120
+ return new Promise((resolve) => setTimeout(resolve, ms));
121
+ }
116
122
  function getGapLine() {
117
123
  return "-".repeat(20);
118
124
  }
@@ -130,6 +136,66 @@ async function throwResponseError(name, res) {
130
136
  catch (e) { }
131
137
  return new TranslationError(name, `Translate fail ! ${res.status}: ${res.statusText} ${bodyRes?.message ?? ""}`);
132
138
  }
139
/**
 * Split `text` into consecutive chunks of at most `maxCharacterNum` UTF-16
 * code units, preferring to cut at natural boundaries.
 *
 * Guarantees:
 *  - `chunks.join("") === text` (nothing is dropped or duplicated);
 *  - no chunk is empty;
 *  - a chunk always ends *after* the delimiter it was cut at, so punctuation
 *    stays attached to the sentence it terminates;
 *  - a surrogate pair is never cut in half (with a limit of 1 this means a
 *    chunk may hold one whole pair, i.e. 2 code units).
 *
 * @param {string} text - Text to split. An empty string yields `[]`.
 * @param {number} maxCharacterNum - Maximum chunk length; floored, must be >= 1.
 * @returns {string[]} The chunks, in order.
 * @throws {RangeError} If `maxCharacterNum` is not a number >= 1 (a NaN or
 *   non-positive limit can never make progress).
 */
function splitText(text, maxCharacterNum) {
  // Delimiters from most to least preferred cut point. Full-width forms are
  // written as escapes so that text normalisation cannot fold them into
  // their ASCII look-alikes.
  const SPLIT_PRIORITY = [
    /\n\n+/g, // paragraph break (keeps blank lines together)
    /[.!?\u3002\uFF01\uFF1F\n]/g, // sentence terminators (ASCII + CJK full-width) and line breaks
    /[;\uFF1B]/g, // semicolons (ASCII + full-width)
    /[,\uFF0C]/g, // commas (ASCII + full-width)
    /\s/g, // any whitespace, so words are not cut in half
  ];
  // A delimiter is only used when the resulting chunk fills at least this
  // share of the limit; otherwise a lower-priority delimiter is tried.
  const BEST_MATCH_RATIO = 0.7;

  const limit = Math.floor(maxCharacterNum);
  if (!(limit >= 1)) {
    throw new RangeError(`maxCharacterNum must be a number >= 1, received ${maxCharacterNum}`);
  }

  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;

  // Returns how many code units of `window` should form the next chunk.
  const findCut = (window) => {
    const minCut = limit * BEST_MATCH_RATIO;
    for (const delimiter of SPLIT_PRIORITY) {
      let lastEnd = -1;
      // matchAll works on a clone of the regex, so no lastIndex state leaks.
      for (const match of window.matchAll(delimiter)) {
        lastEnd = match.index + match[0].length;
      }
      if (lastEnd >= minCut) {
        return lastEnd;
      }
    }
    // Conservative fallback: the last space, even if it is early, as long as
    // the chunk before it is not whitespace-only.
    const spaceIndex = window.lastIndexOf(" ");
    if (spaceIndex > 0 && window.slice(0, spaceIndex).trim() !== "") {
      return spaceIndex + 1;
    }
    // No usable separator at all: forced cut at the limit.
    return window.length;
  };

  const chunks = [];
  let start = 0;
  while (text.length - start > limit) {
    let cut = findCut(text.slice(start, start + limit));
    const endsOnHigh = isHighSurrogate(text.charCodeAt(start + cut - 1));
    if (endsOnHigh && isLowSurrogate(text.charCodeAt(start + cut))) {
      // Keep the pair together: back off by one, or take the whole pair when
      // backing off would leave an empty chunk.
      cut = cut > 1 ? cut - 1 : cut + 1;
    }
    chunks.push(text.slice(start, start + cut));
    start += cut;
  }
  if (start < text.length) {
    chunks.push(text.slice(start));
  }
  return chunks;
}
192
/**
 * Report whether the combined length of the given text exceeds a limit.
 *
 * @param {string | string[] | null | undefined} text - A single string or a
 *   list of strings. Entries that are not strings count as length 0 rather
 *   than throwing or turning the total into NaN.
 * @param {number} max_character_num - Maximum total length (UTF-16 code units).
 * @returns {boolean} `true` only when the total length is strictly greater
 *   than `max_character_num`; `false` for missing or empty input.
 */
function isOverMaxCharacterNum(text, max_character_num) {
  if (text == null) {
    return false;
  }
  // Accept a bare string as a one-element list.
  const items = Array.isArray(text) ? text : [text];
  let total = 0;
  for (const item of items) {
    if (typeof item === "string") {
      total += item.length;
    }
  }
  return total > max_character_num;
}
133
199
 
134
200
  function google(options) {
135
201
  const base = "https://translate.googleapis.com/translate_a/single";
@@ -1446,25 +1512,29 @@ function deepl$2(options) {
1446
1512
  if (!Array.isArray(text)) {
1447
1513
  text = [text];
1448
1514
  }
1449
- const requestBody = JSON.stringify({
1450
- text,
1451
- source_lang: from === "auto" ? undefined : from,
1452
- target_lang: to,
1453
- });
1454
1515
  const res = await fetch(url, {
1455
1516
  method: "POST",
1456
1517
  headers: {
1457
- "Content-Type": "application/json; charset=UTF-8;",
1518
+ "Content-Type": "application/json; charset=UTF-8",
1458
1519
  Authorization: `DeepL-Auth-Key ${key}`,
1520
+ Accept: "*/*",
1521
+ Host: "api-free.deepl.com",
1459
1522
  Connection: "keep-alive",
1460
1523
  },
1461
- body: requestBody,
1524
+ body: JSON.stringify({
1525
+ text: text,
1526
+ source_lang: from === "auto" ? undefined : from,
1527
+ target_lang: to,
1528
+ }),
1462
1529
  });
1463
1530
  if (!res.ok) {
1464
1531
  throw await throwResponseError(this.name, res);
1465
1532
  }
1466
1533
  const bodyRes = await res.json();
1467
- const body = bodyRes?.translations;
1534
+ if (bodyRes.error) {
1535
+ throw new TranslationError(this.name, `Translate fail ! code: ${bodyRes.error.code}, message: ${bodyRes.error.message}`);
1536
+ }
1537
+ const body = bodyRes.translations;
1468
1538
  if (!body || body.length === 0) {
1469
1539
  throw new TranslationError(this.name, "Translate fail ! translate's result is null or empty");
1470
1540
  }
@@ -2371,15 +2441,20 @@ function getLanguage(engine) {
2371
2441
  }
2372
2442
 
2373
2443
  const appName = "Translate";
2444
+ const defaultMaxCharacterNum = 1000;
2374
2445
 
2375
2446
  const logger = useLogger();
2376
2447
  const cache = new Cache();
2377
2448
  class Translator {
2378
2449
  engines;
2379
2450
  cache_time;
2380
- constructor(cache_time = 60 * 1000) {
2451
+ concurrencyMax;
2452
+ concurrencyDelay;
2453
+ constructor(cache_time = 60 * 1000, concurrencyMax = 4, concurrencyDelay = 20) { // Stores the cache and concurrency settings; starts with an empty engine map.
2381
2454
  this.engines = new Map();
2382
2455
  this.cache_time = cache_time; // fallback handed to cache.set when the local cache_time is null/undefined
2456
+ this.concurrencyMax = concurrencyMax; // upper bound on simultaneously active engine.translate calls in concurrencyTranslate
2457
+ this.concurrencyDelay = concurrencyDelay; // passed to sleep() (a setTimeout delay) after a chunk finishes, before more chunks are started
2383
2458
  }
2384
2459
  /**
2385
2460
  * This method is obsolete, please use the addEngine method
@@ -2427,8 +2502,7 @@ class Translator {
2427
2502
  if (cache.get(key)) {
2428
2503
  return Promise.resolve(cache.get(key)?.value);
2429
2504
  }
2430
- return engineInstance
2431
- .translate(text, options)
2505
+ return this.concurrencyHandle(engineInstance, text, options)
2432
2506
  .then((translated) => {
2433
2507
  cache.set(key, translated, cache_time ?? this.cache_time);
2434
2508
  return translated;
@@ -2443,6 +2517,55 @@ class Translator {
2443
2517
  }
2444
2518
  });
2445
2519
  }
2520
+ async concurrencyHandle(engine, text, options) {
2521
+ const { max_character_num = defaultMaxCharacterNum } = options;
2522
+ const maxCharacterNum = max_character_num > 0 ? max_character_num : defaultMaxCharacterNum;
2523
+ if (Array.isArray(text)) {
2524
+ if (isOverMaxCharacterNum(text, max_character_num)) {
2525
+ throw new TranslationError(appName, "String arrays do not support automatic character splitting, and the total number of characters in a string array exceeds the limit on the number of translated characters.");
2526
+ }
2527
+ return engine.translate(text, options);
2528
+ }
2529
+ else {
2530
+ return this.concurrencyTranslate(engine, text, options, maxCharacterNum);
2531
+ }
2532
+ }
2533
+ async concurrencyTranslate(engine, text, options, maxCharacterMum) {
2534
+ const pendingTasks = splitText(text, maxCharacterMum).map((content, index) => ({ content, index }));
2535
+ const result = [];
2536
+ let activeTasks = 0;
2537
+ const concurrencyDelay = this.concurrencyDelay;
2538
+ const concurrencyMax = this.concurrencyMax;
2539
+ return new Promise((resolve, reject) => {
2540
+ function processTasks() {
2541
+ while (activeTasks < concurrencyMax && pendingTasks.length > 0) {
2542
+ const { content, index } = pendingTasks.shift();
2543
+ activeTasks++;
2544
+ engine
2545
+ .translate(content, options)
2546
+ .then((res) => {
2547
+ result.push({
2548
+ translated: res,
2549
+ index,
2550
+ });
2551
+ })
2552
+ .catch((error) => reject(error))
2553
+ .finally(async () => {
2554
+ activeTasks--;
2555
+ if (activeTasks === 0 && pendingTasks.length <= 0) {
2556
+ result.sort((a, b) => a.index - b.index);
2557
+ const arr = result.reduce((pre, cur) => pre.concat(cur.translated), []);
2558
+ return resolve([arr.join("")]);
2559
+ }
2560
+ await sleep(concurrencyDelay);
2561
+ processTasks();
2562
+ });
2563
+ }
2564
+ }
2565
+ processTasks();
2566
+ return result;
2567
+ });
2568
+ }
2446
2569
  }
2447
2570
  const translator = new Translator();
2448
2571
  var index = {