gptrans 1.8.6 → 1.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ # GPTrans - Referencias Múltiples e Idioma Base Alternativo
2
+
3
+ ## Nuevas Funcionalidades
4
+
5
+ El método `preload()` ahora acepta un objeto de opciones con dos propiedades opcionales que mejoran la precisión de las traducciones:
6
+
7
+ ### 1. `references` - Referencias Múltiples
8
+
9
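+ Permite incluir traducciones existentes en otros idiomas como contexto adicional para el modelo de IA. Las referencias se leen de las traducciones ya guardadas para la misma instancia (`name`); los idiomas que aún no tienen una traducción guardada para un texto simplemente se omiten.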
10
+
11
+ ```javascript
12
+ await gptrans.preload({
13
+ references: ['en', 'pt'] // Usar inglés y portugués como referencia
14
+ });
15
+ ```
16
+
17
+ **Caso de uso**: Cuando tienes traducciones en varios idiomas y quieres que la nueva traducción sea consistente con las existentes.
18
+
19
+ ### 2. `baseLanguage` - Idioma Base Alternativo
20
+
21
+ Permite usar un idioma diferente al original como base para la traducción.
22
+
23
+ ```javascript
24
+ await gptrans.preload({
25
+ baseLanguage: 'en' // Traducir desde inglés en vez del idioma original
26
+ });
27
+ ```
28
+
29
+ **Caso de uso**: Cuando el texto original tiene características específicas del idioma (como he/she en inglés) que pueden omitirse en la traducción final.
30
+
31
+ ## Ejemplos de Uso
32
+
33
+ ### Ejemplo 1: Traducción Básica (sin cambios)
34
+
35
+ ```javascript
36
+ const gptrans = new GPTrans({ from: 'en', target: 'es' });
37
+ await gptrans.preload(); // Comportamiento original
38
+ ```
39
+
40
+ ### Ejemplo 2: Con Referencias
41
+
42
+ ```javascript
43
+ // Original en español, traducir a francés usando inglés como referencia
44
+ const gptrans = new GPTrans({ from: 'es', target: 'fr' });
45
+ await gptrans.preload({
46
+ references: ['en'] // El modelo verá la traducción en inglés como contexto
47
+ });
48
+ ```
49
+
50
+ ### Ejemplo 3: Con Idioma Base Alternativo
51
+
52
+ ```javascript
53
+ // Original en español, pero traducir DE inglés A portugués
54
+ const gptrans = new GPTrans({ from: 'es', target: 'pt' });
55
+ await gptrans.preload({
56
+ baseLanguage: 'en' // Usa la traducción en inglés como base
57
+ });
58
+ ```
59
+
60
+ ### Ejemplo 4: Combinado
61
+
62
+ ```javascript
63
+ // Original en español, traducir de inglés a alemán, mostrando español y portugués como referencia
64
+ const gptrans = new GPTrans({ from: 'es', target: 'de' });
65
+ await gptrans.preload({
66
+ baseLanguage: 'en', // Traduce desde inglés
67
+ references: ['es', 'pt'] // Muestra español y portugués como contexto adicional
68
+ });
69
+ ```
70
+
71
+ ## Caso de Uso Real: Evitar Problemas de Género
72
+
73
+ ### Problema
74
+ - En inglés: "The student is very good" (neutral)
75
+ - En español: "El estudiante es muy bueno" / "La estudiante es muy buena" (con género)
76
+ - En portugués: Se puede omitir el género en algunos contextos
77
+
78
+ ### Solución
79
+ ```javascript
80
+ // Si el original está en español pero queremos la traducción al portugués
81
+ // basada en la versión inglesa (que es neutral):
82
+
83
+ const ptTranslator = new GPTrans({ from: 'es', target: 'pt' });
84
+ await ptTranslator.preload({
85
+ baseLanguage: 'en', // Usa la versión inglesa como base
86
+ references: ['es'] // Muestra el español original como referencia
87
+ });
88
+ ```
89
+
90
+ ## Compatibilidad
91
+
92
+ ✅ **Totalmente retrocompatible**: Si no especificas ningún parámetro, `preload()` funciona exactamente igual que antes.
93
+
94
+ ```javascript
95
+ // Esto sigue funcionando sin cambios:
96
+ await gptrans.preload();
97
+ ```
98
+
99
+ ## Archivo de Prueba
100
+
101
+ Ejecuta `demo/case_references.js` para ver ejemplos completos de todas las funcionalidades nuevas.
@@ -0,0 +1,139 @@
1
+ import GPTrans from '../index.js';
2
+ import dotenv from 'dotenv';
3
+ import { fileURLToPath } from 'url';
4
+ import { dirname, join } from 'path';
5
+
6
+ // Load .env from demo folder
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = dirname(__filename);
9
+ dotenv.config({ path: join(__dirname, '.env') });
10
+
11
+ console.log('🚀 Testing GPTrans with Reference Translations\n');
12
+ console.log('='.repeat(70));
13
+
14
+ async function testReferences() {
15
+ console.log('\n📋 Test 1: Basic usage without references (baseline)\n');
16
+
17
+ // First, create translations in English and Portuguese
18
+ const enTranslator = new GPTrans({
19
+ from: 'es',
20
+ target: 'en',
21
+ model: 'sonnet45',
22
+ name: 'ref_test',
23
+ debug: true
24
+ });
25
+
26
+ const ptTranslator = new GPTrans({
27
+ from: 'es',
28
+ target: 'pt',
29
+ model: 'sonnet45',
30
+ name: 'ref_test',
31
+ debug: true
32
+ });
33
+
34
+ // Sample Spanish texts with gendered language
35
+ const spanishTexts = [
36
+ 'El estudiante es muy bueno',
37
+ 'La estudiante es muy buena',
38
+ 'Tienes que ir al doctor',
39
+ 'Está muy cansada'
40
+ ];
41
+
42
+ console.log('📝 Creating English translations from Spanish...');
43
+ spanishTexts.forEach(text => {
44
+ console.log(` EN: ${enTranslator.t(text)}`);
45
+ });
46
+
47
+ await new Promise(resolve => setTimeout(resolve, 5000));
48
+
49
+ console.log('\n📝 Creating Portuguese translations from Spanish...');
50
+ spanishTexts.forEach(text => {
51
+ console.log(` PT: ${ptTranslator.t(text)}`);
52
+ });
53
+
54
+ await new Promise(resolve => setTimeout(resolve, 5000));
55
+
56
+ console.log('\n' + '='.repeat(70));
57
+ console.log('\n📋 Test 2: Translation with English as reference\n');
58
+
59
+ // Now translate to French using English as reference
60
+ const frTranslator = new GPTrans({
61
+ from: 'es',
62
+ target: 'fr',
63
+ model: 'sonnet45',
64
+ name: 'ref_test',
65
+ debug: true
66
+ });
67
+
68
+ console.log('🔄 Preloading French translations with English as reference...');
69
+ await frTranslator.preload({
70
+ references: ['en']
71
+ });
72
+
73
+ console.log('\n📝 French translations (with EN reference):');
74
+ spanishTexts.forEach(text => {
75
+ console.log(` ES: ${text}`);
76
+ console.log(` FR: ${frTranslator.t(text)}\n`);
77
+ });
78
+
79
+ console.log('='.repeat(70));
80
+ console.log('\n📋 Test 3: Translation using alternate base language\n');
81
+
82
+ // Translate from English to Italian (using English as base instead of Spanish)
83
+ const itTranslator = new GPTrans({
84
+ from: 'es',
85
+ target: 'it',
86
+ model: 'sonnet45',
87
+ name: 'ref_test',
88
+ debug: true
89
+ });
90
+
91
+ console.log('🔄 Preloading Italian translations with English as base language...');
92
+ await itTranslator.preload({
93
+ baseLanguage: 'en',
94
+ references: ['es', 'pt'] // Show original Spanish and Portuguese as reference
95
+ });
96
+
97
+ console.log('\n📝 Italian translations (from EN, with ES+PT references):');
98
+ spanishTexts.forEach(text => {
99
+ console.log(` ES (original): ${text}`);
100
+ console.log(` IT (from EN): ${itTranslator.t(text)}\n`);
101
+ });
102
+
103
+ console.log('='.repeat(70));
104
+ console.log('\n📋 Test 4: Multiple references\n');
105
+
106
+ // Translate to German with multiple references
107
+ const deTranslator = new GPTrans({
108
+ from: 'es',
109
+ target: 'de',
110
+ model: 'sonnet45',
111
+ name: 'ref_test',
112
+ debug: true
113
+ });
114
+
115
+ console.log('🔄 Preloading German translations with multiple references...');
116
+ await deTranslator.preload({
117
+ references: ['en', 'pt', 'fr']
118
+ });
119
+
120
+ console.log('\n📝 German translations (with EN+PT+FR references):');
121
+ spanishTexts.forEach(text => {
122
+ console.log(` ES: ${text}`);
123
+ console.log(` DE: ${deTranslator.t(text)}\n`);
124
+ });
125
+
126
+ console.log('='.repeat(70));
127
+ console.log('\n✅ All tests completed!\n');
128
+ console.log('💡 Key features demonstrated:');
129
+ console.log(' 1. Baseline translations without references');
130
+ console.log(' 2. Using one language as reference for better context');
131
+ console.log(' 3. Using alternate base language (translate from intermediate language)');
132
+ console.log(' 4. Using multiple references for maximum accuracy\n');
133
+ }
134
+
135
+ // Run tests
136
+ testReferences().catch(error => {
137
+ console.error('\n❌ Error during tests:', error.message);
138
+ console.error(error.stack);
139
+ });
package/index.js CHANGED
@@ -11,14 +11,14 @@ class GPTrans {
11
11
  static #mmixInstances = new Map();
12
12
  static #translationLocks = new Map();
13
13
 
14
- static mmix(models = 'sonnet45') {
14
+ static mmix(models = 'sonnet45', { debug = false } = {}) {
15
15
  const key = Array.isArray(models) ? models.join(',') : models;
16
16
 
17
17
  if (!this.#mmixInstances.has(key)) {
18
18
  const mmix = new ModelMix({
19
19
  config: {
20
20
  max_history: 1,
21
- debug: false,
21
+ debug,
22
22
  bottleneck: {
23
23
  minTime: 15000,
24
24
  maxConcurrent: 1
@@ -49,16 +49,16 @@ class GPTrans {
49
49
  if (!this.#translationLocks.has(modelKey)) {
50
50
  this.#translationLocks.set(modelKey, Promise.resolve());
51
51
  }
52
-
52
+
53
53
  const previousLock = this.#translationLocks.get(modelKey);
54
54
  let releaseLock;
55
-
55
+
56
56
  const currentLock = new Promise(resolve => {
57
57
  releaseLock = resolve;
58
58
  });
59
-
59
+
60
60
  this.#translationLocks.set(modelKey, previousLock.then(() => currentLock));
61
-
61
+
62
62
  await previousLock;
63
63
  return releaseLock;
64
64
  }
@@ -67,7 +67,7 @@ class GPTrans {
67
67
  return isLanguageAvailable(langCode);
68
68
  }
69
69
 
70
- constructor({ from = 'en-US', target = 'es', model = 'sonnet45', batchThreshold = 1500, debounceTimeout = 500, promptFile = null, name = '', context = '', freeze = false } = {}) {
70
+ constructor({ from = 'en-US', target = 'es', model = 'sonnet45', batchThreshold = 1500, debounceTimeout = 500, promptFile = null, name = '', context = '', freeze = false, debug = false } = {}) {
71
71
 
72
72
  target = this.normalizeBCP47(target);
73
73
  from = this.normalizeBCP47(from);
@@ -79,9 +79,11 @@ class GPTrans {
79
79
  }
80
80
 
81
81
  const path = new URL('../../db', import.meta.url).pathname;
82
- name = name ? '_' + name : '';
83
- this.dbTarget = new DeepBase({ name: 'gptrans' + name + '_' + target, path });
84
- this.dbFrom = new DeepBase({ name: 'gptrans' + name + '_from_' + from, path });
82
+ const namePrefix = name ? '_' + name : '';
83
+ this.dbPath = path;
84
+ this.instanceName = name;
85
+ this.dbTarget = new DeepBase({ name: 'gptrans' + namePrefix + '_' + target, path });
86
+ this.dbFrom = new DeepBase({ name: 'gptrans' + namePrefix + '_from_' + from, path });
85
87
 
86
88
  try {
87
89
  this.replaceTarget = isoAssoc(target, 'TARGET_');
@@ -95,6 +97,8 @@ class GPTrans {
95
97
  this.pendingTranslations = new Map(); // [key, text]
96
98
  this.pendingCharCount = 0; // Add character count tracker
97
99
  this.debounceTimer = null;
100
+ this.isProcessingBatch = false; // Track if a batch is currently being processed
101
+ this.modelMixOptions = { debug };
98
102
  this.modelKey = model;
99
103
  this.promptFile = promptFile ?? new URL('./prompt/translate.md', import.meta.url).pathname;
100
104
  this.context = context;
@@ -196,13 +200,24 @@ class GPTrans {
196
200
 
197
201
  this.modelConfig.options.max_tokens = this.pendingCharCount + 1000;
198
202
  const minTime = Math.floor((60000 / (8000 / this.pendingCharCount)) * 1.4);
199
- GPTrans.mmix(this.modelKey).limiter.updateSettings({ minTime });
203
+ GPTrans.mmix(this.modelKey, this.modelMixOptions).limiter.updateSettings({ minTime });
200
204
 
201
205
  this.pendingCharCount = 0;
202
206
 
207
+ // Load references for each text in the batch if preloadReferences is set
208
+ const batchReferences = {};
209
+ if (this.preloadReferences && this.preloadReferences.length > 0) {
210
+ batch.forEach(([key]) => {
211
+ const refs = this._loadReferenceTranslations(key, this.preloadReferences);
212
+ if (Object.keys(refs).length > 0) {
213
+ batchReferences[key] = refs;
214
+ }
215
+ });
216
+ }
217
+
203
218
  const textsToTranslate = batch.map(([_, text]) => text).join(`\n${this.divider}\n`);
204
219
  try {
205
- const translations = await this._translate(textsToTranslate);
220
+ const translations = await this._translate(textsToTranslate, batch, batchReferences, this.preloadBaseLanguage);
206
221
  const translatedTexts = translations.split(`\n${this.divider}\n`);
207
222
 
208
223
  const contextHash = this._hash(context);
@@ -223,20 +238,57 @@ class GPTrans {
223
238
  }
224
239
  }
225
240
 
226
- async _translate(text) {
241
+ async _translate(text, batch = [], batchReferences = {}, baseLanguage = null) {
227
242
  // Acquire lock to ensure atomic model configuration and translation
228
243
  const releaseLock = await GPTrans.#acquireTranslationLock(this.modelKey);
229
-
244
+
230
245
  try {
231
- const model = GPTrans.mmix(this.modelKey);
246
+ const model = GPTrans.mmix(this.modelKey, this.modelMixOptions);
232
247
 
233
248
  model.setSystem("You are an expert translator specialized in literary translation between FROM_LANG and TARGET_DENONYM TARGET_LANG.");
234
249
 
235
250
  model.addTextFromFile(this.promptFile);
236
251
 
237
- model.replace({ INPUT: text, CONTEXT: this.context });
252
+ // Format references if available
253
+ let referencesText = '';
254
+ if (Object.keys(batchReferences).length > 0 && batch.length > 0) {
255
+ const textsArray = text.split(`\n${this.divider}\n`);
256
+
257
+ referencesText = textsArray.map((txt, index) => {
258
+ const key = batch[index] ? batch[index][0] : null;
259
+ if (key && batchReferences[key]) {
260
+ const refs = batchReferences[key];
261
+ const refLines = Object.entries(refs).map(([lang, translation]) => {
262
+ try {
263
+ const langInfo = isoAssoc(lang);
264
+ return `${langInfo.DENONYM} ${langInfo.LANG} (${lang}): ${translation}`;
265
+ } catch (e) {
266
+ return `${lang}: ${translation}`;
267
+ }
268
+ }).join(`\n${this.divider}\n`);
269
+ return refLines;
270
+ }
271
+ return '';
272
+ }).filter(r => r).join(`\n\n`);
273
+ }
274
+
275
+ // Determine which FROM_ values to use
276
+ let fromReplace = this.replaceFrom;
277
+ if (baseLanguage) {
278
+ try {
279
+ fromReplace = isoAssoc(baseLanguage, 'FROM_');
280
+ } catch (e) {
281
+ console.warn(`Invalid baseLanguage: ${baseLanguage}, using default`);
282
+ }
283
+ }
284
+
285
+ model.replace({
286
+ INPUT: text,
287
+ CONTEXT: this.context,
288
+ REFERENCES: referencesText || 'None'
289
+ });
238
290
  model.replace(this.replaceTarget);
239
- model.replace(this.replaceFrom);
291
+ model.replace(fromReplace);
240
292
 
241
293
  const response = await model.message();
242
294
 
@@ -269,29 +321,98 @@ class GPTrans {
269
321
  return stringHash(input).toString(36);
270
322
  }
271
323
 
272
- async preload() {
324
+ _loadReferenceTranslations(key, referenceLangs = []) {
325
+ const references = {};
326
+ const contextHash = this._hash(this.context);
327
+
328
+ for (const lang of referenceLangs) {
329
+ const namePrefix = this.instanceName ? '_' + this.instanceName : '';
330
+ const dbRef = new DeepBase({
331
+ name: `gptrans${namePrefix}_${lang}`,
332
+ path: this.dbPath
333
+ });
334
+
335
+ const translation = dbRef.get(contextHash, key);
336
+ if (translation) {
337
+ references[lang] = translation;
338
+ }
339
+ }
340
+
341
+ return references;
342
+ }
343
+
344
+ async preload({ references = [], baseLanguage = null } = {}) {
273
345
 
274
346
  if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
275
347
  return this;
276
348
  }
277
349
 
350
+ // Store preload options for use in translation
351
+ this.preloadReferences = references;
352
+ this.preloadBaseLanguage = baseLanguage;
353
+
354
+ // Track which keys need translation
355
+ const keysNeedingTranslation = [];
356
+
278
357
  for (const [context, pairs] of this.dbFrom.entries()) {
358
+ // Skip the _context metadata
359
+ if (context === '_context') continue;
360
+
279
361
  this.setContext(context);
362
+ const contextHash = this._hash(context);
363
+
280
364
  for (const [key, text] of Object.entries(pairs)) {
365
+ // Check if translation already exists
366
+ if (!this.dbTarget.get(contextHash, key)) {
367
+ keysNeedingTranslation.push({ context, contextHash, key });
368
+ }
281
369
  this.get(key, text);
282
370
  }
283
371
  }
284
372
 
373
+ // If nothing needs translation, return immediately
374
+ if (keysNeedingTranslation.length === 0) {
375
+ this.preloadReferences = [];
376
+ this.preloadBaseLanguage = null;
377
+ return this;
378
+ }
379
+
285
380
  // Wait for any pending translations to complete
286
- await new Promise(resolve => {
381
+ const maxWaitTime = 120000; // 120 seconds timeout
382
+ const startTime = Date.now();
383
+
384
+ await new Promise((resolve, reject) => {
287
385
  const checkInterval = setInterval(() => {
288
- if (this.dbFrom.keys().length === this.dbTarget.keys().length) {
386
+ // Check if there are still pending translations or batch being processed
387
+ const hasPending = this.pendingTranslations.size > 0 || this.isProcessingBatch;
388
+
389
+ // Check if all needed translations are now complete
390
+ let allTranslated = true;
391
+ for (const { contextHash, key } of keysNeedingTranslation) {
392
+ if (!this.dbTarget.get(contextHash, key)) {
393
+ allTranslated = false;
394
+ break;
395
+ }
396
+ }
397
+
398
+ if (allTranslated && !hasPending) {
289
399
  clearInterval(checkInterval);
290
400
  resolve();
291
401
  }
402
+
403
+ // Timeout check
404
+ if (Date.now() - startTime > maxWaitTime) {
405
+ clearInterval(checkInterval);
406
+ console.warn(`Preload timeout: ${keysNeedingTranslation.length} translations pending`);
407
+ resolve(); // Resolve instead of reject to allow partial completion
408
+ }
292
409
  }, 100);
293
410
  });
294
411
 
412
+ // Clear preload options after completion
413
+ this.preloadReferences = [];
414
+ this.preloadBaseLanguage = null;
415
+
295
416
  return this;
296
417
  }
297
418
 
@@ -304,7 +425,7 @@ class GPTrans {
304
425
  // Iterate through dbTarget and remove keys that don't exist in dbFrom
305
426
  for (const [contextHash, pairs] of this.dbTarget.entries()) {
306
427
  for (const key of Object.keys(pairs)) {
307
-
428
+
308
429
  const context = this.dbFrom.get('_context', contextHash);
309
430
  if (!this.dbFrom.get(context, key)) {
310
431
  console.log(contextHash, key);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gptrans",
3
3
  "type": "module",
4
- "version": "1.8.6",
4
+ "version": "1.8.8",
5
5
  "description": "🚆 GPTrans - The smarter AI-powered way to translate.",
6
6
  "keywords": [
7
7
  "translate",
@@ -37,7 +37,7 @@
37
37
  "dotenv": "^16.4.7",
38
38
  "form-data": "^4.0.4",
39
39
  "genmix": "^1.0.4",
40
- "modelmix": "^3.9.0",
40
+ "modelmix": "^4.2.8",
41
41
  "string-hash": "^1.1.3"
42
42
  }
43
43
  }
@@ -6,6 +6,12 @@ Translation from FROM_ISO to TARGET_ISO (TARGET_DENONYM TARGET_LANG) with cultur
6
6
  INPUT
7
7
  ```
8
8
 
9
+ ## Reference Translations (for context)
10
+ These are existing translations in other languages that may help you provide a more accurate translation. Use them as a reference, but do not simply copy them. If the block below contains only "None", no reference translations are available:
11
+ ```
12
+ REFERENCES
13
+ ```
14
+
9
15
  # Return Format
10
16
  - Provide the final translation within a code block using ```.
11
17
  - Do not include alternative translations, only provide the best translation.