gptrans 2.0.6 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -131,17 +131,45 @@ class GPTrans {
131
131
  t(text, params = {}) {
132
132
  const key = this._textToKey(text);
133
133
  const translation = this.get(key, text) || text;
134
+ return this._applyParams(translation, params);
135
+ }
134
136
 
135
- return Object.entries(params).reduce(
136
- (text, [key, value]) => text.replace(`{${key}}`, value),
137
- translation
138
- );
137
+ async tAsync(text, params = {}) {
138
+ const key = this._textToKey(text);
139
+ const { contextHash, translation, needsTranslation } = this._resolveTranslationState(key, text);
140
+
141
+ if (!needsTranslation) {
142
+ return this._applyParams(translation || text, params);
143
+ }
144
+
145
+ // If this key was enqueued via t(), avoid duplicate work in a later batch.
146
+ this._dequeuePendingTranslation(key);
147
+
148
+ const translatedText = await this._translate(text, [[key, text]], {}, this.preloadBaseLanguage);
149
+ const immediateTranslation = translatedText.trim();
150
+ this.dbTarget.set(contextHash, key, immediateTranslation);
151
+
152
+ return this._applyParams(immediateTranslation, params);
139
153
  }
140
154
 
141
155
  get(key, text) {
156
+ const { translation, needsTranslation } = this._resolveTranslationState(key, text);
157
+ if (needsTranslation) {
158
+ this._enqueuePendingTranslation(key, text);
159
+ }
160
+ return translation;
161
+ }
142
162
 
163
+ _applyParams(text, params = {}) {
164
+ return Object.entries(params).reduce(
165
+ (translated, [paramKey, value]) => translated.replace(`{${paramKey}}`, value),
166
+ text
167
+ );
168
+ }
169
+
170
+ _resolveTranslationState(key, text) {
143
171
  if (!text || !text.trim()) {
144
- return text;
172
+ return { contextHash: this._hash(this.context), translation: text, needsTranslation: false };
145
173
  }
146
174
 
147
175
  const contextHash = this._hash(this.context);
@@ -152,41 +180,56 @@ class GPTrans {
152
180
  this.dbFrom.set('_context', contextHash, this.context);
153
181
  }
154
182
 
155
- if (!translation) {
183
+ if (translation) {
184
+ return { contextHash, translation, needsTranslation: false };
185
+ }
156
186
 
157
- // Skip translation if context is empty and languages are the same
158
- if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
159
- return text;
160
- }
187
+ if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
188
+ return { contextHash, translation: text, needsTranslation: false };
189
+ }
161
190
 
162
- if (this.freeze) {
163
- console.log(`Freeze mode: [${key}] ${text}`);
164
- return text;
165
- }
191
+ if (this.freeze) {
192
+ console.log(`Freeze mode: [${key}] ${text}`);
193
+ return { contextHash, translation: text, needsTranslation: false };
194
+ }
166
195
 
167
- this.pendingTranslations.set(key, text);
168
- this.pendingCharCount += text.length; // Update character count
196
+ return { contextHash, translation: null, needsTranslation: true };
197
+ }
169
198
 
170
- // Clear existing timer
171
- if (this.debounceTimer) {
172
- clearTimeout(this.debounceTimer);
173
- }
199
+ _enqueuePendingTranslation(key, text) {
200
+ const existingText = this.pendingTranslations.get(key);
201
+ if (existingText) {
202
+ this.pendingCharCount -= existingText.length;
203
+ }
174
204
 
175
- // Set new timer - capture context at scheduling time
176
- const capturedContext = this.context;
177
- this.debounceTimer = setTimeout(() => {
178
- if (this.pendingTranslations.size > 0) {
179
- this._processBatch(capturedContext);
180
- }
181
- }, this.debounceTimeout);
205
+ this.pendingTranslations.set(key, text);
206
+ this.pendingCharCount += text.length;
182
207
 
183
- // Process if we hit the character count threshold
184
- if (this.pendingCharCount >= this.batchThreshold) {
185
- clearTimeout(this.debounceTimer);
186
- this._processBatch(this.context);
208
+ if (this.debounceTimer) {
209
+ clearTimeout(this.debounceTimer);
210
+ }
211
+
212
+ const capturedContext = this.context;
213
+ this.debounceTimer = setTimeout(() => {
214
+ if (this.pendingTranslations.size > 0) {
215
+ this._processBatch(capturedContext);
187
216
  }
217
+ }, this.debounceTimeout);
218
+
219
+ if (this.pendingCharCount >= this.batchThreshold) {
220
+ clearTimeout(this.debounceTimer);
221
+ this._processBatch(this.context);
188
222
  }
189
- return translation;
223
+ }
224
+
225
+ _dequeuePendingTranslation(key) {
226
+ const queuedText = this.pendingTranslations.get(key);
227
+ if (!queuedText) {
228
+ return;
229
+ }
230
+
231
+ this.pendingTranslations.delete(key);
232
+ this.pendingCharCount = Math.max(0, this.pendingCharCount - queuedText.length);
190
233
  }
191
234
 
192
235
  async _processBatch(context) {
@@ -220,15 +263,9 @@ class GPTrans {
220
263
  // Try different split strategies to be more robust
221
264
  let translatedTexts = translations.split(`\n${this.divider}\n`);
222
265
 
223
- // If split doesn't match batch size, try alternative separators
266
+ // If split doesn't match batch size, try without newlines around divider
224
267
  if (translatedTexts.length !== batch.length) {
225
- // Try without newlines around divider
226
268
  translatedTexts = translations.split(this.divider);
227
-
228
- // If still doesn't match, try with just newline
229
- if (translatedTexts.length !== batch.length) {
230
- translatedTexts = translations.split(/\n{2,}/); // Split by multiple newlines
231
- }
232
269
  }
233
270
 
234
271
  const contextHash = this._hash(context);
@@ -251,14 +288,23 @@ class GPTrans {
251
288
  return;
252
289
  }
253
290
 
291
+ // Detect suspicious duplicates
292
+ const trimmed = translatedTexts.map(t => t.trim());
293
+ const uniqueCount = new Set(trimmed).size;
294
+ if (batch.length > 2 && uniqueCount === 1) {
295
+ console.error(`❌ All ${batch.length} translations are identical ("${trimmed[0].slice(0, 60)}..."), discarding batch`);
296
+ console.error(` Batch keys: ${batch.map(([key]) => key).join(', ')}`);
297
+ return;
298
+ }
299
+
254
300
  batch.forEach(([key], index) => {
255
- if (!translatedTexts[index] || !translatedTexts[index].trim()) {
301
+ if (!trimmed[index]) {
256
302
  console.error(`❌ No translation found for ${key} at index ${index}`);
257
303
  console.error(` Original text: ${batch[index][1]}`);
258
304
  return;
259
305
  }
260
306
 
261
- this.dbTarget.set(contextHash, key, translatedTexts[index].trim());
307
+ this.dbTarget.set(contextHash, key, trimmed[index]);
262
308
  });
263
309
 
264
310
  } catch (e) {
@@ -551,10 +597,6 @@ class GPTrans {
551
597
 
552
598
  if (refinedTexts.length !== entries.length) {
553
599
  refinedTexts = refined.split(this.divider);
554
-
555
- if (refinedTexts.length !== entries.length) {
556
- refinedTexts = refined.split(/\n{2,}/);
557
- }
558
600
  }
559
601
 
560
602
  if (refinedTexts.length !== entries.length) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gptrans",
3
3
  "type": "module",
4
- "version": "2.0.6",
4
+ "version": "2.0.9",
5
5
  "description": "🚆 GPTrans - The smarter AI-powered way to translate.",
6
6
  "keywords": [
7
7
  "translate",
@@ -26,7 +26,7 @@
26
26
  },
27
27
  "main": "index.js",
28
28
  "scripts": {
29
- "test": "echo \"Error: no test specified\" && exit 1"
29
+ "test": "node --test test/**/*.test.js"
30
30
  },
31
31
  "author": "Martin Clasen",
32
32
  "license": "MIT",
@@ -10,6 +10,7 @@ Translation from {FROM_ISO} to {TARGET_ISO} ({TARGET_DENONYM} {TARGET_LANG}) wit
10
10
  {REFERENCES}
11
11
 
12
12
  # Return Format
13
+ - The input may contain multiple texts separated by `------`. Translate each one independently and return them in the same order, separated by `------`. The number of segments in your output must exactly match the number of segments in the input.
13
14
  - Provide the final translation within a code block using ```.
14
15
  - Do not include alternative translations, only provide the best translation.
15
16
 
@@ -0,0 +1,237 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import GPTrans from '../index.js';
4
+
5
+ function createMemoryDb() {
6
+ const store = new Map();
7
+ return {
8
+ get(context, key) {
9
+ return store.get(context)?.get(key);
10
+ },
11
+ set(context, key, value) {
12
+ if (!store.has(context)) {
13
+ store.set(context, new Map());
14
+ }
15
+ store.get(context).set(key, value);
16
+ },
17
+ entries() {
18
+ return Array.from(store.entries()).map(([context, pairs]) => [
19
+ context,
20
+ Object.fromEntries(pairs.entries())
21
+ ]);
22
+ },
23
+ async del(context, key) {
24
+ const pairs = store.get(context);
25
+ if (!pairs) return;
26
+ pairs.delete(key);
27
+ if (pairs.size === 0) store.delete(context);
28
+ }
29
+ };
30
+ }
31
+
32
+ function createTestInstance() {
33
+ const gp = new GPTrans({
34
+ from: 'es',
35
+ target: 'en-US',
36
+ debounceTimeout: 10_000,
37
+ batchThreshold: 50_000,
38
+ name: `unit_${Date.now()}_${Math.random().toString(36).slice(2)}`
39
+ });
40
+
41
+ gp.dbFrom = createMemoryDb();
42
+ gp.dbTarget = createMemoryDb();
43
+
44
+ return gp;
45
+ }
46
+
47
+ const stubMmix = () => ({ limiter: { updateSettings() {} } });
48
+
49
+ // --- Split logic ---
50
+
51
+ test('_processBatch splits correctly with \\n------\\n separators', async (t) => {
52
+ const gp = createTestInstance();
53
+ const originalMmix = GPTrans.mmix;
54
+ GPTrans.mmix = stubMmix;
55
+
56
+ gp._translate = async () => 'Hello\n------\nGoodbye\n------\nThank you';
57
+
58
+ gp.pendingTranslations.set('k1', 'Hola');
59
+ gp.pendingTranslations.set('k2', 'Adiós');
60
+ gp.pendingTranslations.set('k3', 'Gracias');
61
+ gp.pendingCharCount = 15;
62
+
63
+ await gp._processBatch('');
64
+
65
+ const h = gp._hash('');
66
+ assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
67
+ assert.equal(gp.dbTarget.get(h, 'k2'), 'Goodbye');
68
+ assert.equal(gp.dbTarget.get(h, 'k3'), 'Thank you');
69
+
70
+ GPTrans.mmix = originalMmix;
71
+ });
72
+
73
+ test('_processBatch falls back to split by divider without newlines', async (t) => {
74
+ const gp = createTestInstance();
75
+ const originalMmix = GPTrans.mmix;
76
+ GPTrans.mmix = stubMmix;
77
+
78
+ // Model returned divider without surrounding newlines
79
+ gp._translate = async () => 'Hello------Goodbye------Thank you';
80
+
81
+ gp.pendingTranslations.set('k1', 'Hola');
82
+ gp.pendingTranslations.set('k2', 'Adiós');
83
+ gp.pendingTranslations.set('k3', 'Gracias');
84
+ gp.pendingCharCount = 15;
85
+
86
+ await gp._processBatch('');
87
+
88
+ const h = gp._hash('');
89
+ assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
90
+ assert.equal(gp.dbTarget.get(h, 'k2'), 'Goodbye');
91
+ assert.equal(gp.dbTarget.get(h, 'k3'), 'Thank you');
92
+
93
+ GPTrans.mmix = originalMmix;
94
+ });
95
+
96
+ // --- Double-newline no longer used as fallback ---
97
+
98
+ test('_processBatch does NOT split by double newlines', async (t) => {
99
+ const gp = createTestInstance();
100
+ const originalMmix = GPTrans.mmix;
101
+ GPTrans.mmix = stubMmix;
102
+
103
+ // Response has no divider but has double newlines — old code would split by \n\n
104
+ // and accidentally "match" 3 chunks. New code should treat this as a mismatch.
105
+ gp._translate = async () => 'Hello world\n\nThis is a paragraph\n\nAnother paragraph';
106
+
107
+ gp.pendingTranslations.set('k1', 'Hola');
108
+ gp.pendingTranslations.set('k2', 'Adiós');
109
+ gp.pendingTranslations.set('k3', 'Gracias');
110
+ gp.pendingCharCount = 15;
111
+
112
+ const errors = [];
113
+ const origError = console.error;
114
+ console.error = (...args) => errors.push(args.join(' '));
115
+
116
+ await gp._processBatch('');
117
+
118
+ console.error = origError;
119
+
120
+ // Should hit mismatch path, not silently save wrong translations
121
+ assert.ok(errors.some(e => e.includes('Translation count mismatch')));
122
+
123
+ GPTrans.mmix = originalMmix;
124
+ });
125
+
126
+ // --- Duplicate detection ---
127
+
128
+ test('_processBatch discards batch when all translations are identical (3+)', async (t) => {
129
+ const gp = createTestInstance();
130
+ const originalMmix = GPTrans.mmix;
131
+ GPTrans.mmix = stubMmix;
132
+
133
+ gp._translate = async () =>
134
+ 'Everyone understands.\n------\nEveryone understands.\n------\nEveryone understands.\n------\nEveryone understands.';
135
+
136
+ gp.pendingTranslations.set('k1', 'La fusión será completa');
137
+ gp.pendingTranslations.set('k2', 'Y entonces entenderás');
138
+ gp.pendingTranslations.set('k3', 'Qué libertad tiene el que');
139
+ gp.pendingTranslations.set('k4', 'Todos lo entienden');
140
+ gp.pendingCharCount = 80;
141
+
142
+ const errors = [];
143
+ const origError = console.error;
144
+ console.error = (...args) => errors.push(args.join(' '));
145
+
146
+ await gp._processBatch('');
147
+
148
+ console.error = origError;
149
+
150
+ const h = gp._hash('');
151
+ // Nothing should be saved
152
+ assert.equal(gp.dbTarget.get(h, 'k1'), undefined);
153
+ assert.equal(gp.dbTarget.get(h, 'k2'), undefined);
154
+ assert.equal(gp.dbTarget.get(h, 'k3'), undefined);
155
+ assert.equal(gp.dbTarget.get(h, 'k4'), undefined);
156
+
157
+ assert.ok(errors.some(e => e.includes('translations are identical')));
158
+
159
+ GPTrans.mmix = originalMmix;
160
+ });
161
+
162
+ test('_processBatch allows batch of 2 with identical translations (edge case)', async (t) => {
163
+ const gp = createTestInstance();
164
+ const originalMmix = GPTrans.mmix;
165
+ GPTrans.mmix = stubMmix;
166
+
167
+ // Two items could legitimately have the same translation (e.g. synonyms)
168
+ gp._translate = async () => 'Yes\n------\nYes';
169
+
170
+ gp.pendingTranslations.set('k1', 'Sí');
171
+ gp.pendingTranslations.set('k2', 'Claro que sí');
172
+ gp.pendingCharCount = 15;
173
+
174
+ await gp._processBatch('');
175
+
176
+ const h = gp._hash('');
177
+ assert.equal(gp.dbTarget.get(h, 'k1'), 'Yes');
178
+ assert.equal(gp.dbTarget.get(h, 'k2'), 'Yes');
179
+
180
+ GPTrans.mmix = originalMmix;
181
+ });
182
+
183
+ test('_processBatch saves partial duplicates normally', async (t) => {
184
+ const gp = createTestInstance();
185
+ const originalMmix = GPTrans.mmix;
186
+ GPTrans.mmix = stubMmix;
187
+
188
+ // Some translations are the same but not all — this is valid
189
+ gp._translate = async () => 'Hello\n------\nHello\n------\nGoodbye';
190
+
191
+ gp.pendingTranslations.set('k1', 'Hola');
192
+ gp.pendingTranslations.set('k2', 'Buenos días');
193
+ gp.pendingTranslations.set('k3', 'Adiós');
194
+ gp.pendingCharCount = 20;
195
+
196
+ await gp._processBatch('');
197
+
198
+ const h = gp._hash('');
199
+ assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
200
+ assert.equal(gp.dbTarget.get(h, 'k2'), 'Hello');
201
+ assert.equal(gp.dbTarget.get(h, 'k3'), 'Goodbye');
202
+
203
+ GPTrans.mmix = originalMmix;
204
+ });
205
+
206
+ // --- Count mismatch saves partial ---
207
+
208
+ test('_processBatch saves partial results on count mismatch', async (t) => {
209
+ const gp = createTestInstance();
210
+ const originalMmix = GPTrans.mmix;
211
+ GPTrans.mmix = stubMmix;
212
+
213
+ // Model returned only 2 translations for a batch of 3
214
+ gp._translate = async () => 'Hello\n------\nGoodbye';
215
+
216
+ gp.pendingTranslations.set('k1', 'Hola');
217
+ gp.pendingTranslations.set('k2', 'Adiós');
218
+ gp.pendingTranslations.set('k3', 'Gracias');
219
+ gp.pendingCharCount = 15;
220
+
221
+ const errors = [];
222
+ const origError = console.error;
223
+ console.error = (...args) => errors.push(args.join(' '));
224
+
225
+ await gp._processBatch('');
226
+
227
+ console.error = origError;
228
+
229
+ const h = gp._hash('');
230
+ assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
231
+ assert.equal(gp.dbTarget.get(h, 'k2'), 'Goodbye');
232
+ assert.equal(gp.dbTarget.get(h, 'k3'), undefined); // Not saved
233
+
234
+ assert.ok(errors.some(e => e.includes('Translation count mismatch')));
235
+
236
+ GPTrans.mmix = originalMmix;
237
+ });
@@ -0,0 +1,143 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import GPTrans from '../index.js';
4
+
5
+ function createMemoryDb() {
6
+ const store = new Map();
7
+ return {
8
+ get(context, key) {
9
+ return store.get(context)?.get(key);
10
+ },
11
+ set(context, key, value) {
12
+ if (!store.has(context)) {
13
+ store.set(context, new Map());
14
+ }
15
+ store.get(context).set(key, value);
16
+ },
17
+ entries() {
18
+ return Array.from(store.entries()).map(([context, pairs]) => [
19
+ context,
20
+ Object.fromEntries(pairs.entries())
21
+ ]);
22
+ },
23
+ async del(context, key) {
24
+ const pairs = store.get(context);
25
+ if (!pairs) {
26
+ return;
27
+ }
28
+ pairs.delete(key);
29
+ if (pairs.size === 0) {
30
+ store.delete(context);
31
+ }
32
+ }
33
+ };
34
+ }
35
+
36
+ function createTestInstance() {
37
+ const gp = new GPTrans({
38
+ from: 'en-US',
39
+ target: 'es',
40
+ debounceTimeout: 10_000,
41
+ batchThreshold: 5000,
42
+ name: `unit_${Date.now()}_${Math.random().toString(36).slice(2)}`
43
+ });
44
+
45
+ gp.dbFrom = createMemoryDb();
46
+ gp.dbTarget = createMemoryDb();
47
+
48
+ return gp;
49
+ }
50
+
51
+ test('tAsync translates immediately and caches result', async () => {
52
+ const gp = createTestInstance();
53
+ let translateCalls = 0;
54
+
55
+ gp._translate = async (text) => {
56
+ translateCalls += 1;
57
+ assert.equal(text, 'Hello {name}');
58
+ return 'Hola {name}';
59
+ };
60
+
61
+ const first = await gp.tAsync('Hello {name}', { name: 'Martin' });
62
+ const second = await gp.tAsync('Hello {name}', { name: 'Martin' });
63
+
64
+ assert.equal(first, 'Hola Martin');
65
+ assert.equal(second, 'Hola Martin');
66
+ assert.equal(translateCalls, 1);
67
+
68
+ if (gp.debounceTimer) {
69
+ clearTimeout(gp.debounceTimer);
70
+ }
71
+ });
72
+
73
+ test('tAsync removes queued batch item to avoid duplicate work', async () => {
74
+ const gp = createTestInstance();
75
+ let translateCalls = 0;
76
+
77
+ gp._translate = async () => {
78
+ translateCalls += 1;
79
+ return 'Comprar';
80
+ };
81
+
82
+ const key = gp._textToKey('Buy');
83
+
84
+ // Enqueue via sync API (batch/background path).
85
+ const syncValue = gp.t('Buy');
86
+ assert.equal(syncValue, 'Buy');
87
+ assert.equal(gp.pendingTranslations.get(key), 'Buy');
88
+ assert.equal(gp.pendingCharCount, 'Buy'.length);
89
+
90
+ // Force immediate translation for same key.
91
+ const asyncValue = await gp.tAsync('Buy');
92
+ assert.equal(asyncValue, 'Comprar');
93
+
94
+ assert.equal(translateCalls, 1);
95
+ assert.equal(gp.pendingTranslations.has(key), false);
96
+ assert.equal(gp.pendingCharCount, 0);
97
+
98
+ if (gp.debounceTimer) {
99
+ clearTimeout(gp.debounceTimer);
100
+ }
101
+ });
102
+
103
+ test('preload translates missing keys from dbFrom into dbTarget', async () => {
104
+ const gp = createTestInstance();
105
+ gp.debounceTimeout = 1;
106
+ let translateCalls = 0;
107
+
108
+ const context = 'checkout';
109
+ const sourceText = 'Buy now';
110
+ const key = gp._textToKey(sourceText);
111
+ const contextHash = gp._hash(context);
112
+
113
+ gp.dbFrom.set(context, key, sourceText);
114
+ gp.dbFrom.set('_context', contextHash, context);
115
+
116
+ gp._translate = async (text) => {
117
+ translateCalls += 1;
118
+ assert.equal(text, sourceText);
119
+ return 'Comprar ahora';
120
+ };
121
+
122
+ const originalMmix = GPTrans.mmix;
123
+ GPTrans.mmix = () => ({
124
+ limiter: {
125
+ updateSettings() {
126
+ }
127
+ }
128
+ });
129
+
130
+ try {
131
+ await gp.preload();
132
+
133
+ assert.equal(translateCalls, 1);
134
+ assert.equal(gp.dbTarget.get(contextHash, key), 'Comprar ahora');
135
+ assert.deepEqual(gp.preloadReferences, []);
136
+ assert.equal(gp.preloadBaseLanguage, null);
137
+ } finally {
138
+ GPTrans.mmix = originalMmix;
139
+ if (gp.debounceTimer) {
140
+ clearTimeout(gp.debounceTimer);
141
+ }
142
+ }
143
+ });