gptrans 2.0.8 → 2.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +14 -16
- package/package.json +1 -1
- package/prompt/translate.md +1 -0
- package/test/gptrans.batch.test.js +237 -0
package/index.js
CHANGED
|
@@ -17,7 +17,6 @@ class GPTrans {
|
|
|
17
17
|
if (!this.#mmixInstances.has(key)) {
|
|
18
18
|
let instance = ModelMix.new({
|
|
19
19
|
config: {
|
|
20
|
-
max_history: 1,
|
|
21
20
|
debug,
|
|
22
21
|
bottleneck: {
|
|
23
22
|
minTime: 15000,
|
|
@@ -31,7 +30,7 @@ class GPTrans {
|
|
|
31
30
|
if (typeof instance[model] !== 'function') {
|
|
32
31
|
throw new Error(
|
|
33
32
|
`Model "${model}" is not available. Please check the model name. ` +
|
|
34
|
-
`Available models include:
|
|
33
|
+
`Available models include: gpt51, gpt52, sonnet46, sonnet45, opus46, haiku45, etc.`
|
|
35
34
|
);
|
|
36
35
|
}
|
|
37
36
|
instance = instance[model]();
|
|
@@ -64,7 +63,7 @@ class GPTrans {
|
|
|
64
63
|
return isLanguageAvailable(langCode);
|
|
65
64
|
}
|
|
66
65
|
|
|
67
|
-
constructor({ from = 'en-US', target = 'es', model = '
|
|
66
|
+
constructor({ from = 'en-US', target = 'es', model = 'sonnet46', batchThreshold = 1500, debounceTimeout = 500, promptFile = null, name = '', context = '', instruction = '', freeze = false, debug = false } = {}) {
|
|
68
67
|
|
|
69
68
|
target = this.normalizeBCP47(target);
|
|
70
69
|
from = this.normalizeBCP47(from);
|
|
@@ -263,15 +262,9 @@ class GPTrans {
|
|
|
263
262
|
// Try different split strategies to be more robust
|
|
264
263
|
let translatedTexts = translations.split(`\n${this.divider}\n`);
|
|
265
264
|
|
|
266
|
-
// If split doesn't match batch size, try
|
|
265
|
+
// If split doesn't match batch size, try without newlines around divider
|
|
267
266
|
if (translatedTexts.length !== batch.length) {
|
|
268
|
-
// Try without newlines around divider
|
|
269
267
|
translatedTexts = translations.split(this.divider);
|
|
270
|
-
|
|
271
|
-
// If still doesn't match, try with just newline
|
|
272
|
-
if (translatedTexts.length !== batch.length) {
|
|
273
|
-
translatedTexts = translations.split(/\n{2,}/); // Split by multiple newlines
|
|
274
|
-
}
|
|
275
268
|
}
|
|
276
269
|
|
|
277
270
|
const contextHash = this._hash(context);
|
|
@@ -294,14 +287,23 @@ class GPTrans {
|
|
|
294
287
|
return;
|
|
295
288
|
}
|
|
296
289
|
|
|
290
|
+
// Detect suspicious duplicates
|
|
291
|
+
const trimmed = translatedTexts.map(t => t.trim());
|
|
292
|
+
const uniqueCount = new Set(trimmed).size;
|
|
293
|
+
if (batch.length > 2 && uniqueCount === 1) {
|
|
294
|
+
console.error(`❌ All ${batch.length} translations are identical ("${trimmed[0].slice(0, 60)}..."), discarding batch`);
|
|
295
|
+
console.error(` Batch keys: ${batch.map(([key]) => key).join(', ')}`);
|
|
296
|
+
return;
|
|
297
|
+
}
|
|
298
|
+
|
|
297
299
|
batch.forEach(([key], index) => {
|
|
298
|
-
if (!
|
|
300
|
+
if (!trimmed[index]) {
|
|
299
301
|
console.error(`❌ No translation found for ${key} at index ${index}`);
|
|
300
302
|
console.error(` Original text: ${batch[index][1]}`);
|
|
301
303
|
return;
|
|
302
304
|
}
|
|
303
305
|
|
|
304
|
-
this.dbTarget.set(contextHash, key,
|
|
306
|
+
this.dbTarget.set(contextHash, key, trimmed[index]);
|
|
305
307
|
});
|
|
306
308
|
|
|
307
309
|
} catch (e) {
|
|
@@ -594,10 +596,6 @@ class GPTrans {
|
|
|
594
596
|
|
|
595
597
|
if (refinedTexts.length !== entries.length) {
|
|
596
598
|
refinedTexts = refined.split(this.divider);
|
|
597
|
-
|
|
598
|
-
if (refinedTexts.length !== entries.length) {
|
|
599
|
-
refinedTexts = refined.split(/\n{2,}/);
|
|
600
|
-
}
|
|
601
599
|
}
|
|
602
600
|
|
|
603
601
|
if (refinedTexts.length !== entries.length) {
|
package/package.json
CHANGED
package/prompt/translate.md
CHANGED
|
@@ -10,6 +10,7 @@ Translation from {FROM_ISO} to {TARGET_ISO} ({TARGET_DENONYM} {TARGET_LANG}) wit
|
|
|
10
10
|
{REFERENCES}
|
|
11
11
|
|
|
12
12
|
# Return Format
|
|
13
|
+
- The input may contain multiple texts separated by `------`. Translate each one independently and return them in the same order, separated by `------`. The number of segments in your output must exactly match the number of segments in the input.
|
|
13
14
|
- Provide the final translation within a code block using ```.
|
|
14
15
|
- Do not include alternative translations, only provide the best translation.
|
|
15
16
|
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import GPTrans from '../index.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Builds an in-memory store exposing `get`, `set`, `entries` and `del`,
 * used by the tests below in place of `gp.dbFrom` / `gp.dbTarget`.
 *
 * Data lives in a two-level Map: context -> (key -> value). Each call
 * returns an independent store.
 *
 * @returns {{
 *   get(context: string, key: string): *,
 *   set(context: string, key: string, value: *): void,
 *   entries(): Array<[string, Object]>,
 *   del(context: string, key: string): Promise<void>
 * }}
 */
function createMemoryDb() {
    const store = new Map();

    return {
        /** Returns the value stored under (context, key), or undefined when absent. */
        get(context, key) {
            return store.get(context)?.get(key);
        },

        /** Stores value under (context, key), creating the context bucket on first use. */
        set(context, key, value) {
            let pairs = store.get(context);
            if (!pairs) {
                pairs = new Map();
                store.set(context, pairs);
            }
            pairs.set(key, value);
        },

        /** Returns a snapshot as [context, { key: value, ... }] pairs in insertion order. */
        entries() {
            // Array.from and Object.fromEntries both accept any iterable of pairs,
            // so the Maps can be passed directly.
            return Array.from(store, ([context, pairs]) => [
                context,
                Object.fromEntries(pairs)
            ]);
        },

        /** Removes (context, key); drops the context bucket once it becomes empty. */
        async del(context, key) {
            const pairs = store.get(context);
            if (!pairs) return;
            pairs.delete(key);
            if (pairs.size === 0) store.delete(context);
        }
    };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Creates a GPTrans instance translating from 'es' to 'en-US' whose
 * `dbFrom` and `dbTarget` are replaced by fresh in-memory stores.
 *
 * The instance name is made unique per call from the current timestamp
 * plus a random base-36 suffix.
 * NOTE(review): the large debounceTimeout / batchThreshold presumably keep
 * the instance from flushing on its own, since the tests call
 * `_processBatch` directly — confirm against index.js.
 *
 * @returns {GPTrans} instance backed by in-memory stores
 */
function createTestInstance() {
    const randomSuffix = Math.random().toString(36).slice(2);
    const options = {
        from: 'es',
        target: 'en-US',
        debounceTimeout: 10_000,
        batchThreshold: 50_000,
        name: `unit_${Date.now()}_${randomSuffix}`
    };

    const instance = new GPTrans(options);
    instance.dbFrom = createMemoryDb();
    instance.dbTarget = createMemoryDb();
    return instance;
}
|
|
46
|
+
|
|
47
|
+
// Stand-in assigned to GPTrans.mmix by the tests: returns an object whose
// `limiter.updateSettings` is a no-op.
const stubMmix = () => {
    const limiter = { updateSettings() {} };
    return { limiter };
};
|
|
48
|
+
|
|
49
|
+
// --- Split logic ---
|
|
50
|
+
|
|
51
|
+
// A response whose segments are separated by "\n------\n" is split into one
// translation per pending key and stored in dbTarget in order.
test('_processBatch splits correctly with \\n------\\n separators', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // try/finally so the static stub is removed even when an assertion fails
    // or _processBatch rejects; otherwise later tests would capture the stub
    // as their "original".
    try {
        gp._translate = async () => 'Hello\n------\nGoodbye\n------\nThank you';

        gp.pendingTranslations.set('k1', 'Hola');
        gp.pendingTranslations.set('k2', 'Adiós');
        gp.pendingTranslations.set('k3', 'Gracias');
        gp.pendingCharCount = 15;

        await gp._processBatch('');

        const h = gp._hash('');
        assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
        assert.equal(gp.dbTarget.get(h, 'k2'), 'Goodbye');
        assert.equal(gp.dbTarget.get(h, 'k3'), 'Thank you');
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|
|
72
|
+
|
|
73
|
+
// When the model omits the newlines around the divider, the bare "------"
// divider is still expected to separate the translations.
test('_processBatch falls back to split by divider without newlines', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // try/finally so the static stub is removed even when an assertion fails
    // or _processBatch rejects.
    try {
        // Model returned divider without surrounding newlines
        gp._translate = async () => 'Hello------Goodbye------Thank you';

        gp.pendingTranslations.set('k1', 'Hola');
        gp.pendingTranslations.set('k2', 'Adiós');
        gp.pendingTranslations.set('k3', 'Gracias');
        gp.pendingCharCount = 15;

        await gp._processBatch('');

        const h = gp._hash('');
        assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
        assert.equal(gp.dbTarget.get(h, 'k2'), 'Goodbye');
        assert.equal(gp.dbTarget.get(h, 'k3'), 'Thank you');
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|
|
95
|
+
|
|
96
|
+
// --- Double-newline no longer used as fallback ---
|
|
97
|
+
|
|
98
|
+
// A response without any divider must be reported as a count mismatch rather
// than being split on blank lines.
test('_processBatch does NOT split by double newlines', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // Outer try/finally restores the static stub on every exit path.
    try {
        // Response has no divider but has double newlines — old code would split by \n\n
        // and accidentally "match" 3 chunks. New code should treat this as a mismatch.
        gp._translate = async () => 'Hello world\n\nThis is a paragraph\n\nAnother paragraph';

        gp.pendingTranslations.set('k1', 'Hola');
        gp.pendingTranslations.set('k2', 'Adiós');
        gp.pendingTranslations.set('k3', 'Gracias');
        gp.pendingCharCount = 15;

        const errors = [];
        const origError = console.error;
        console.error = (...args) => errors.push(args.join(' '));

        // Inner try/finally: console.error must come back even if
        // _processBatch rejects, or later failures would be logged nowhere.
        try {
            await gp._processBatch('');
        } finally {
            console.error = origError;
        }

        // Should hit mismatch path, not silently save wrong translations
        assert.ok(errors.some(e => e.includes('Translation count mismatch')));
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|
|
125
|
+
|
|
126
|
+
// --- Duplicate detection ---
|
|
127
|
+
|
|
128
|
+
// Four different source texts that all come back with the same translation
// are treated as a bad response: nothing is stored and an error is logged.
test('_processBatch discards batch when all translations are identical (3+)', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // Outer try/finally restores the static stub on every exit path.
    try {
        gp._translate = async () =>
            'Everyone understands.\n------\nEveryone understands.\n------\nEveryone understands.\n------\nEveryone understands.';

        gp.pendingTranslations.set('k1', 'La fusión será completa');
        gp.pendingTranslations.set('k2', 'Y entonces entenderás');
        gp.pendingTranslations.set('k3', 'Qué libertad tiene el que');
        gp.pendingTranslations.set('k4', 'Todos lo entienden');
        gp.pendingCharCount = 80;

        const errors = [];
        const origError = console.error;
        console.error = (...args) => errors.push(args.join(' '));

        // Inner try/finally: console.error must come back even if
        // _processBatch rejects.
        try {
            await gp._processBatch('');
        } finally {
            console.error = origError;
        }

        const h = gp._hash('');
        // Nothing should be saved
        assert.equal(gp.dbTarget.get(h, 'k1'), undefined);
        assert.equal(gp.dbTarget.get(h, 'k2'), undefined);
        assert.equal(gp.dbTarget.get(h, 'k3'), undefined);
        assert.equal(gp.dbTarget.get(h, 'k4'), undefined);

        assert.ok(errors.some(e => e.includes('translations are identical')));
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|
|
161
|
+
|
|
162
|
+
// A batch of exactly two entries may share one translation; both are stored.
test('_processBatch allows batch of 2 with identical translations (edge case)', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // try/finally so the static stub is removed even when an assertion fails
    // or _processBatch rejects.
    try {
        // Two items could legitimately have the same translation (e.g. synonyms)
        gp._translate = async () => 'Yes\n------\nYes';

        gp.pendingTranslations.set('k1', 'Sí');
        gp.pendingTranslations.set('k2', 'Claro que sí');
        gp.pendingCharCount = 15;

        await gp._processBatch('');

        const h = gp._hash('');
        assert.equal(gp.dbTarget.get(h, 'k1'), 'Yes');
        assert.equal(gp.dbTarget.get(h, 'k2'), 'Yes');
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|
|
182
|
+
|
|
183
|
+
// Duplicates among some (but not all) translations are accepted and stored.
test('_processBatch saves partial duplicates normally', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // try/finally so the static stub is removed even when an assertion fails
    // or _processBatch rejects.
    try {
        // Some translations are the same but not all — this is valid
        gp._translate = async () => 'Hello\n------\nHello\n------\nGoodbye';

        gp.pendingTranslations.set('k1', 'Hola');
        gp.pendingTranslations.set('k2', 'Buenos días');
        gp.pendingTranslations.set('k3', 'Adiós');
        gp.pendingCharCount = 20;

        await gp._processBatch('');

        const h = gp._hash('');
        assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
        assert.equal(gp.dbTarget.get(h, 'k2'), 'Hello');
        assert.equal(gp.dbTarget.get(h, 'k3'), 'Goodbye');
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|
|
205
|
+
|
|
206
|
+
// --- Count mismatch saves partial ---
|
|
207
|
+
|
|
208
|
+
// When fewer translations than pending keys come back, the ones that are
// present are stored by position, the rest stay unset, and a mismatch error
// is logged.
test('_processBatch saves partial results on count mismatch', async () => {
    const gp = createTestInstance();
    const originalMmix = GPTrans.mmix;
    GPTrans.mmix = stubMmix;

    // Outer try/finally restores the static stub on every exit path.
    try {
        // Model returned only 2 translations for a batch of 3
        gp._translate = async () => 'Hello\n------\nGoodbye';

        gp.pendingTranslations.set('k1', 'Hola');
        gp.pendingTranslations.set('k2', 'Adiós');
        gp.pendingTranslations.set('k3', 'Gracias');
        gp.pendingCharCount = 15;

        const errors = [];
        const origError = console.error;
        console.error = (...args) => errors.push(args.join(' '));

        // Inner try/finally: console.error must come back even if
        // _processBatch rejects.
        try {
            await gp._processBatch('');
        } finally {
            console.error = origError;
        }

        const h = gp._hash('');
        assert.equal(gp.dbTarget.get(h, 'k1'), 'Hello');
        assert.equal(gp.dbTarget.get(h, 'k2'), 'Goodbye');
        assert.equal(gp.dbTarget.get(h, 'k3'), undefined); // Not saved

        assert.ok(errors.some(e => e.includes('Translation count mismatch')));
    } finally {
        GPTrans.mmix = originalMmix;
    }
});
|