gptrans 1.4.6 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -8
- package/db/gptrans_en.json +9 -0
- package/db/gptrans_es.json +5 -0
- package/db/gptrans_from_es.json +7 -1
- package/demo/case_4.js +17 -0
- package/index.js +71 -49
- package/isoAssoc.js +6 -2
- package/package.json +1 -1
- package/prompt/translate.md +2 -0
- package/db/gptrans_ar.json +0 -3
- package/db/gptrans_es-AR.json +0 -13
- package/db/gptrans_es-ES.json +0 -3
- package/db/gptrans_from_en-US.json +0 -11
- package/db/gptrans_from_es-AR.json +0 -3
- package/db/gptrans_from_es-ES.json +0 -5
- package/db/gptrans_it.json +0 -11
package/README.md
CHANGED
|
@@ -9,10 +9,10 @@ Whether you're building a multilingual website, a mobile app, or a localization
|
|
|
9
9
|
## ✨ Features
|
|
10
10
|
|
|
11
11
|
- **AI-Powered Translations:** Harness advanced models like OpenAI's GPT and Anthropic's Sonnet for high-quality translations
|
|
12
|
-
- **Smart Batching & Debouncing:**
|
|
13
|
-
- **Caching with
|
|
12
|
+
- **Smart Batching & Debouncing:** Translations are processed in batches, not only for efficiency but also to provide better context. By sending multiple related texts together, the AI model can better understand the overall context and produce more accurate and consistent translations across related terms and phrases.
|
|
13
|
+
- **Caching with JSON:** Quickly retrieves cached translations to boost performance
|
|
14
14
|
- **Parameter Substitution:** Dynamically replace placeholders in your translations
|
|
15
|
-
- **
|
|
15
|
+
- **Smart Context Handling:** Add contextual information to improve translation accuracy. Perfect for gender-aware translations, domain-specific content, or any scenario where additional context helps produce better results. The context is automatically cleared after each translation to prevent unintended effects.
|
|
16
16
|
|
|
17
17
|
## 📦 Installation
|
|
18
18
|
|
|
@@ -65,20 +65,31 @@ When creating a new instance of GPTrans, you can customize:
|
|
|
65
65
|
|
|
66
66
|
| Option | Description | Default |
|
|
67
67
|
|--------|-------------|---------|
|
|
68
|
-
| `from` | Source language locale | `es-AR` |
|
|
69
|
-
| `target` | Target language locale | `en-US` |
|
|
68
|
+
| `from` | Source language locale (BCP 47) | `en-US` |
|
|
69
|
+
| `target` | Target language locale (BCP 47) | `es-AR` |
|
|
70
70
|
| `model` | Translation model key | `claude-3-7-sonnet-20250219` |
|
|
71
71
|
| `batchThreshold` | Maximum number of characters to accumulate before triggering batch processing | `1500` |
|
|
72
72
|
| `debounceTimeout` | Time in milliseconds to wait before processing translations | `500` |
|
|
73
73
|
|
|
74
|
+
### BCP 47 Language Tags
|
|
75
|
+
|
|
76
|
+
GPTrans uses BCP 47 language tags for language specification. BCP 47 is the standard for language tags that combines language, script, and region codes. Here are some common examples:
|
|
77
|
+
|
|
78
|
+
- `en-US` - English (United States)
|
|
79
|
+
- `es-AR` - Spanish (Argentina)
|
|
80
|
+
- `pt-BR` - Portuguese (Brazil)
|
|
81
|
+
|
|
82
|
+
For simplified or universal language codes, you can omit the region specification:
|
|
83
|
+
- `es` - Spanish (Universal)
|
|
84
|
+
|
|
74
85
|
## 🔍 How It Works
|
|
75
86
|
|
|
76
87
|
1. **First-Time Translation Behavior:** On the first request, GPTrans will return the original text while processing the translation in the background. This ensures your application remains responsive without waiting for API calls.
|
|
77
|
-
2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<
|
|
78
|
-
3. **Smart Batch Processing:**
|
|
88
|
+
2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<tag>.json`. Subsequent requests for the same text will be served instantly from the cache.
|
|
89
|
+
3. **Smart Batch Processing:** Automatically groups translation requests to optimize API usage and provide better context.
|
|
79
90
|
4. **Dynamic Model Integration:** Easily plug in multiple AI translation providers with the ModelMix library.
|
|
80
91
|
5. **Customizable Prompts:** Load and modify translation prompts (see the `prompt/translate.md` file) to fine-tune the translation output.
|
|
81
|
-
6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<
|
|
92
|
+
6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<tag>.json` file:
|
|
82
93
|
|
|
83
94
|
```json
|
|
84
95
|
{
|
package/db/gptrans_from_es.json
CHANGED
package/demo/case_4.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import GPTrans from '../index.js';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
// Case 4: Translate from Spanish to English
|
|
5
|
+
const model = new GPTrans({
|
|
6
|
+
from: 'es',
|
|
7
|
+
target: 'en',
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
await model.preload();
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
console.log(model.setContext().t('Eres muy bueno'));
|
|
14
|
+
console.log(model.setContext('El mensaje es para una mujer').t('Eres muy bueno'));
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
console.log(model.setContext().t('Tienes fuego?'));
|
package/index.js
CHANGED
|
@@ -9,7 +9,16 @@ class GPTrans {
|
|
|
9
9
|
|
|
10
10
|
static get mmix() {
|
|
11
11
|
if (!this.#mmixInstance) {
|
|
12
|
-
const mmix = new ModelMix(
|
|
12
|
+
const mmix = new ModelMix({
|
|
13
|
+
config: {
|
|
14
|
+
max_history: 1,
|
|
15
|
+
debug: false,
|
|
16
|
+
bottleneck: {
|
|
17
|
+
minTime: 15000,
|
|
18
|
+
maxConcurrent: 1
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
});
|
|
13
22
|
|
|
14
23
|
mmix.attach(new MixOpenAI());
|
|
15
24
|
mmix.attach(new MixAnthropic());
|
|
@@ -23,7 +32,7 @@ class GPTrans {
|
|
|
23
32
|
return isLanguageAvailable(langCode);
|
|
24
33
|
}
|
|
25
34
|
|
|
26
|
-
constructor({ from = 'en-US', target = 'es-AR', model = 'claude-3-7-sonnet-20250219', batchThreshold =
|
|
35
|
+
constructor({ from = 'en-US', target = 'es-AR', model = 'claude-3-7-sonnet-20250219', batchThreshold = 1500, debounceTimeout = 500, promptFile = null, context = '' }) {
|
|
27
36
|
|
|
28
37
|
try {
|
|
29
38
|
dotenv.config();
|
|
@@ -35,8 +44,8 @@ class GPTrans {
|
|
|
35
44
|
this.dbFrom = new DeepBase({ name: 'gptrans_from_' + from });
|
|
36
45
|
|
|
37
46
|
try {
|
|
38
|
-
this.
|
|
39
|
-
this.
|
|
47
|
+
this.replaceTarget = isoAssoc(target, 'TARGET_');
|
|
48
|
+
this.replaceFrom = isoAssoc(from, 'FROM_');
|
|
40
49
|
} catch (e) {
|
|
41
50
|
throw new Error(`Invalid target: ${target}`);
|
|
42
51
|
}
|
|
@@ -50,14 +59,7 @@ class GPTrans {
|
|
|
50
59
|
this.promptFile = promptFile ?? new URL('./prompt/translate.md', import.meta.url).pathname;
|
|
51
60
|
this.context = context;
|
|
52
61
|
this.modelConfig = {
|
|
53
|
-
|
|
54
|
-
max_history: 1,
|
|
55
|
-
debug: false,
|
|
56
|
-
bottleneck: {
|
|
57
|
-
maxConcurrent: 2,
|
|
58
|
-
}
|
|
59
|
-
},
|
|
60
|
-
options: {
|
|
62
|
+
options: {
|
|
61
63
|
max_tokens: batchThreshold,
|
|
62
64
|
temperature: 0
|
|
63
65
|
}
|
|
@@ -68,7 +70,7 @@ class GPTrans {
|
|
|
68
70
|
setContext(context = '') {
|
|
69
71
|
if (this.context !== context && this.pendingTranslations.size > 0) {
|
|
70
72
|
clearTimeout(this.debounceTimer);
|
|
71
|
-
this._processBatch();
|
|
73
|
+
this._processBatch(this.context);
|
|
72
74
|
}
|
|
73
75
|
this.context = context;
|
|
74
76
|
return this;
|
|
@@ -85,15 +87,27 @@ class GPTrans {
|
|
|
85
87
|
}
|
|
86
88
|
|
|
87
89
|
get(key, text) {
|
|
88
|
-
|
|
90
|
+
|
|
91
|
+
if (!text || !text.trim()) {
|
|
92
|
+
return text;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const contextHash = this._hash(this.context);
|
|
96
|
+
const translation = this.dbTarget.get(contextHash, key);
|
|
97
|
+
|
|
89
98
|
if (!translation) {
|
|
90
|
-
this.
|
|
91
|
-
|
|
99
|
+
if (!this.dbFrom.get(this.context, key)) {
|
|
100
|
+
this.dbFrom.set(this.context, key, text);
|
|
101
|
+
}
|
|
92
102
|
|
|
93
|
-
if
|
|
94
|
-
|
|
103
|
+
// Skip translation if context is empty and languages are the same
|
|
104
|
+
if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
|
|
105
|
+
return text;
|
|
95
106
|
}
|
|
96
107
|
|
|
108
|
+
this.pendingTranslations.set(key, text);
|
|
109
|
+
this.pendingCharCount += text.length; // Update character count
|
|
110
|
+
|
|
97
111
|
// Clear existing timer
|
|
98
112
|
if (this.debounceTimer) {
|
|
99
113
|
clearTimeout(this.debounceTimer);
|
|
@@ -102,27 +116,31 @@ class GPTrans {
|
|
|
102
116
|
// Set new timer
|
|
103
117
|
this.debounceTimer = setTimeout(() => {
|
|
104
118
|
if (this.pendingTranslations.size > 0) {
|
|
105
|
-
this._processBatch();
|
|
119
|
+
this._processBatch(this.context);
|
|
106
120
|
}
|
|
107
121
|
}, this.debounceTimeout);
|
|
108
122
|
|
|
109
123
|
// Process if we hit the character count threshold
|
|
110
124
|
if (this.pendingCharCount >= this.batchThreshold) {
|
|
111
125
|
clearTimeout(this.debounceTimer);
|
|
112
|
-
this._processBatch();
|
|
126
|
+
this._processBatch(this.context);
|
|
113
127
|
}
|
|
114
128
|
}
|
|
115
129
|
return translation;
|
|
116
130
|
}
|
|
117
131
|
|
|
118
|
-
async _processBatch() {
|
|
132
|
+
async _processBatch(context) {
|
|
119
133
|
this.processing = true;
|
|
120
134
|
|
|
121
135
|
const batch = Array.from(this.pendingTranslations.entries());
|
|
122
136
|
|
|
123
137
|
// Clear pending translations and character count before awaiting translation
|
|
124
138
|
this.pendingTranslations.clear();
|
|
139
|
+
|
|
125
140
|
this.modelConfig.options.max_tokens = this.pendingCharCount + 1000;
|
|
141
|
+
const minTime = Math.floor((60000 / (8000 / this.pendingCharCount)) * 1.4);
|
|
142
|
+
GPTrans.mmix.limiter.updateSettings({ minTime });
|
|
143
|
+
|
|
126
144
|
this.pendingCharCount = 0;
|
|
127
145
|
|
|
128
146
|
const textsToTranslate = batch.map(([_, text]) => text).join('\n---\n');
|
|
@@ -130,8 +148,17 @@ class GPTrans {
|
|
|
130
148
|
const translations = await this._translate(textsToTranslate);
|
|
131
149
|
const translatedTexts = translations.split('\n---\n');
|
|
132
150
|
|
|
151
|
+
const contextHash = this._hash(context);
|
|
133
152
|
batch.forEach(([key], index) => {
|
|
134
|
-
|
|
153
|
+
|
|
154
|
+
if (!translatedTexts[index]) {
|
|
155
|
+
console.log(translations);
|
|
156
|
+
console.error(`No translation found for ${key}`);
|
|
157
|
+
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
this.dbTarget.set(contextHash, key, translatedTexts[index].trim());
|
|
135
162
|
});
|
|
136
163
|
|
|
137
164
|
} catch (e) {
|
|
@@ -142,6 +169,7 @@ class GPTrans {
|
|
|
142
169
|
}
|
|
143
170
|
|
|
144
171
|
async _translate(text) {
|
|
172
|
+
|
|
145
173
|
const model = GPTrans.mmix.create(this.modelKey, this.modelConfig);
|
|
146
174
|
|
|
147
175
|
model.setSystem("You are an expert translator specialized in literary translation between FROM_LANG and TARGET_DENONYM TARGET_LANG.");
|
|
@@ -149,8 +177,8 @@ class GPTrans {
|
|
|
149
177
|
model.addTextFromFile(this.promptFile);
|
|
150
178
|
|
|
151
179
|
model.replace({ INPUT: text, CONTEXT: this.context });
|
|
152
|
-
model.replace(this.
|
|
153
|
-
model.replace(this.
|
|
180
|
+
model.replace(this.replaceTarget);
|
|
181
|
+
model.replace(this.replaceFrom);
|
|
154
182
|
|
|
155
183
|
const response = await model.message();
|
|
156
184
|
|
|
@@ -171,39 +199,33 @@ class GPTrans {
|
|
|
171
199
|
|
|
172
200
|
let key = words.map((x) => x.slice(0, maxlen)).join("_");
|
|
173
201
|
key += key ? '_' : '';
|
|
174
|
-
key +=
|
|
202
|
+
key += this._hash(text);
|
|
175
203
|
return key;
|
|
176
204
|
}
|
|
177
205
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
const translator = new GPTrans({
|
|
182
|
-
from,
|
|
183
|
-
target,
|
|
184
|
-
model,
|
|
185
|
-
batchThreshold,
|
|
186
|
-
debounceTimeout,
|
|
187
|
-
});
|
|
206
|
+
_hash(input) {
|
|
207
|
+
return stringHash(input).toString(36);
|
|
208
|
+
}
|
|
188
209
|
|
|
189
|
-
|
|
190
|
-
for (const [
|
|
191
|
-
|
|
210
|
+
async preload() {
|
|
211
|
+
for (const [context, pairs] of this.dbFrom.entries()) {
|
|
212
|
+
this.setContext(context);
|
|
213
|
+
for (const [key, text] of Object.entries(pairs)) {
|
|
214
|
+
this.get(key, text);
|
|
215
|
+
}
|
|
192
216
|
}
|
|
193
217
|
|
|
194
218
|
// Wait for any pending translations to complete
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
});
|
|
204
|
-
}
|
|
219
|
+
await new Promise(resolve => {
|
|
220
|
+
const checkInterval = setInterval(() => {
|
|
221
|
+
if (this.dbFrom.keys().length === this.dbTarget.keys().length) {
|
|
222
|
+
clearInterval(checkInterval);
|
|
223
|
+
resolve();
|
|
224
|
+
}
|
|
225
|
+
}, 100);
|
|
226
|
+
});
|
|
205
227
|
|
|
206
|
-
return
|
|
228
|
+
return this;
|
|
207
229
|
}
|
|
208
230
|
}
|
|
209
231
|
|
package/isoAssoc.js
CHANGED
|
@@ -180,10 +180,14 @@ export function isoAssoc(iso, prefix = '') {
|
|
|
180
180
|
|
|
181
181
|
const parts = iso.toLowerCase().split('-');
|
|
182
182
|
const lang = parts[0];
|
|
183
|
-
|
|
183
|
+
let country = parts.length > 1 ? parts[1] : null;
|
|
184
184
|
|
|
185
|
-
|
|
185
|
+
if (lang === 'en' && !country) {
|
|
186
|
+
country = 'us';
|
|
187
|
+
}
|
|
186
188
|
|
|
189
|
+
let denonym = country ? countryDenonym[country] : 'Neutral';
|
|
190
|
+
|
|
187
191
|
if (lang === 'zh' && !country) {
|
|
188
192
|
denonym = 'Simplified';
|
|
189
193
|
}
|
package/package.json
CHANGED
package/prompt/translate.md
CHANGED
package/db/gptrans_ar.json
DELETED
package/db/gptrans_es-AR.json
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eres_muy_bueno_26czme": "Sos muy bueno",
|
|
3
|
-
"eres_muy_bueno_k3ml5b": "Sos muy buena",
|
|
4
|
-
"hello_name_1987p1n": "¡Hola, {name}!",
|
|
5
|
-
"topup_uzdh5y": "Recargar",
|
|
6
|
-
"transf_176pc1a": "Transferir",
|
|
7
|
-
"deposi_wg2ec5": "Depositar",
|
|
8
|
-
"balanc_1rv8if7": "Saldo",
|
|
9
|
-
"transa_1wtqm5d": "Transacción",
|
|
10
|
-
"accoun_x1y0v8": "Cuenta",
|
|
11
|
-
"card_yis1ox": "Tarjeta",
|
|
12
|
-
"tienes_fuego_1i2o3ok": "¿Tenés fuego?"
|
|
13
|
-
}
|
package/db/gptrans_es-ES.json
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"hello_name_1987p1n": "Hello, {name}!",
|
|
3
|
-
"topup_uzdh5y": "Top-up",
|
|
4
|
-
"transf_176pc1a": "Transfer",
|
|
5
|
-
"deposi_wg2ec5": "Deposit",
|
|
6
|
-
"balanc_1rv8if7": "Balance",
|
|
7
|
-
"transa_1wtqm5d": "Transaction",
|
|
8
|
-
"accoun_x1y0v8": "Account",
|
|
9
|
-
"card_yis1ox": "Card",
|
|
10
|
-
"loadin_21q3nx": "Loading..."
|
|
11
|
-
}
|
package/db/gptrans_it.json
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"hello_name_1987p1n": "Ciao, {name}!",
|
|
3
|
-
"topup_uzdh5y": "Ricarica",
|
|
4
|
-
"transf_176pc1a": "Trasferimento",
|
|
5
|
-
"deposi_wg2ec5": "Deposito",
|
|
6
|
-
"balanc_1rv8if7": "Saldo",
|
|
7
|
-
"transa_1wtqm5d": "Transazione",
|
|
8
|
-
"accoun_x1y0v8": "Account",
|
|
9
|
-
"card_yis1ox": "Carta",
|
|
10
|
-
"loadin_21q3nx": "Caricamento in corso..."
|
|
11
|
-
}
|