gptrans 1.4.4 → 1.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -8
- package/db/gptrans_es.json +5 -0
- package/db/gptrans_from_es.json +7 -1
- package/demo/case_4.js +17 -0
- package/index.js +35 -19
- package/package.json +2 -2
- package/db/gptrans_ar.json +0 -3
- package/db/gptrans_es-AR.json +0 -13
- package/db/gptrans_es-ES.json +0 -3
- package/db/gptrans_from_en-US.json +0 -11
- package/db/gptrans_from_es-AR.json +0 -3
- package/db/gptrans_from_es-ES.json +0 -5
- package/db/gptrans_it.json +0 -11
package/README.md
CHANGED
|
@@ -9,10 +9,10 @@ Whether you're building a multilingual website, a mobile app, or a localization
|
|
|
9
9
|
## ✨ Features
|
|
10
10
|
|
|
11
11
|
- **AI-Powered Translations:** Harness advanced models like OpenAI's GPT and Anthropic's Sonnet for high-quality translations
|
|
12
|
-
- **Smart Batching & Debouncing:**
|
|
13
|
-
- **Caching with
|
|
12
|
+
- **Smart Batching & Debouncing:** Translations are processed in batches, not only for efficiency but also to provide better context. By sending multiple related texts together, the AI model can better understand the overall context and produce more accurate and consistent translations across related terms and phrases.
|
|
13
|
+
- **Caching with JSON:** Quickly retrieves cached translations to boost performance
|
|
14
14
|
- **Parameter Substitution:** Dynamically replace placeholders in your translations
|
|
15
|
-
- **
|
|
15
|
+
- **Smart Context Handling:** Add contextual information to improve translation accuracy. Perfect for gender-aware translations, domain-specific content, or any scenario where additional context helps produce better results. The context is automatically cleared after each translation to prevent unintended effects.
|
|
16
16
|
|
|
17
17
|
## 📦 Installation
|
|
18
18
|
|
|
@@ -65,20 +65,31 @@ When creating a new instance of GPTrans, you can customize:
|
|
|
65
65
|
|
|
66
66
|
| Option | Description | Default |
|
|
67
67
|
|--------|-------------|---------|
|
|
68
|
-
| `from` | Source language locale | `es-AR` |
|
|
69
|
-
| `target` | Target language locale | `en-US` |
|
|
68
|
+
| `from` | Source language locale (BCP 47) | `es-AR` |
|
|
69
|
+
| `target` | Target language locale (BCP 47) | `en-US` |
|
|
70
70
|
| `model` | Translation model key | `claude-3-7-sonnet` |
|
|
71
71
|
| `batchThreshold` | Maximum number of characters to accumulate before triggering batch processing | `1000` |
|
|
72
72
|
| `debounceTimeout` | Time in milliseconds to wait before processing translations | `500` |
|
|
73
73
|
|
|
74
|
+
### BCP 47 Language Tags
|
|
75
|
+
|
|
76
|
+
GPTrans uses BCP 47 language tags to specify languages. BCP 47 is the IETF standard for language tags; a tag combines a language subtag with optional script and region subtags. Here are some common examples:
|
|
77
|
+
|
|
78
|
+
- `en-US` - English (United States)
|
|
79
|
+
- `es-AR` - Spanish (Argentina)
|
|
80
|
+
- `pt-BR` - Portuguese (Brazil)
|
|
81
|
+
|
|
82
|
+
For simplified or universal language codes, you can omit the region specification:
|
|
83
|
+
- `es` - Spanish (Universal)
|
|
84
|
+
|
|
74
85
|
## 🔍 How It Works
|
|
75
86
|
|
|
76
87
|
1. **First-Time Translation Behavior:** On the first request, GPTrans will return the original text while processing the translation in the background. This ensures your application remains responsive without waiting for API calls.
|
|
77
|
-
2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<
|
|
78
|
-
3. **Smart Batch Processing:**
|
|
88
|
+
2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<tag>.json`. Subsequent requests for the same text will be served instantly from the cache.
|
|
89
|
+
3. **Smart Batch Processing:** Automatically groups translation requests to optimize API usage and provide better context.
|
|
79
90
|
4. **Dynamic Model Integration:** Easily plug in multiple AI translation providers with the ModelMix library.
|
|
80
91
|
5. **Customizable Prompts:** Load and modify translation prompts (see the `prompt/translate.md` file) to fine-tune the translation output.
|
|
81
|
-
6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<
|
|
92
|
+
6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<tag>.json` file:
|
|
82
93
|
|
|
83
94
|
```json
|
|
84
95
|
{
|
package/db/gptrans_from_es.json
CHANGED
package/demo/case_4.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import GPTrans from '../index.js';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
// Case 4: Source and target are both Spanish ('es'); shows the effect of setContext() on the result
|
|
5
|
+
const model = new GPTrans({
|
|
6
|
+
from: 'es',
|
|
7
|
+
target: 'es',
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
await model.preload();
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
console.log(model.setContext().t('Eres muy bueno'));
|
|
14
|
+
console.log(model.setContext('El mensaje es para una mujer').t('Eres muy bueno'));
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
console.log(model.setContext().t('Tienes fuego?'));
|
package/index.js
CHANGED
|
@@ -35,8 +35,8 @@ class GPTrans {
|
|
|
35
35
|
this.dbFrom = new DeepBase({ name: 'gptrans_from_' + from });
|
|
36
36
|
|
|
37
37
|
try {
|
|
38
|
-
this.
|
|
39
|
-
this.
|
|
38
|
+
this.replaceTarget = isoAssoc(target, 'TARGET_');
|
|
39
|
+
this.replaceFrom = isoAssoc(from, 'FROM_');
|
|
40
40
|
} catch (e) {
|
|
41
41
|
throw new Error(`Invalid target: ${target}`);
|
|
42
42
|
}
|
|
@@ -57,7 +57,7 @@ class GPTrans {
|
|
|
57
57
|
maxConcurrent: 2,
|
|
58
58
|
}
|
|
59
59
|
},
|
|
60
|
-
options: {
|
|
60
|
+
options: {
|
|
61
61
|
max_tokens: batchThreshold,
|
|
62
62
|
temperature: 0
|
|
63
63
|
}
|
|
@@ -68,7 +68,7 @@ class GPTrans {
|
|
|
68
68
|
setContext(context = '') {
|
|
69
69
|
if (this.context !== context && this.pendingTranslations.size > 0) {
|
|
70
70
|
clearTimeout(this.debounceTimer);
|
|
71
|
-
this._processBatch();
|
|
71
|
+
this._processBatch(this.context);
|
|
72
72
|
}
|
|
73
73
|
this.context = context;
|
|
74
74
|
return this;
|
|
@@ -85,15 +85,22 @@ class GPTrans {
|
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
get(key, text) {
|
|
88
|
-
const
|
|
88
|
+
const contextHash = this._hash(this.context);
|
|
89
|
+
const translation = this.dbTarget.get(contextHash, key);
|
|
90
|
+
|
|
89
91
|
if (!translation) {
|
|
90
|
-
this.
|
|
91
|
-
|
|
92
|
+
if (!this.dbFrom.get(this.context, key)) {
|
|
93
|
+
this.dbFrom.set(this.context, key, text);
|
|
94
|
+
}
|
|
92
95
|
|
|
93
|
-
if
|
|
94
|
-
|
|
96
|
+
// Skip translation if context is empty and languages are the same
|
|
97
|
+
if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
|
|
98
|
+
return text;
|
|
95
99
|
}
|
|
96
100
|
|
|
101
|
+
this.pendingTranslations.set(key, text);
|
|
102
|
+
this.pendingCharCount += text.length; // Update character count
|
|
103
|
+
|
|
97
104
|
// Clear existing timer
|
|
98
105
|
if (this.debounceTimer) {
|
|
99
106
|
clearTimeout(this.debounceTimer);
|
|
@@ -102,20 +109,20 @@ class GPTrans {
|
|
|
102
109
|
// Set new timer
|
|
103
110
|
this.debounceTimer = setTimeout(() => {
|
|
104
111
|
if (this.pendingTranslations.size > 0) {
|
|
105
|
-
this._processBatch();
|
|
112
|
+
this._processBatch(this.context);
|
|
106
113
|
}
|
|
107
114
|
}, this.debounceTimeout);
|
|
108
115
|
|
|
109
116
|
// Process if we hit the character count threshold
|
|
110
117
|
if (this.pendingCharCount >= this.batchThreshold) {
|
|
111
118
|
clearTimeout(this.debounceTimer);
|
|
112
|
-
this._processBatch();
|
|
119
|
+
this._processBatch(this.context);
|
|
113
120
|
}
|
|
114
121
|
}
|
|
115
122
|
return translation;
|
|
116
123
|
}
|
|
117
124
|
|
|
118
|
-
async _processBatch() {
|
|
125
|
+
async _processBatch(context) {
|
|
119
126
|
this.processing = true;
|
|
120
127
|
|
|
121
128
|
const batch = Array.from(this.pendingTranslations.entries());
|
|
@@ -130,8 +137,9 @@ class GPTrans {
|
|
|
130
137
|
const translations = await this._translate(textsToTranslate);
|
|
131
138
|
const translatedTexts = translations.split('\n---\n');
|
|
132
139
|
|
|
140
|
+
const contextHash = this._hash(context);
|
|
133
141
|
batch.forEach(([key], index) => {
|
|
134
|
-
this.dbTarget.set(key, translatedTexts[index].trim());
|
|
142
|
+
this.dbTarget.set(contextHash, key, translatedTexts[index].trim());
|
|
135
143
|
});
|
|
136
144
|
|
|
137
145
|
} catch (e) {
|
|
@@ -142,6 +150,7 @@ class GPTrans {
|
|
|
142
150
|
}
|
|
143
151
|
|
|
144
152
|
async _translate(text) {
|
|
153
|
+
|
|
145
154
|
const model = GPTrans.mmix.create(this.modelKey, this.modelConfig);
|
|
146
155
|
|
|
147
156
|
model.setSystem("You are an expert translator specialized in literary translation between FROM_LANG and TARGET_DENONYM TARGET_LANG.");
|
|
@@ -149,8 +158,8 @@ class GPTrans {
|
|
|
149
158
|
model.addTextFromFile(this.promptFile);
|
|
150
159
|
|
|
151
160
|
model.replace({ INPUT: text, CONTEXT: this.context });
|
|
152
|
-
model.replace(this.
|
|
153
|
-
model.replace(this.
|
|
161
|
+
model.replace(this.replaceTarget);
|
|
162
|
+
model.replace(this.replaceFrom);
|
|
154
163
|
|
|
155
164
|
const response = await model.message();
|
|
156
165
|
|
|
@@ -171,11 +180,15 @@ class GPTrans {
|
|
|
171
180
|
|
|
172
181
|
let key = words.map((x) => x.slice(0, maxlen)).join("_");
|
|
173
182
|
key += key ? '_' : '';
|
|
174
|
-
key +=
|
|
183
|
+
key += this._hash(text);
|
|
175
184
|
return key;
|
|
176
185
|
}
|
|
177
186
|
|
|
178
|
-
|
|
187
|
+
_hash(input) {
|
|
188
|
+
return stringHash(input).toString(36);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
async preload({ target = this.replaceTarget.TARGET_ISO, model = this.modelKey, from = this.replaceFrom.FROM_ISO, batchThreshold = this.batchThreshold, debounceTimeout = this.debounceTimeout } = {}) {
|
|
179
192
|
|
|
180
193
|
// Create new GPTrans instance for the target language
|
|
181
194
|
const translator = new GPTrans({
|
|
@@ -187,8 +200,11 @@ class GPTrans {
|
|
|
187
200
|
});
|
|
188
201
|
|
|
189
202
|
// Process all entries in batches
|
|
190
|
-
for (const [
|
|
191
|
-
translator.
|
|
203
|
+
for (const [context, pairs] of this.dbFrom.entries()) {
|
|
204
|
+
translator.setContext(context);
|
|
205
|
+
for (const [key, text] of Object.entries(pairs)) {
|
|
206
|
+
translator.get(key, text);
|
|
207
|
+
}
|
|
192
208
|
}
|
|
193
209
|
|
|
194
210
|
// Wait for any pending translations to complete
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gptrans",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "1.4.
|
|
4
|
+
"version": "1.4.8",
|
|
5
5
|
"description": "🚆 GPTrans - The smarter AI-powered way to translate.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"translate",
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
},
|
|
33
33
|
"homepage": "https://github.com/clasen/GPTrans#readme",
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"deepbase": "^1.4.
|
|
35
|
+
"deepbase": "^1.4.8",
|
|
36
36
|
"dotenv": "^16.4.7",
|
|
37
37
|
"modelmix": "^2.9.0",
|
|
38
38
|
"string-hash": "^1.1.3"
|
package/db/gptrans_ar.json
DELETED
package/db/gptrans_es-AR.json
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"eres_muy_bueno_26czme": "Sos muy bueno",
|
|
3
|
-
"eres_muy_bueno_k3ml5b": "Sos muy buena",
|
|
4
|
-
"hello_name_1987p1n": "¡Hola, {name}!",
|
|
5
|
-
"topup_uzdh5y": "Recargar",
|
|
6
|
-
"transf_176pc1a": "Transferir",
|
|
7
|
-
"deposi_wg2ec5": "Depositar",
|
|
8
|
-
"balanc_1rv8if7": "Saldo",
|
|
9
|
-
"transa_1wtqm5d": "Transacción",
|
|
10
|
-
"accoun_x1y0v8": "Cuenta",
|
|
11
|
-
"card_yis1ox": "Tarjeta",
|
|
12
|
-
"tienes_fuego_1i2o3ok": "¿Tenés fuego?"
|
|
13
|
-
}
|
package/db/gptrans_es-ES.json
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"hello_name_1987p1n": "Hello, {name}!",
|
|
3
|
-
"topup_uzdh5y": "Top-up",
|
|
4
|
-
"transf_176pc1a": "Transfer",
|
|
5
|
-
"deposi_wg2ec5": "Deposit",
|
|
6
|
-
"balanc_1rv8if7": "Balance",
|
|
7
|
-
"transa_1wtqm5d": "Transaction",
|
|
8
|
-
"accoun_x1y0v8": "Account",
|
|
9
|
-
"card_yis1ox": "Card",
|
|
10
|
-
"loadin_21q3nx": "Loading..."
|
|
11
|
-
}
|
package/db/gptrans_it.json
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"hello_name_1987p1n": "Ciao, {name}!",
|
|
3
|
-
"topup_uzdh5y": "Ricarica",
|
|
4
|
-
"transf_176pc1a": "Trasferimento",
|
|
5
|
-
"deposi_wg2ec5": "Deposito",
|
|
6
|
-
"balanc_1rv8if7": "Saldo",
|
|
7
|
-
"transa_1wtqm5d": "Transazione",
|
|
8
|
-
"accoun_x1y0v8": "Account",
|
|
9
|
-
"card_yis1ox": "Carta",
|
|
10
|
-
"loadin_21q3nx": "Caricamento in corso..."
|
|
11
|
-
}
|