gptrans 1.4.6 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,10 +9,10 @@ Whether you're building a multilingual website, a mobile app, or a localization
9
9
  ## ✨ Features
10
10
 
11
11
  - **AI-Powered Translations:** Harness advanced models like OpenAI's GPT and Anthropic's Sonnet for high-quality translations
12
- - **Smart Batching & Debouncing:** Automatically groups translation requests to optimize API usage
13
- - **Caching with DeepBase:** Quickly retrieves cached translations to boost performance
12
+ - **Smart Batching & Debouncing:** Translations are processed in batches, not only for efficiency but also to provide better context. By sending multiple related texts together, the AI model can better understand the overall context and produce more accurate and consistent translations across related terms and phrases.
13
+ - **Caching with JSON:** Quickly retrieves cached translations to boost performance
14
14
  - **Parameter Substitution:** Dynamically replace placeholders in your translations
15
- - **Flexible Configuration:** Customize source and target locales, model keys, and batching settings to fit your needs
15
+ - **Smart Context Handling:** Add contextual information to improve translation accuracy. Perfect for gender-aware translations, domain-specific content, or any scenario where additional context helps produce better results. The context is automatically cleared after each translation to prevent unintended effects.
16
16
 
17
17
  ## 📦 Installation
18
18
 
@@ -65,20 +65,31 @@ When creating a new instance of GPTrans, you can customize:
65
65
 
66
66
  | Option | Description | Default |
67
67
  |--------|-------------|---------|
68
- | `from` | Source language locale | `es-AR` |
69
- | `target` | Target language locale | `en-US` |
68
+ | `from` | Source language locale (BCP 47) | `en-US` |
69
+ | `target` | Target language locale (BCP 47) | `es-AR` |
70
70
  | `model` | Translation model key | `claude-3-7-sonnet` |
71
71
  | `batchThreshold` | Maximum number of characters to accumulate before triggering batch processing | `1500` |
72
72
  | `debounceTimeout` | Time in milliseconds to wait before processing translations | `500` |
73
73
 
74
+ ### BCP 47 Language Tags
75
+
76
+ GPTrans uses BCP 47 language tags for language specification. BCP 47 is the standard for language tags that combines language, script, and region codes. Here are some common examples:
77
+
78
+ - `en-US` - English (United States)
79
+ - `es-AR` - Spanish (Argentina)
80
+ - `pt-BR` - Portuguese (Brazil)
81
+
82
+ For simplified or universal language codes, you can omit the region specification:
83
+ - `es` - Spanish (Universal)
84
+
74
85
  ## 🔍 How It Works
75
86
 
76
87
  1. **First-Time Translation Behavior:** On the first request, GPTrans will return the original text while processing the translation in the background. This ensures your application remains responsive without waiting for API calls.
77
- 2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<iso>.json`. Subsequent requests for the same text will be served instantly from the cache.
78
- 3. **Smart Batch Processing:** Translations are processed in batches, providing better context for more accurate results.
88
+ 2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<tag>.json`. Subsequent requests for the same text will be served instantly from the cache.
89
+ 3. **Smart Batch Processing:** Automatically groups translation requests to optimize API usage and provide better context.
79
90
  4. **Dynamic Model Integration:** Easily plug in multiple AI translation providers with the ModelMix library.
80
91
  5. **Customizable Prompts:** Load and modify translation prompts (see the `prompt/translate.md` file) to fine-tune the translation output.
81
- 6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<iso>.json` file:
92
+ 6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<tag>.json` file:
82
93
 
83
94
  ```json
84
95
  {
@@ -0,0 +1,9 @@
1
+ {
2
+ "45h": {
3
+ "eres_muy_bueno_26czme": "You're very good",
4
+ "tienes_fuego_1i2o3ok": "Do you have a light?"
5
+ },
6
+ "1sfvxng": {
7
+ "eres_muy_bueno_26czme": "You are very good"
8
+ }
9
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "1sfvxng": {
3
+ "eres_muy_bueno_26czme": "Eres muy buena"
4
+ }
5
+ }
@@ -1,3 +1,9 @@
1
1
  {
2
- "cargan_1998owo": "Cargando..."
2
+ "": {
3
+ "eres_muy_bueno_26czme": "Eres muy bueno",
4
+ "tienes_fuego_1i2o3ok": "Tienes fuego?"
5
+ },
6
+ "El mensaje es para una mujer": {
7
+ "eres_muy_bueno_26czme": "Eres muy bueno"
8
+ }
3
9
  }
package/demo/case_4.js ADDED
@@ -0,0 +1,17 @@
1
+ import GPTrans from '../index.js';
2
+
3
+
4
+ // Case 4: Translate from Spanish to English, with and without context
5
+ const model = new GPTrans({
6
+ from: 'es',
7
+ target: 'en',
8
+ });
9
+
10
+ await model.preload();
11
+
12
+
13
+ console.log(model.setContext().t('Eres muy bueno'));
14
+ console.log(model.setContext('El mensaje es para una mujer').t('Eres muy bueno'));
15
+
16
+
17
+ console.log(model.setContext().t('Tienes fuego?'));
package/index.js CHANGED
@@ -9,7 +9,16 @@ class GPTrans {
9
9
 
10
10
  static get mmix() {
11
11
  if (!this.#mmixInstance) {
12
- const mmix = new ModelMix();
12
+ const mmix = new ModelMix({
13
+ config: {
14
+ max_history: 1,
15
+ debug: false,
16
+ bottleneck: {
17
+ minTime: 15000,
18
+ maxConcurrent: 1
19
+ }
20
+ }
21
+ });
13
22
 
14
23
  mmix.attach(new MixOpenAI());
15
24
  mmix.attach(new MixAnthropic());
@@ -23,7 +32,7 @@ class GPTrans {
23
32
  return isLanguageAvailable(langCode);
24
33
  }
25
34
 
26
- constructor({ from = 'en-US', target = 'es-AR', model = 'claude-3-7-sonnet-20250219', batchThreshold = 1000, debounceTimeout = 500, promptFile = null, context = '' }) {
35
+ constructor({ from = 'en-US', target = 'es-AR', model = 'claude-3-7-sonnet-20250219', batchThreshold = 1500, debounceTimeout = 500, promptFile = null, context = '' }) {
27
36
 
28
37
  try {
29
38
  dotenv.config();
@@ -35,8 +44,8 @@ class GPTrans {
35
44
  this.dbFrom = new DeepBase({ name: 'gptrans_from_' + from });
36
45
 
37
46
  try {
38
- this.replace_target = isoAssoc(target, 'TARGET_');
39
- this.replace_from = isoAssoc(from, 'FROM_');
47
+ this.replaceTarget = isoAssoc(target, 'TARGET_');
48
+ this.replaceFrom = isoAssoc(from, 'FROM_');
40
49
  } catch (e) {
41
50
  throw new Error(`Invalid target: ${target}`);
42
51
  }
@@ -50,14 +59,7 @@ class GPTrans {
50
59
  this.promptFile = promptFile ?? new URL('./prompt/translate.md', import.meta.url).pathname;
51
60
  this.context = context;
52
61
  this.modelConfig = {
53
- config: {
54
- max_history: 1,
55
- debug: false,
56
- bottleneck: {
57
- maxConcurrent: 2,
58
- }
59
- },
60
- options: {
62
+ options: {
61
63
  max_tokens: batchThreshold,
62
64
  temperature: 0
63
65
  }
@@ -68,7 +70,7 @@ class GPTrans {
68
70
  setContext(context = '') {
69
71
  if (this.context !== context && this.pendingTranslations.size > 0) {
70
72
  clearTimeout(this.debounceTimer);
71
- this._processBatch();
73
+ this._processBatch(this.context);
72
74
  }
73
75
  this.context = context;
74
76
  return this;
@@ -85,15 +87,27 @@ class GPTrans {
85
87
  }
86
88
 
87
89
  get(key, text) {
88
- const translation = this.dbTarget.get(key);
90
+
91
+ if (!text || !text.trim()) {
92
+ return text;
93
+ }
94
+
95
+ const contextHash = this._hash(this.context);
96
+ const translation = this.dbTarget.get(contextHash, key);
97
+
89
98
  if (!translation) {
90
- this.pendingTranslations.set(key, text);
91
- this.pendingCharCount += text.length; // Update character count
99
+ if (!this.dbFrom.get(this.context, key)) {
100
+ this.dbFrom.set(this.context, key, text);
101
+ }
92
102
 
93
- if (!this.dbFrom.get(key)) {
94
- this.dbFrom.set(key, text);
103
+ // Skip translation if context is empty and languages are the same
104
+ if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
105
+ return text;
95
106
  }
96
107
 
108
+ this.pendingTranslations.set(key, text);
109
+ this.pendingCharCount += text.length; // Update character count
110
+
97
111
  // Clear existing timer
98
112
  if (this.debounceTimer) {
99
113
  clearTimeout(this.debounceTimer);
@@ -102,27 +116,31 @@ class GPTrans {
102
116
  // Set new timer
103
117
  this.debounceTimer = setTimeout(() => {
104
118
  if (this.pendingTranslations.size > 0) {
105
- this._processBatch();
119
+ this._processBatch(this.context);
106
120
  }
107
121
  }, this.debounceTimeout);
108
122
 
109
123
  // Process if we hit the character count threshold
110
124
  if (this.pendingCharCount >= this.batchThreshold) {
111
125
  clearTimeout(this.debounceTimer);
112
- this._processBatch();
126
+ this._processBatch(this.context);
113
127
  }
114
128
  }
115
129
  return translation;
116
130
  }
117
131
 
118
- async _processBatch() {
132
+ async _processBatch(context) {
119
133
  this.processing = true;
120
134
 
121
135
  const batch = Array.from(this.pendingTranslations.entries());
122
136
 
123
137
  // Clear pending translations and character count before awaiting translation
124
138
  this.pendingTranslations.clear();
139
+
125
140
  this.modelConfig.options.max_tokens = this.pendingCharCount + 1000;
141
+ const minTime = Math.floor((60000 / (8000 / this.pendingCharCount)) * 1.4);
142
+ GPTrans.mmix.limiter.updateSettings({ minTime });
143
+
126
144
  this.pendingCharCount = 0;
127
145
 
128
146
  const textsToTranslate = batch.map(([_, text]) => text).join('\n---\n');
@@ -130,8 +148,17 @@ class GPTrans {
130
148
  const translations = await this._translate(textsToTranslate);
131
149
  const translatedTexts = translations.split('\n---\n');
132
150
 
151
+ const contextHash = this._hash(context);
133
152
  batch.forEach(([key], index) => {
134
- this.dbTarget.set(key, translatedTexts[index].trim());
153
+
154
+ if (!translatedTexts[index]) {
155
+ console.log(translations);
156
+ console.error(`No translation found for ${key}`);
157
+
158
+ return;
159
+ }
160
+
161
+ this.dbTarget.set(contextHash, key, translatedTexts[index].trim());
135
162
  });
136
163
 
137
164
  } catch (e) {
@@ -142,6 +169,7 @@ class GPTrans {
142
169
  }
143
170
 
144
171
  async _translate(text) {
172
+
145
173
  const model = GPTrans.mmix.create(this.modelKey, this.modelConfig);
146
174
 
147
175
  model.setSystem("You are an expert translator specialized in literary translation between FROM_LANG and TARGET_DENONYM TARGET_LANG.");
@@ -149,8 +177,8 @@ class GPTrans {
149
177
  model.addTextFromFile(this.promptFile);
150
178
 
151
179
  model.replace({ INPUT: text, CONTEXT: this.context });
152
- model.replace(this.replace_target);
153
- model.replace(this.replace_from);
180
+ model.replace(this.replaceTarget);
181
+ model.replace(this.replaceFrom);
154
182
 
155
183
  const response = await model.message();
156
184
 
@@ -171,39 +199,33 @@ class GPTrans {
171
199
 
172
200
  let key = words.map((x) => x.slice(0, maxlen)).join("_");
173
201
  key += key ? '_' : '';
174
- key += stringHash(text + this.context).toString(36);
202
+ key += this._hash(text);
175
203
  return key;
176
204
  }
177
205
 
178
- async preload({ target = this.replace_target.TARGET_ISO, model = this.modelKey, from = this.replace_from.FROM_ISO, batchThreshold = this.batchThreshold, debounceTimeout = this.debounceTimeout } = {}) {
179
-
180
- // Create new GPTrans instance for the target language
181
- const translator = new GPTrans({
182
- from,
183
- target,
184
- model,
185
- batchThreshold,
186
- debounceTimeout,
187
- });
206
+ _hash(input) {
207
+ return stringHash(input).toString(36);
208
+ }
188
209
 
189
- // Process all entries in batches
190
- for (const [key, text] of this.dbFrom.entries()) {
191
- translator.get(key, text);
210
+ async preload() {
211
+ for (const [context, pairs] of this.dbFrom.entries()) {
212
+ this.setContext(context);
213
+ for (const [key, text] of Object.entries(pairs)) {
214
+ this.get(key, text);
215
+ }
192
216
  }
193
217
 
194
218
  // Wait for any pending translations to complete
195
- if (translator.pendingTranslations.size > 0) {
196
- await new Promise(resolve => {
197
- const checkInterval = setInterval(() => {
198
- if (translator.processing === false && translator.pendingTranslations.size === 0) {
199
- clearInterval(checkInterval);
200
- resolve();
201
- }
202
- }, 1000);
203
- });
204
- }
219
+ await new Promise(resolve => {
220
+ const checkInterval = setInterval(() => {
221
+ if (this.dbFrom.keys().length === this.dbTarget.keys().length) {
222
+ clearInterval(checkInterval);
223
+ resolve();
224
+ }
225
+ }, 100);
226
+ });
205
227
 
206
- return translator;
228
+ return this;
207
229
  }
208
230
  }
209
231
 
package/isoAssoc.js CHANGED
@@ -180,10 +180,14 @@ export function isoAssoc(iso, prefix = '') {
180
180
 
181
181
  const parts = iso.toLowerCase().split('-');
182
182
  const lang = parts[0];
183
- const country = parts.length > 1 ? parts[1] : null;
183
+ let country = parts.length > 1 ? parts[1] : null;
184
184
 
185
- let denonym = country ? countryDenonym[country] : 'Neutral';
185
+ if (lang === 'en' && !country) {
186
+ country = 'us';
187
+ }
186
188
 
189
+ let denonym = country ? countryDenonym[country] : 'Neutral';
190
+
187
191
  if (lang === 'zh' && !country) {
188
192
  denonym = 'Simplified';
189
193
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gptrans",
3
3
  "type": "module",
4
- "version": "1.4.6",
4
+ "version": "1.5.0",
5
5
  "description": "🚆 GPTrans - The smarter AI-powered way to translate.",
6
6
  "keywords": [
7
7
  "translate",
@@ -2,7 +2,9 @@
2
2
  Translation from FROM_ISO to TARGET_ISO (TARGET_DENONYM TARGET_LANG) with cultural adaptations.
3
3
 
4
4
  ## Text to translate
5
+ ```
5
6
  INPUT
7
+ ```
6
8
 
7
9
  # Return Format
8
10
  - Provide the final translation within a code block using ```.
@@ -1,3 +0,0 @@
1
- {
2
- "cargan_1998owo": "جارٍ التحميل..."
3
- }
@@ -1,13 +0,0 @@
1
- {
2
- "eres_muy_bueno_26czme": "Sos muy bueno",
3
- "eres_muy_bueno_k3ml5b": "Sos muy buena",
4
- "hello_name_1987p1n": "¡Hola, {name}!",
5
- "topup_uzdh5y": "Recargar",
6
- "transf_176pc1a": "Transferir",
7
- "deposi_wg2ec5": "Depositar",
8
- "balanc_1rv8if7": "Saldo",
9
- "transa_1wtqm5d": "Transacción",
10
- "accoun_x1y0v8": "Cuenta",
11
- "card_yis1ox": "Tarjeta",
12
- "tienes_fuego_1i2o3ok": "¿Tenés fuego?"
13
- }
@@ -1,3 +0,0 @@
1
- {
2
- "tenes_fuego_1fs98im": "¿Tienes fuego?"
3
- }
@@ -1,11 +0,0 @@
1
- {
2
- "hello_name_1987p1n": "Hello, {name}!",
3
- "topup_uzdh5y": "Top-up",
4
- "transf_176pc1a": "Transfer",
5
- "deposi_wg2ec5": "Deposit",
6
- "balanc_1rv8if7": "Balance",
7
- "transa_1wtqm5d": "Transaction",
8
- "accoun_x1y0v8": "Account",
9
- "card_yis1ox": "Card",
10
- "loadin_21q3nx": "Loading..."
11
- }
@@ -1,3 +0,0 @@
1
- {
2
- "tenes_fuego_1fs98im": "¿Tenés fuego?"
3
- }
@@ -1,5 +0,0 @@
1
- {
2
- "eres_muy_bueno_26czme": "Eres muy bueno",
3
- "eres_muy_bueno_k3ml5b": "Eres muy bueno",
4
- "tienes_fuego_1i2o3ok": "Tienes fuego?"
5
- }
@@ -1,11 +0,0 @@
1
- {
2
- "hello_name_1987p1n": "Ciao, {name}!",
3
- "topup_uzdh5y": "Ricarica",
4
- "transf_176pc1a": "Trasferimento",
5
- "deposi_wg2ec5": "Deposito",
6
- "balanc_1rv8if7": "Saldo",
7
- "transa_1wtqm5d": "Transazione",
8
- "accoun_x1y0v8": "Account",
9
- "card_yis1ox": "Carta",
10
- "loadin_21q3nx": "Caricamento in corso..."
11
- }