gptrans 1.4.4 → 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,10 +9,10 @@ Whether you're building a multilingual website, a mobile app, or a localization
9
9
  ## ✨ Features
10
10
 
11
11
  - **AI-Powered Translations:** Harness advanced models like OpenAI's GPT and Anthropic's Sonnet for high-quality translations
12
- - **Smart Batching & Debouncing:** Automatically groups translation requests to optimize API usage
13
- - **Caching with DeepBase:** Quickly retrieves cached translations to boost performance
12
+ - **Smart Batching & Debouncing:** Translations are processed in batches, not only for efficiency but also to provide better context. By sending multiple related texts together, the AI model can better understand the overall context and produce more accurate and consistent translations across related terms and phrases.
13
+ - **Caching with JSON:** Quickly retrieves cached translations to boost performance
14
14
  - **Parameter Substitution:** Dynamically replace placeholders in your translations
15
- - **Flexible Configuration:** Customize source and target locales, model keys, and batching settings to fit your needs
15
+ - **Smart Context Handling:** Add contextual information to improve translation accuracy. Perfect for gender-aware translations, domain-specific content, or any scenario where additional context helps produce better results. The context is automatically cleared after each translation to prevent unintended effects.
16
16
 
17
17
  ## 📦 Installation
18
18
 
@@ -65,20 +65,31 @@ When creating a new instance of GPTrans, you can customize:
65
65
 
66
66
  | Option | Description | Default |
67
67
  |--------|-------------|---------|
68
- | `from` | Source language locale | `es-AR` |
69
- | `target` | Target language locale | `en-US` |
68
+ | `from` | Source language locale (BCP 47) | `es-AR` |
69
+ | `target` | Target language locale (BCP 47) | `en-US` |
70
70
  | `model` | Translation model key | `claude-3-7-sonnet` |
71
71
  | `batchThreshold` | Maximum number of characters to accumulate before triggering batch processing | `1000` |
72
72
  | `debounceTimeout` | Time in milliseconds to wait before processing translations | `500` |
73
73
 
74
+ ### BCP 47 Language Tags
75
+
76
+ GPTrans uses BCP 47 language tags for language specification. BCP 47 is the standard for language tags that combines language, script, and region codes. Here are some common examples:
77
+
78
+ - `en-US` - English (United States)
79
+ - `es-AR` - Spanish (Argentina)
80
+ - `pt-BR` - Portuguese (Brazil)
81
+
82
+ For simplified or universal language codes, you can omit the region specification:
83
+ - `es` - Spanish (Universal)
84
+
74
85
  ## 🔍 How It Works
75
86
 
76
87
  1. **First-Time Translation Behavior:** On the first request, GPTrans will return the original text while processing the translation in the background. This ensures your application remains responsive without waiting for API calls.
77
- 2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<iso>.json`. Subsequent requests for the same text will be served instantly from the cache.
78
- 3. **Smart Batch Processing:** Translations are processed in batches, providing better context for more accurate results.
88
+ 2. **Translation Caching:** Once processed, translations are stored in `db/gptrans_<tag>.json`. Subsequent requests for the same text will be served instantly from the cache.
89
+ 3. **Smart Batch Processing:** Automatically groups translation requests to optimize API usage and provide better context.
79
90
  4. **Dynamic Model Integration:** Easily plug in multiple AI translation providers with the ModelMix library.
80
91
  5. **Customizable Prompts:** Load and modify translation prompts (see the `prompt/translate.md` file) to fine-tune the translation output.
81
- 6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<iso>.json` file:
92
+ 6. **Manual Corrections:** A JSON file stores key-translation pairs, allowing you to override specific translations and make manual corrections when needed. Simply edit the `db/gptrans_<tag>.json` file:
82
93
 
83
94
  ```json
84
95
  {
@@ -0,0 +1,5 @@
1
+ {
2
+ "1sfvxng": {
3
+ "eres_muy_bueno_26czme": "Eres muy buena"
4
+ }
5
+ }
@@ -1,3 +1,9 @@
1
1
  {
2
- "cargan_1998owo": "Cargando..."
2
+ "": {
3
+ "eres_muy_bueno_26czme": "Eres muy bueno",
4
+ "tienes_fuego_1i2o3ok": "Tienes fuego?"
5
+ },
6
+ "El mensaje es para una mujer": {
7
+ "eres_muy_bueno_26czme": "Eres muy bueno"
8
+ }
3
9
  }
package/demo/case_4.js ADDED
@@ -0,0 +1,17 @@
1
+ import GPTrans from '../index.js';
2
+
3
+
4
+ // Case 4: Same-language translation (Spanish to Spanish) driven by context
5
+ const model = new GPTrans({
6
+ from: 'es',
7
+ target: 'es',
8
+ });
9
+
10
+ await model.preload();
11
+
12
+
13
+ console.log(model.setContext().t('Eres muy bueno'));
14
+ console.log(model.setContext('El mensaje es para una mujer').t('Eres muy bueno'));
15
+
16
+
17
+ console.log(model.setContext().t('Tienes fuego?'));
package/index.js CHANGED
@@ -35,8 +35,8 @@ class GPTrans {
35
35
  this.dbFrom = new DeepBase({ name: 'gptrans_from_' + from });
36
36
 
37
37
  try {
38
- this.replace_target = isoAssoc(target, 'TARGET_');
39
- this.replace_from = isoAssoc(from, 'FROM_');
38
+ this.replaceTarget = isoAssoc(target, 'TARGET_');
39
+ this.replaceFrom = isoAssoc(from, 'FROM_');
40
40
  } catch (e) {
41
41
  throw new Error(`Invalid target: ${target}`);
42
42
  }
@@ -57,7 +57,7 @@ class GPTrans {
57
57
  maxConcurrent: 2,
58
58
  }
59
59
  },
60
- options: {
60
+ options: {
61
61
  max_tokens: batchThreshold,
62
62
  temperature: 0
63
63
  }
@@ -68,7 +68,7 @@ class GPTrans {
68
68
  setContext(context = '') {
69
69
  if (this.context !== context && this.pendingTranslations.size > 0) {
70
70
  clearTimeout(this.debounceTimer);
71
- this._processBatch();
71
+ this._processBatch(this.context);
72
72
  }
73
73
  this.context = context;
74
74
  return this;
@@ -85,15 +85,22 @@ class GPTrans {
85
85
  }
86
86
 
87
87
  get(key, text) {
88
- const translation = this.dbTarget.get(key);
88
+ const contextHash = this._hash(this.context);
89
+ const translation = this.dbTarget.get(contextHash, key);
90
+
89
91
  if (!translation) {
90
- this.pendingTranslations.set(key, text);
91
- this.pendingCharCount += text.length; // Update character count
92
+ if (!this.dbFrom.get(this.context, key)) {
93
+ this.dbFrom.set(this.context, key, text);
94
+ }
92
95
 
93
- if (!this.dbFrom.get(key)) {
94
- this.dbFrom.set(key, text);
96
+ // Skip translation if context is empty and languages are the same
97
+ if (!this.context && this.replaceFrom.FROM_ISO === this.replaceTarget.TARGET_ISO) {
98
+ return text;
95
99
  }
96
100
 
101
+ this.pendingTranslations.set(key, text);
102
+ this.pendingCharCount += text.length; // Update character count
103
+
97
104
  // Clear existing timer
98
105
  if (this.debounceTimer) {
99
106
  clearTimeout(this.debounceTimer);
@@ -102,20 +109,20 @@ class GPTrans {
102
109
  // Set new timer
103
110
  this.debounceTimer = setTimeout(() => {
104
111
  if (this.pendingTranslations.size > 0) {
105
- this._processBatch();
112
+ this._processBatch(this.context);
106
113
  }
107
114
  }, this.debounceTimeout);
108
115
 
109
116
  // Process if we hit the character count threshold
110
117
  if (this.pendingCharCount >= this.batchThreshold) {
111
118
  clearTimeout(this.debounceTimer);
112
- this._processBatch();
119
+ this._processBatch(this.context);
113
120
  }
114
121
  }
115
122
  return translation;
116
123
  }
117
124
 
118
- async _processBatch() {
125
+ async _processBatch(context) {
119
126
  this.processing = true;
120
127
 
121
128
  const batch = Array.from(this.pendingTranslations.entries());
@@ -130,8 +137,9 @@ class GPTrans {
130
137
  const translations = await this._translate(textsToTranslate);
131
138
  const translatedTexts = translations.split('\n---\n');
132
139
 
140
+ const contextHash = this._hash(context);
133
141
  batch.forEach(([key], index) => {
134
- this.dbTarget.set(key, translatedTexts[index].trim());
142
+ this.dbTarget.set(contextHash, key, translatedTexts[index].trim());
135
143
  });
136
144
 
137
145
  } catch (e) {
@@ -142,6 +150,7 @@ class GPTrans {
142
150
  }
143
151
 
144
152
  async _translate(text) {
153
+
145
154
  const model = GPTrans.mmix.create(this.modelKey, this.modelConfig);
146
155
 
147
156
  model.setSystem("You are an expert translator specialized in literary translation between FROM_LANG and TARGET_DENONYM TARGET_LANG.");
@@ -149,8 +158,8 @@ class GPTrans {
149
158
  model.addTextFromFile(this.promptFile);
150
159
 
151
160
  model.replace({ INPUT: text, CONTEXT: this.context });
152
- model.replace(this.replace_target);
153
- model.replace(this.replace_from);
161
+ model.replace(this.replaceTarget);
162
+ model.replace(this.replaceFrom);
154
163
 
155
164
  const response = await model.message();
156
165
 
@@ -171,11 +180,15 @@ class GPTrans {
171
180
 
172
181
  let key = words.map((x) => x.slice(0, maxlen)).join("_");
173
182
  key += key ? '_' : '';
174
- key += stringHash(text + this.context).toString(36);
183
+ key += this._hash(text);
175
184
  return key;
176
185
  }
177
186
 
178
- async preload({ target = this.replace_target.TARGET_ISO, model = this.modelKey, from = this.replace_from.FROM_ISO, batchThreshold = this.batchThreshold, debounceTimeout = this.debounceTimeout } = {}) {
187
+ _hash(input) {
188
+ return stringHash(input).toString(36);
189
+ }
190
+
191
+ async preload({ target = this.replaceTarget.TARGET_ISO, model = this.modelKey, from = this.replaceFrom.FROM_ISO, batchThreshold = this.batchThreshold, debounceTimeout = this.debounceTimeout } = {}) {
179
192
 
180
193
  // Create new GPTrans instance for the target language
181
194
  const translator = new GPTrans({
@@ -187,8 +200,11 @@ class GPTrans {
187
200
  });
188
201
 
189
202
  // Process all entries in batches
190
- for (const [key, text] of this.dbFrom.entries()) {
191
- translator.get(key, text);
203
+ for (const [context, pairs] of this.dbFrom.entries()) {
204
+ translator.setContext(context);
205
+ for (const [key, text] of Object.entries(pairs)) {
206
+ translator.get(key, text);
207
+ }
192
208
  }
193
209
 
194
210
  // Wait for any pending translations to complete
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gptrans",
3
3
  "type": "module",
4
- "version": "1.4.4",
4
+ "version": "1.4.8",
5
5
  "description": "🚆 GPTrans - The smarter AI-powered way to translate.",
6
6
  "keywords": [
7
7
  "translate",
@@ -32,7 +32,7 @@
32
32
  },
33
33
  "homepage": "https://github.com/clasen/GPTrans#readme",
34
34
  "dependencies": {
35
- "deepbase": "^1.4.6",
35
+ "deepbase": "^1.4.8",
36
36
  "dotenv": "^16.4.7",
37
37
  "modelmix": "^2.9.0",
38
38
  "string-hash": "^1.1.3"
@@ -1,3 +0,0 @@
1
- {
2
- "cargan_1998owo": "جارٍ التحميل..."
3
- }
@@ -1,13 +0,0 @@
1
- {
2
- "eres_muy_bueno_26czme": "Sos muy bueno",
3
- "eres_muy_bueno_k3ml5b": "Sos muy buena",
4
- "hello_name_1987p1n": "¡Hola, {name}!",
5
- "topup_uzdh5y": "Recargar",
6
- "transf_176pc1a": "Transferir",
7
- "deposi_wg2ec5": "Depositar",
8
- "balanc_1rv8if7": "Saldo",
9
- "transa_1wtqm5d": "Transacción",
10
- "accoun_x1y0v8": "Cuenta",
11
- "card_yis1ox": "Tarjeta",
12
- "tienes_fuego_1i2o3ok": "¿Tenés fuego?"
13
- }
@@ -1,3 +0,0 @@
1
- {
2
- "tenes_fuego_1fs98im": "¿Tienes fuego?"
3
- }
@@ -1,11 +0,0 @@
1
- {
2
- "hello_name_1987p1n": "Hello, {name}!",
3
- "topup_uzdh5y": "Top-up",
4
- "transf_176pc1a": "Transfer",
5
- "deposi_wg2ec5": "Deposit",
6
- "balanc_1rv8if7": "Balance",
7
- "transa_1wtqm5d": "Transaction",
8
- "accoun_x1y0v8": "Account",
9
- "card_yis1ox": "Card",
10
- "loadin_21q3nx": "Loading..."
11
- }
@@ -1,3 +0,0 @@
1
- {
2
- "tenes_fuego_1fs98im": "¿Tenés fuego?"
3
- }
@@ -1,5 +0,0 @@
1
- {
2
- "eres_muy_bueno_26czme": "Eres muy bueno",
3
- "eres_muy_bueno_k3ml5b": "Eres muy bueno",
4
- "tienes_fuego_1i2o3ok": "Tienes fuego?"
5
- }
@@ -1,11 +0,0 @@
1
- {
2
- "hello_name_1987p1n": "Ciao, {name}!",
3
- "topup_uzdh5y": "Ricarica",
4
- "transf_176pc1a": "Trasferimento",
5
- "deposi_wg2ec5": "Deposito",
6
- "balanc_1rv8if7": "Saldo",
7
- "transa_1wtqm5d": "Transazione",
8
- "accoun_x1y0v8": "Account",
9
- "card_yis1ox": "Carta",
10
- "loadin_21q3nx": "Caricamento in corso..."
11
- }