@yellowpanther/shared 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/index.js +3 -0
- package/src/lang/translationHelper.js +223 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yellowpanther/shared",
|
|
3
|
-
"version": "1.2.1",
|
|
3
|
+
"version": "1.2.2",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
"./queue/quizPublishQueue": "./src/queue/quizPublishQueue.js",
|
|
32
32
|
"./queue/predictPublishQueue": "./src/queue/predictPublishQueue.js",
|
|
33
33
|
"./queue/queueRegistry": "./src/queue/queueRegistry.js",
|
|
34
|
+
"./translationHelper": "./src/lang/translationHelper.js",
|
|
34
35
|
"./*": "./src/*"
|
|
35
36
|
},
|
|
36
37
|
"scripts": {
|
package/src/index.js
CHANGED
package/src/lang/translationHelper.js
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
const axios = require('axios');
|
|
2
|
+
const { JSDOM } = require('jsdom'); // Required for HTML parsing
|
|
3
|
+
|
|
4
|
+
// ♻️ Shared Redis connection
|
|
5
|
+
const redis = require('../redis/redisClient');
|
|
6
|
+
|
|
7
|
+
// Lingva mirrors, tried in this order until one of them returns a translation.
const BASE_URLS = [
  'https://lingva.ml',
  'https://translate.mentality.rip',
  'https://translate.plausibility.cloud',
  'https://lingva.translate.garudalinux.org',
];

// Default upper bound, in characters, when splitting text into chunks.
const MAX_CHUNK_SIZE = 300;

// Number of attempts made against one mirror before moving on to the next.
const RETRY_LIMIT = 3;

// Pause, in milliseconds, inserted after each translation request.
const DELAY_BETWEEN_REQUESTS = 300;

// Expiry of cached translations, in seconds: 7 days.
const REDIS_TTL = 7 * 24 * 60 * 60;

// Failure count at which the cache entry for a text gets deleted.
const MAX_FAILURES = 3;
|
|
19
|
+
|
|
20
|
+
// Returns a promise that resolves (with no value) once `ms` milliseconds have elapsed.
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
21
|
+
|
|
22
|
+
/**
 * Splits text into sentence-aligned chunks of at most `maxChunkSize` characters.
 *
 * Sentences (runs ending in `.`, `!` or `?`) are packed greedily into chunks.
 * A sentence that is itself longer than the limit is split at whitespace, and
 * a single word longer than the limit is hard-sliced, so no returned chunk
 * exceeds the limit. Every chunk is trimmed; whitespace-only input yields [].
 *
 * @param {string} text - Text to split.
 * @param {number} [maxChunkSize=MAX_CHUNK_SIZE] - Maximum chunk length in
 *   UTF-16 code units; values below 1 (or NaN) are treated as 1.
 * @returns {string[]} Non-empty, trimmed chunks in their original order.
 * @throws {TypeError} If `text` is not a string.
 */
function splitTextIntoChunks(text, maxChunkSize = MAX_CHUNK_SIZE) {
  if (typeof text !== 'string') {
    throw new TypeError('splitTextIntoChunks expects a string');
  }

  // A limit below 1 would make the hard-slice loop below spin forever.
  const limit = maxChunkSize >= 1 ? Math.floor(maxChunkSize) : 1;

  // First alternative: optional text + terminating punctuation + trailing space.
  // It also captures punctuation-only runs such as a leading "...", which a
  // pattern requiring at least one non-punctuation character would drop.
  // Second alternative: a trailing run with no terminator.
  const sentences = text.match(/[^.!?]*[.!?]+\s*|[^.!?]+/g) || [];
  const chunks = [];
  let current = '';

  const flush = () => {
    const ready = current.trim();
    if (ready) chunks.push(ready);
    current = '';
  };

  for (const sentence of sentences) {
    if ((current + sentence).length <= limit) {
      current += sentence;
      continue;
    }

    flush();

    if (sentence.trim().length <= limit) {
      current = sentence;
      continue;
    }

    // The sentence alone is over the limit: pack it word by word instead.
    for (const word of sentence.match(/\S+\s*/g) || []) {
      if ((current + word).trimEnd().length <= limit) {
        current += word;
        continue;
      }

      flush();

      let rest = word;
      while (rest.trimEnd().length > limit) {
        let cut = limit;
        const lastUnit = rest.charCodeAt(cut - 1);
        // Do not cut between the two halves of a surrogate pair.
        if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
        chunks.push(rest.slice(0, cut));
        rest = rest.slice(cut);
      }
      current = rest;
    }
  }

  flush();
  return chunks;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Tells whether `text` is, as a whole, an http(s) URL.
 *
 * The pattern is anchored on purpose: a sentence or an HTML fragment that
 * merely contains a link is not a URL and must not be reported as one.
 *
 * @param {*} text - Candidate value; anything that is not a string yields false.
 * @returns {boolean} True when the trimmed text is a single http:// or https:// URL.
 */
function isLikelyURL(text) {
  if (typeof text !== 'string') return false;
  return /^https?:\/\/\S+$/i.test(text.trim());
}
|
|
43
|
+
|
|
44
|
+
// Reads the value stored under `cacheKey` through the shared Redis client.
async function getFromCache(cacheKey) {
  const storedValue = await redis.get(cacheKey);
  return storedValue;
}
|
|
47
|
+
|
|
48
|
+
// Stores `value` under `cacheKey`; `ttl` is handed to Redis as the 'EX' expiry option.
async function setInCache(cacheKey, value, ttl = REDIS_TTL) {
  const expiryArgs = ['EX', ttl];
  await redis.set(cacheKey, value, ...expiryArgs);
}
|
|
51
|
+
|
|
52
|
+
// Removes `cacheKey` through the shared Redis client and waits for the command to finish.
async function deleteCacheKey(cacheKey) {
  const removal = redis.del(cacheKey);
  await removal;
}
|
|
55
|
+
|
|
56
|
+
/**
 * 🔁 Translates a chunk with retries and mirror fallback.
 *
 * Mirrors from BASE_URLS are tried in order. A 429 response is retried on the
 * same mirror (up to RETRY_LIMIT attempts, pausing attempt * 500 ms in between).
 * Any other error, or a response without a usable translation, moves on to the
 * next mirror.
 *
 * @param {string} chunk - Text to translate; it is URI-encoded into the request path.
 * @param {string} [from] - Source language code; lower-cased, 'auto' when missing.
 * @param {string} [to] - Target language code; lower-cased, 'en' when missing.
 * @param {boolean} [debug=false] - Log progress and failures to the console.
 * @param {boolean} [fallbackToOriginal=true] - Return `chunk` instead of '' when
 *   no mirror produced a translation.
 * @returns {Promise<string>} The trimmed translation, or the fallback value.
 */
async function translateChunk(chunk, from, to, debug = false, fallbackToOriginal = true) {
  // axios has no timeout by default, so one hung mirror would stall the whole
  // fallback chain; cap each request instead.
  const REQUEST_TIMEOUT_MS = 10000;

  const encoded = encodeURIComponent(chunk);
  const langFrom = from?.toLowerCase?.() || 'auto';
  const langTo = to?.toLowerCase?.() || 'en';

  for (const baseUrl of BASE_URLS) {
    const url = `${baseUrl}/api/v1/${langFrom}/${langTo}/${encoded}`;

    for (let attempt = 1; attempt <= RETRY_LIMIT; attempt++) {
      try {
        if (debug) console.log(`🌐 Translating "${chunk}" via ${baseUrl} (attempt ${attempt})`);
        const response = await axios.get(url, { timeout: REQUEST_TIMEOUT_MS });
        const translation = response?.data?.translation;
        const translated = typeof translation === 'string' ? translation.trim() : '';
        if (translated) return translated;
        // The mirror answered but gave nothing usable: retrying it is pointless.
        break;
      } catch (error) {
        const status = error?.response?.status;
        const message = error?.message || 'Unknown error';

        if (status === 429) {
          // No request follows the last attempt on this mirror, so do not wait for it.
          if (attempt === RETRY_LIMIT) break;
          const waitMs = attempt * 500;
          if (debug) console.warn(`⚠️ 429 Too Many Requests – waiting ${waitMs}ms`);
          await delay(waitMs);
        } else {
          if (debug) {
            console.error(`❌ Mirror ${baseUrl} failed:`, { chunk, status, message });
          }
          break;
        }
      }
    }
  }

  if (debug) console.warn(`🚫 Final fallback for chunk "${chunk}"`);
  return fallbackToOriginal ? chunk : '';
}
|
|
97
|
+
|
|
98
|
+
/**
 * 🧠 Translates plain text (cached in Redis, safe retries)
 *
 * Plain text is split into chunks that are translated one after another and
 * joined with single spaces. With `options.html` the input is treated as an
 * HTML fragment and only its text nodes are translated.
 *
 * Redis is best-effort: a failing cache read or write is logged (in debug
 * mode) and never prevents or discards a translation. With `cacheEnabled`
 * set to false this function issues no Redis command itself.
 *
 * @param {string} text - Text or HTML fragment to translate.
 * @param {string} [to='fr'] - Target language code.
 * @param {string} [from='en'] - Source language code.
 * @param {object} [options]
 * @param {boolean} [options.debug=false] - Log progress to the console.
 * @param {boolean} [options.fallbackToOriginal=true] - Return the input instead
 *   of '' when translation fails.
 * @param {boolean} [options.cacheEnabled=true] - Read and write the Redis cache.
 * @param {boolean} [options.html=false] - Treat `text` as HTML.
 * @returns {Promise<string>} The translation. Empty, non-string, or URL-only
 *   input is returned unchanged.
 *
 * @example
 * const htmlInput = `
 *   <div>
 *     <h1>Hello World!</h1>
 *     <p>This is <strong>translated</strong> text with <a href="https://example.com">link</a>.</p>
 *   </div>
 * `;
 * const result = await translateWithLingva(htmlInput, 'fr', 'en', {
 *   debug: true,
 *   html: true,
 * });
 */
async function translateWithLingva(text, to = 'fr', from = 'en', options = {}) {
  const {
    debug = false,
    fallbackToOriginal = true,
    cacheEnabled = true,
    html = false,
  } = options ?? {};

  try {
    if (!text || typeof text !== 'string' || !text.trim()) {
      if (debug) console.warn('⚠️ Skipping invalid input');
      return text;
    }

    // HTML routinely carries URLs inside attributes; only plain input can
    // sensibly be "just a URL" that should pass through untranslated.
    if (!html && isLikelyURL(text)) return text;

    const trimmedText = text.trim();
    const langFrom = from?.toLowerCase?.() || 'auto';
    const langTo = to?.toLowerCase?.() || 'en';
    const cacheKey = `lingva:${langFrom}:${langTo}:${trimmedText}`;
    const failKey = `lingva:fail:${cacheKey}`;

    if (cacheEnabled) {
      try {
        const cached = await getFromCache(cacheKey);
        if (cached) {
          if (debug) console.log('✅ Redis cache hit');
          return cached;
        }
      } catch (cacheErr) {
        // A broken cache must not stop us from translating.
        if (debug) console.warn('⚠️ Redis cache read failed:', cacheErr?.message);
      }
    }

    let translated;
    if (html) {
      translated = await translateHtmlContent(trimmedText, from, to, debug, fallbackToOriginal);
    } else {
      const chunks = splitTextIntoChunks(trimmedText);
      const translations = [];

      for (const chunk of chunks) {
        const result = await translateChunk(chunk, from, to, debug, fallbackToOriginal);
        translations.push(result);
        await delay(DELAY_BETWEEN_REQUESTS);
      }

      translated = translations.join(' ').replace(/\s+/g, ' ').trim();
    }

    // An empty result, or one identical to the input, is treated as a failure.
    const isRealTranslation = Boolean(translated) && translated !== trimmedText;

    if (cacheEnabled) {
      try {
        if (isRealTranslation) {
          await setInCache(cacheKey, translated);
        } else {
          // Auto-invalidate on mirror failures
          const mirrorFailures = await redis.incr(failKey);
          await redis.expire(failKey, 3600); // 1h expiration

          if (mirrorFailures >= MAX_FAILURES) {
            await deleteCacheKey(cacheKey);
            if (debug) console.warn(`🔥 Invalidated cache due to repeated failures`);
          }
        }
      } catch (cacheErr) {
        // Keep the translation we already have even if Redis is unavailable.
        if (debug) console.warn('⚠️ Redis cache write failed:', cacheErr?.message);
      }
    }

    return translated || (fallbackToOriginal ? trimmedText : '');
  } catch (err) {
    if (debug) console.error('💥 Unexpected error:', err?.message);
    return fallbackToOriginal ? text : '';
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * 🧠 Translate inner text of HTML while preserving tags
 *
 * The fragment is parsed with JSDOM inside a <body> wrapper, every non-blank
 * text node is translated through translateWithLingva (plain-text mode), and
 * the body is serialized back to HTML. Text inside <script> and <style> is
 * left untouched, and the whitespace surrounding each text node is kept so
 * that words next to inline tags do not run together.
 *
 * @param {string} htmlText - HTML fragment to translate.
 * @param {string} from - Source language code.
 * @param {string} to - Target language code.
 * @param {boolean} [debug=false] - Forwarded to translateWithLingva.
 * @param {boolean} [fallbackToOriginal=true] - Forwarded to translateWithLingva.
 * @returns {Promise<string>} The fragment's HTML with translated text nodes.
 */
async function translateHtmlContent(htmlText, from, to, debug = false, fallbackToOriginal = true) {
  const dom = new JSDOM(`<body>${htmlText}</body>`);
  // NodeFilter is not a Node.js global; it has to come from the JSDOM window.
  const { document, NodeFilter } = dom.window;
  const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
  const untranslatableParents = new Set(['SCRIPT', 'STYLE']);

  while (walker.nextNode()) {
    const node = walker.currentNode;
    // Script and style bodies are code, not prose.
    if (untranslatableParents.has(node.parentNode?.nodeName)) continue;

    const original = node.nodeValue;
    const raw = original.trim();
    if (raw.length === 0) continue;

    const translated = await translateWithLingva(raw, to, from, {
      debug,
      fallbackToOriginal,
      cacheEnabled: true,
      html: false, // prevent recursion
    });

    // Put back the whitespace that trim() removed from either side.
    const leading = original.slice(0, original.length - original.trimStart().length);
    const trailing = original.slice(original.trimEnd().length);
    node.nodeValue = `${leading}${translated}${trailing}`;

    await delay(DELAY_BETWEEN_REQUESTS);
  }

  return document.body.innerHTML;
}
|
|
207
|
+
|
|
208
|
+
/**
 * 📚 Batch translation of plain text strings
 *
 * Entries are translated strictly one after another, each through
 * translateWithLingva with the same languages and options, and the results
 * are returned in input order.
 */
async function batchTranslateWithLingva(texts = [], to = 'fr', from = 'en', options = {}) {
  const translatedTexts = [];

  for (const entry of texts) {
    translatedTexts.push(await translateWithLingva(entry, to, from, options));
  }

  return translatedTexts;
}
|
|
219
|
+
|
|
220
|
+
// Public API of this module: single-text and batch translation.
module.exports = { translateWithLingva, batchTranslateWithLingva };
|