@yellowpanther/shared 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/index.js +4 -1
- package/src/lang/translationHelper.js +223 -0
- package/src/queue/imageAlbumPublishQueue.js +164 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yellowpanther/shared",
|
|
3
|
-
"version": "1.2.
|
|
3
|
+
"version": "1.2.2",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -25,11 +25,13 @@
|
|
|
25
25
|
"./queue/articlePublishQueue": "./src/queue/articlePublishQueue.js",
|
|
26
26
|
"./queue/videoPublishQueue": "./src/queue/videoPublishQueue.js",
|
|
27
27
|
"./queue/imagePublishQueue": "./src/queue/imagePublishQueue.js",
|
|
28
|
+
"./queue/imageAlbumPublishQueue": "./src/queue/imageAlbumPublishQueue.js",
|
|
28
29
|
"./queue/pagePublishQueue": "./src/queue/pagePublishQueue.js",
|
|
29
30
|
"./queue/productPublishQueue": "./src/queue/productPublishQueue.js",
|
|
30
31
|
"./queue/quizPublishQueue": "./src/queue/quizPublishQueue.js",
|
|
31
32
|
"./queue/predictPublishQueue": "./src/queue/predictPublishQueue.js",
|
|
32
33
|
"./queue/queueRegistry": "./src/queue/queueRegistry.js",
|
|
34
|
+
"./translationHelper": "./src/lang/translationHelper.js",
|
|
33
35
|
"./*": "./src/*"
|
|
34
36
|
},
|
|
35
37
|
"scripts": {
|
package/src/index.js
CHANGED
|
@@ -2,6 +2,9 @@ module.exports = {
|
|
|
2
2
|
// ✅ Redis
|
|
3
3
|
redisClient: require('./redis/redisClient'),
|
|
4
4
|
|
|
5
|
+
// ✅ Language translation helper
|
|
6
|
+
translationHelper: require('./lang/translationHelper'),
|
|
7
|
+
|
|
5
8
|
// ✅ Config
|
|
6
9
|
config: require('./config'),
|
|
7
10
|
|
|
@@ -18,5 +21,5 @@ module.exports = {
|
|
|
18
21
|
addProductPublishJob: require('./queue/productPublishQueue').addProductPublishJob,
|
|
19
22
|
addQuizPublishJob: require('./queue/quizPublishQueue').addQuizPublishJob,
|
|
20
23
|
addPredictionPublishJob: require('./queue/predictionPublishQueue').addPredictionPublishJob,
|
|
21
|
-
|
|
24
|
+
addImageAlbumPublishJob: require('./queue/imageAlbumPublishQueue').addImageAlbumPublishJob,
|
|
22
25
|
};
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
const axios = require('axios');
|
|
2
|
+
const { JSDOM } = require('jsdom'); // Required for HTML parsing
|
|
3
|
+
|
|
4
|
+
// ♻️ Shared Redis connection
|
|
5
|
+
const redis = require('../redis/redisClient');
|
|
6
|
+
|
|
7
|
+
// Lingva mirrors; translateChunk tries them in this order.
const BASE_URLS = [
  'https://lingva.ml',
  'https://translate.mentality.rip',
  'https://translate.plausibility.cloud',
  'https://lingva.translate.garudalinux.org',
];

// Default maximum chunk length (characters) used by splitTextIntoChunks.
const MAX_CHUNK_SIZE = 300;
// Attempts per mirror in translateChunk.
const RETRY_LIMIT = 3;
// Pause (ms) inserted after each translated chunk / HTML text node.
const DELAY_BETWEEN_REQUESTS = 300;
// Cache lifetime passed to Redis with 'EX' (seconds): 7 days.
const REDIS_TTL = 7 * 24 * 60 * 60;
// Failure-counter threshold at which translateWithLingva deletes the cache key.
const MAX_FAILURES = 3;

// Promise-based sleep: resolves after `ms` milliseconds.
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
|
21
|
+
|
|
22
|
+
/**
 * Split text into trimmed chunks of at most `maxChunkSize` characters.
 *
 * Sentences (runs ending in `.`, `!` or `?` plus trailing whitespace) are
 * packed greedily into chunks. A sentence that is itself longer than the
 * limit is split on word boundaries, and a single word longer than the limit
 * is hard-sliced, so no returned chunk exceeds the limit.
 *
 * @param {string} text - Text to split.
 * @param {number} [maxChunkSize=MAX_CHUNK_SIZE] - Maximum chunk length; values
 *   below 1 (or NaN) fall back to MAX_CHUNK_SIZE.
 * @returns {string[]} Non-empty, trimmed chunks in original order.
 */
function splitTextIntoChunks(text, maxChunkSize = MAX_CHUNK_SIZE) {
  const limit = maxChunkSize >= 1 ? Math.floor(maxChunkSize) : MAX_CHUNK_SIZE;
  const sentences = text.match(/[^.!?]+[.!?]*[\s]*/g) || [text];
  const chunks = [];
  let current = '';

  const flush = () => {
    const trimmed = current.trim();
    if (trimmed) chunks.push(trimmed);
    current = '';
  };

  // Start a new chunk whenever adding `piece` would overflow the limit.
  const append = (piece) => {
    if ((current + piece).length > limit) flush();
    current += piece;
  };

  for (const sentence of sentences) {
    if (sentence.length <= limit) {
      append(sentence);
      continue;
    }

    // Oversized sentence: fall back to word granularity.
    const words = sentence.match(/\s*\S+\s*/g) || [sentence];
    for (const word of words) {
      if (word.length <= limit) {
        append(word);
        continue;
      }
      // A single token longer than the limit can only be hard-sliced.
      for (let start = 0; start < word.length; start += limit) {
        append(word.slice(start, start + limit));
      }
    }
  }

  flush();
  return chunks;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Report whether the whole (trimmed) text is a single http(s) URL.
 *
 * The pattern is anchored on purpose: text that merely contains a URL
 * somewhere is ordinary prose and must not be classified as a URL.
 *
 * @param {string} text
 * @returns {boolean}
 */
function isLikelyURL(text) {
  return typeof text === 'string' && /^https?:\/\/\S+$/i.test(text.trim());
}
|
|
43
|
+
|
|
44
|
+
/**
 * Read a value from the shared Redis connection.
 *
 * @param {string} cacheKey
 * @returns {Promise<*>} Whatever `redis.get` resolves to for this key.
 */
async function getFromCache(cacheKey) {
  const cachedValue = await redis.get(cacheKey);
  return cachedValue;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Store a value in Redis with an expiry.
 *
 * @param {string} cacheKey
 * @param {string} value
 * @param {number} [ttl=REDIS_TTL] - Expiry passed to Redis with the 'EX' option.
 * @returns {Promise<void>}
 */
async function setInCache(cacheKey, value, ttl = REDIS_TTL) {
  const expiryMode = 'EX';
  await redis.set(cacheKey, value, expiryMode, ttl);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Delete a key from Redis.
 *
 * @param {string} cacheKey
 * @returns {Promise<void>}
 */
async function deleteCacheKey(cacheKey) {
  const keyToDrop = cacheKey;
  await redis.del(keyToDrop);
}
|
|
55
|
+
|
|
56
|
+
/**
 * 🔁 Translates a chunk with retries and mirror fallback.
 *
 * Mirrors in BASE_URLS are tried in order. Each mirror gets up to RETRY_LIMIT
 * attempts, but only HTTP 429 responses are retried (after `attempt * 500` ms).
 * Any other error, or a response without a non-empty `data.translation`,
 * moves on to the next mirror.
 *
 * @param {string} chunk - Text to translate (sent URL-encoded in the path).
 * @param {string} [from] - Source language; falls back to 'auto'.
 * @param {string} [to] - Target language; falls back to 'en'.
 * @param {boolean} [debug=false] - Log progress and failures to the console.
 * @param {boolean} [fallbackToOriginal=true] - When every mirror fails, return
 *   `chunk` (true) or an empty string (false).
 * @returns {Promise<string>} The trimmed translation, or the fallback value.
 */
async function translateChunk(chunk, from, to, debug = false, fallbackToOriginal = true) {
  // axios has no default timeout; without one a single hung mirror would
  // stall the whole fallback chain indefinitely.
  const REQUEST_TIMEOUT_MS = 10000;

  const encoded = encodeURIComponent(chunk);
  const langFrom = from?.toLowerCase?.() || 'auto';
  const langTo = to?.toLowerCase?.() || 'en';

  for (const baseUrl of BASE_URLS) {
    const url = `${baseUrl}/api/v1/${langFrom}/${langTo}/${encoded}`;

    for (let attempt = 1; attempt <= RETRY_LIMIT; attempt++) {
      try {
        if (debug) console.log(`🌐 Translating "${chunk}" via ${baseUrl} (attempt ${attempt})`);
        const response = await axios.get(url, { timeout: REQUEST_TIMEOUT_MS });
        const translated = response?.data?.translation?.trim();
        if (translated) return translated;
        // Empty or missing translation: retrying the same mirror is pointless.
        break;
      } catch (error) {
        const status = error?.response?.status;
        const message = error?.message || 'Unknown error';
        const canRetry = status === 429 && attempt < RETRY_LIMIT;

        if (!canRetry) {
          if (debug) {
            console.error(`❌ Mirror ${baseUrl} failed:`, { chunk, status, message });
          }
          break;
        }

        // Rate limited with attempts left: back off linearly, then retry.
        if (debug) console.warn(`⚠️ 429 Too Many Requests – waiting ${attempt * 500}ms`);
        await delay(attempt * 500);
      }
    }
  }

  if (debug) console.warn(`🚫 Final fallback for chunk "${chunk}"`);
  return fallbackToOriginal ? chunk : '';
}
|
|
97
|
+
|
|
98
|
+
/**
 * 🧠 Translates plain text or HTML via Lingva mirrors, with Redis caching.
 *
 * Flow:
 *  1. Non-string / blank input is returned unchanged.
 *  2. Plain text that isLikelyURL() classifies as a URL is returned unchanged.
 *     The check is skipped in HTML mode, because markup routinely carries
 *     URLs in attributes and would otherwise never be translated.
 *  3. With `cacheEnabled`, a cached value under `lingva:<from>:<to>:<text>` is
 *     returned if present.
 *  4. Otherwise the text is translated (per chunk, or per text node in HTML
 *     mode) and cached when the result is non-empty and differs from the input.
 *  5. An empty or unchanged result increments a one-hour failure counter; at
 *     MAX_FAILURES the cache key is deleted.
 *
 * Any unexpected error is swallowed (logged when `debug`) and the fallback
 * value is returned.
 *
 * @example
 * const { translateWithLingva } = require('@yellowpanther/shared/translationHelper');
 * const result = await translateWithLingva(
 *   '<p>This is <strong>translated</strong> text with <a href="https://example.com">link</a>.</p>',
 *   'fr',
 *   'en',
 *   { debug: true, html: true }
 * );
 *
 * @param {string} text - Text (or HTML when `options.html`) to translate.
 * @param {string} [to='fr'] - Target language.
 * @param {string} [from='en'] - Source language.
 * @param {Object} [options]
 * @param {boolean} [options.debug=false] - Log to the console.
 * @param {boolean} [options.fallbackToOriginal=true] - Return the input
 *   instead of '' when translation fails.
 * @param {boolean} [options.cacheEnabled=true] - Read/write the Redis cache.
 * @param {boolean} [options.html=false] - Treat `text` as HTML and translate
 *   only its text nodes.
 * @returns {Promise<string>} Translated text, or the fallback value.
 */
async function translateWithLingva(text, to = 'fr', from = 'en', options = {}) {
  // `options ?? {}` so an explicit null does not throw outside the try block.
  const {
    debug = false,
    fallbackToOriginal = true,
    cacheEnabled = true,
    html = false,
  } = options ?? {};

  try {
    if (!text || typeof text !== 'string' || !text.trim()) {
      if (debug) console.warn('⚠️ Skipping invalid input');
      return text;
    }

    if (!html && isLikelyURL(text)) return text;

    const trimmedText = text.trim();
    const langFrom = from?.toLowerCase?.() || 'auto';
    const langTo = to?.toLowerCase?.() || 'en';
    const cacheKey = `lingva:${langFrom}:${langTo}:${trimmedText}`;

    if (cacheEnabled) {
      const cached = await getFromCache(cacheKey);
      if (cached) {
        if (debug) console.log('✅ Redis cache hit');
        return cached;
      }
    }

    let translated;
    if (html) {
      translated = await translateHtmlContent(trimmedText, from, to, debug, fallbackToOriginal);
    } else {
      const translations = [];

      for (const chunk of splitTextIntoChunks(trimmedText)) {
        const result = await translateChunk(chunk, from, to, debug, fallbackToOriginal);
        translations.push(result);
        await delay(DELAY_BETWEEN_REQUESTS);
      }

      translated = translations.join(' ').replace(/\s+/g, ' ').trim();
    }

    // An unchanged result is treated as "translation failed".
    const isUsable = Boolean(translated) && translated !== trimmedText;

    // Cache only if result is non-empty and not equal to input
    if (isUsable && cacheEnabled) {
      await setInCache(cacheKey, translated);
    }

    // Auto-invalidate on mirror failures
    if (!isUsable) {
      const failKey = `lingva:fail:${cacheKey}`;
      const mirrorFailures = await redis.incr(failKey);
      await redis.expire(failKey, 3600); // 1h expiration

      if (mirrorFailures >= MAX_FAILURES) {
        await deleteCacheKey(cacheKey);
        if (debug) console.warn(`🔥 Invalidated cache due to repeated failures`);
      }
    }

    return translated || (fallbackToOriginal ? trimmedText : '');
  } catch (err) {
    if (debug) console.error('💥 Unexpected error:', err.message);
    return fallbackToOriginal ? text : '';
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * 🧠 Translate inner text of HTML while preserving tags
 *
 * The markup is parsed with JSDOM, every non-blank text node is translated
 * through translateWithLingva (plain-text mode, cache enabled), and the
 * resulting body HTML is serialized back.
 *
 * @param {string} htmlText - HTML fragment to translate.
 * @param {string} from - Source language.
 * @param {string} to - Target language.
 * @param {boolean} [debug=false] - Forwarded to translateWithLingva.
 * @param {boolean} [fallbackToOriginal=true] - Forwarded to translateWithLingva.
 * @returns {Promise<string>} `innerHTML` of the translated body.
 */
async function translateHtmlContent(htmlText, from, to, debug = false, fallbackToOriginal = true) {
  const dom = new JSDOM(`<body>${htmlText}</body>`);
  // NodeFilter is a DOM interface, not a Node.js global; it has to come from
  // the JSDOM window, otherwise this line throws a ReferenceError.
  const { document, NodeFilter } = dom.window;
  const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);

  while (walker.nextNode()) {
    const node = walker.currentNode;

    // Script and style contents are code, not prose; translating them would corrupt them.
    const parentTag = node.parentNode?.nodeName;
    if (parentTag === 'SCRIPT' || parentTag === 'STYLE') continue;

    const original = node.nodeValue;
    const raw = original.trim();
    if (raw.length === 0) continue;

    const translated = await translateWithLingva(raw, to, from, {
      debug,
      fallbackToOriginal,
      cacheEnabled: true,
      html: false, // prevent recursion
    });

    // Re-attach the surrounding whitespace so words stay separated from
    // adjacent inline elements (e.g. "is <strong>bold</strong> text").
    const [leading] = original.match(/^\s*/);
    const [trailing] = original.match(/\s*$/);
    node.nodeValue = `${leading}${translated}${trailing}`;

    await delay(DELAY_BETWEEN_REQUESTS);
  }

  return document.body.innerHTML;
}
|
|
207
|
+
|
|
208
|
+
/**
 * 📚 Batch translation of plain text strings
 *
 * Items are translated one after another with translateWithLingva, and the
 * results are returned in input order.
 *
 * @param {Iterable<string>} [texts=[]] - Strings to translate.
 * @param {string} [to='fr'] - Target language.
 * @param {string} [from='en'] - Source language.
 * @param {Object} [options={}] - Passed through to translateWithLingva.
 * @returns {Promise<string[]>} One result per input item.
 */
async function batchTranslateWithLingva(texts = [], to = 'fr', from = 'en', options = {}) {
  const translatedTexts = [];

  for (const entry of texts) {
    translatedTexts.push(await translateWithLingva(entry, to, from, options));
  }

  return translatedTexts;
}
|
|
219
|
+
|
|
220
|
+
// Public API of the translation helper.
module.exports = { translateWithLingva, batchTranslateWithLingva };
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// src/queue/imageAlbumPublishQueue.js
|
|
2
|
+
// One-off *publish* scheduler for image album items (BullMQ).
|
|
3
|
+
// Strong upsert: purge any prior job for the same album before adding a new delayed job.
|
|
4
|
+
|
|
5
|
+
const { Queue } = require('bullmq');
|
|
6
|
+
const redisClient = require('../redis/redisClient');
|
|
7
|
+
// const logger = require('../logger'); // uncomment if you have a shared logger
|
|
8
|
+
|
|
9
|
+
// Verbose console logging is on only when DEBUG_LOGGER is exactly "1".
const DEBUG = String(process.env.DEBUG_LOGGER || '').trim() === '1';

/**
 * Parse the scheduling drift guard (milliseconds) from a raw env value.
 *
 * Unset or empty values use `fallback`, negative values clamp to 0, and
 * non-numeric / non-finite values also use `fallback`. Without that last rule
 * the result would be NaN, and every `delay < DRIFT_MS` comparison would be
 * false, silently disabling the guard.
 *
 * @param {string|undefined} raw - Raw environment value.
 * @param {number} [fallback=250] - Default drift in milliseconds.
 * @returns {number} A finite, non-negative number of milliseconds.
 */
function parseDriftMs(raw, fallback = 250) {
  const parsed = Number(raw || fallback);
  return Number.isFinite(parsed) ? Math.max(0, parsed) : fallback;
}

// Minimum delay applied to scheduled jobs (see addImageAlbumPublishJob).
const DRIFT_MS = parseDriftMs(process.env.SCHEDULE_DRIFT_MS);
|
|
11
|
+
|
|
12
|
+
// Defaults applied to every job added to the queue: up to 5 attempts with
// exponential backoff starting at 2000 ms, and the completed/failed
// retention settings (500 each).
const IMAGE_ALBUM_DEFAULT_JOB_OPTIONS = {
  attempts: 5,
  backoff: { type: 'exponential', delay: 2000 },
  removeOnComplete: 500,
  removeOnFail: 500,
};

// BullMQ queue for image album publish jobs, on the shared Redis connection.
const imageAlbumPublishQueue = new Queue('imageAlbumPublishQueue', {
  connection: redisClient,
  defaultJobOptions: IMAGE_ALBUM_DEFAULT_JOB_OPTIONS,
});
|
|
21
|
+
|
|
22
|
+
// ── ID helpers ────────────────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
/**
 * Build the stable job id for an album: the same id for every schedule.
 *
 * @param {string|number} album_id
 * @returns {string} `imageAlbum:publish:<album_id>`
 */
function stableJobId(album_id) {
  return 'imageAlbum:publish:'.concat(album_id);
}
|
|
27
|
+
/**
 * Build the versioned job id for an album: unique per album and run time.
 *
 * NOTE(review): the id contains ':' separators and an ISO timestamp; some
 * BullMQ releases reportedly restrict ':' in custom job ids. Confirm against
 * the installed BullMQ version.
 *
 * @param {string|number} album_id
 * @param {string} runAtIso - Run time; callers in this file pass an ISO string.
 * @returns {string} `imageAlbum:publish:<album_id>:<runAtIso>`
 */
function versionedJobId(album_id, runAtIso) {
  return 'imageAlbum:publish:'.concat(album_id, ':', runAtIso);
}
|
|
30
|
+
|
|
31
|
+
// ── Time helpers ──────────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * Parse a Date, epoch-milliseconds number, or datetime string into a new Date.
 *
 * Strings ending in a timezone designator (`Z`, or `±hh:mm`) are parsed as
 * given. Strings without one are treated as UTC; a single space between date
 * and time is accepted in place of `T`.
 *
 * @param {Date|number|string} input
 * @returns {Date} A new Date instance (a copy when `input` is a Date).
 * @throws {Error} When the value is an invalid date or an unsupported type.
 */
function normalizeToUtcDate(input) {
  if (input instanceof Date) {
    // Reject invalid dates here, where the message can be descriptive,
    // rather than letting a later toISOString() throw a bare RangeError.
    if (Number.isNaN(input.getTime())) throw new Error('Invalid Date instance');
    return new Date(input.getTime());
  }

  if (typeof input === 'number') {
    const fromEpoch = new Date(input); // epoch ms
    if (Number.isNaN(fromEpoch.getTime())) throw new Error(`Invalid epoch milliseconds: ${input}`);
    return fromEpoch;
  }

  if (typeof input === 'string') {
    const s = input.trim();

    // Has timezone (Z or ±hh:mm). The group keeps BOTH alternatives anchored
    // to the end; otherwise a "z" anywhere in the string would match.
    if (/(?:[zZ]|[+-]\d{2}:\d{2})$/.test(s)) {
      const d = new Date(s);
      if (Number.isNaN(d.getTime())) throw new Error(`Invalid ISO datetime: ${input}`);
      return d;
    }

    // No timezone => treat as UTC
    const isoLike = s.replace(' ', 'T');
    // Date-only ISO strings are already parsed as UTC, and "YYYY-MM-DDZ" is
    // not valid ISO, so the Z suffix is added only when a time part exists.
    const isDateOnly = /^\d{4}-\d{2}-\d{2}$/.test(isoLike);
    const d = new Date(isDateOnly ? isoLike : `${isoLike}Z`);
    if (Number.isNaN(d.getTime())) throw new Error(`Invalid datetime (no tz): ${input}`);
    return d;
  }

  throw new Error(`Unsupported runAtUtc type: ${typeof input}`);
}
|
|
54
|
+
|
|
55
|
+
// ── Purge helpers ────────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
/**
 * List the not-yet-finished publish jobs that belong to one album.
 *
 * Fetches every job in the delayed, waiting, waiting-children and active
 * states, then keeps those named 'imageAlbum:publish' whose `data.album_id`
 * equals the stringified `album_id`.
 *
 * @param {string|number} album_id
 * @returns {Promise<Array>} Matching jobs.
 */
async function findPendingAlbumJobs(album_id) {
  const candidates = await imageAlbumPublishQueue.getJobs([
    'delayed',
    'waiting',
    'waiting-children',
    'active',
  ]);

  return candidates.filter((job) => {
    if (job?.name !== 'imageAlbum:publish') return false;
    return job?.data?.album_id === String(album_id);
  });
}
|
|
64
|
+
|
|
65
|
+
/**
 * Remove every known job for an album and report how many were removed.
 *
 * Two passes: first all jobs returned by findPendingAlbumJobs, then the job
 * stored under the stable id (if one is still present). A failing `remove()`
 * is not fatal; it is only logged when DEBUG is on.
 *
 * @param {string|number} album_id
 * @returns {Promise<number>} Count of successfully removed jobs.
 */
async function purgeExistingForAlbum(album_id) {
  let removed = 0;

  // Best-effort removal shared by both passes; `label` only affects the log line.
  const tryRemove = async (job, label) => {
    try {
      await job.remove();
      removed += 1;
    } catch (err) {
      if (DEBUG) console.warn(`[imageAlbumPublishQueue] failed to remove ${label} job ${job.id}: ${err.message}`);
    }
  };

  // Pass 1: pending/waiting/active jobs for this album.
  const pendingJobs = await findPendingAlbumJobs(album_id);
  for (const pendingJob of pendingJobs) {
    await tryRemove(pendingJob, 'pending');
  }

  // Pass 2: a leftover job under the stable id.
  const stableJob = await imageAlbumPublishQueue.getJob(stableJobId(album_id));
  if (stableJob) {
    await tryRemove(stableJob, 'stable');
  }

  if (DEBUG) {
    console.info('[imageAlbumPublishQueue] purgeExistingForAlbum', {
      album_id: String(album_id),
      removed,
    });
  }

  return removed;
}
|
|
98
|
+
|
|
99
|
+
// ── Public API ───────────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
/**
 * Upsert a one-off publish job for an image album.
 * - Purges any previous jobs for this album
 * - Uses a versioned jobId by default (or stable if useStableId=true)
 * - Removes any job still stored under the chosen jobId before adding, so the
 *   new job is not dropped as a duplicate
 *
 * @param {Object} params
 * @param {string|number} params.album_id
 * @param {string|number|Date} params.runAtUtc - ISO string, epoch ms, or Date (UTC)
 * @param {object} [params.extra] - Stored on the job data as `extra`.
 * @param {boolean} [params.useStableId=false] - if true, uses stable jobId
 * @returns {Promise<Object>} The job returned by `queue.add`.
 * @throws {Error} When `album_id` or `runAtUtc` is missing, or `runAtUtc`
 *   cannot be parsed by normalizeToUtcDate.
 */
async function addImageAlbumPublishJob({ album_id, runAtUtc, extra = {}, useStableId = false }) {
  if (!album_id || !runAtUtc) {
    throw new Error('addImageAlbumPublishJob: album_id and runAtUtc are required');
  }

  const runAt = normalizeToUtcDate(runAtUtc);
  const runAtIso = runAt.toISOString();

  // Drift guard to avoid immediate fire due to ms skew. A non-finite DRIFT_MS
  // is treated as 0 so a bad env value can never yield a NaN or negative delay.
  const now = Date.now();
  const driftMs = Number.isFinite(DRIFT_MS) ? Math.max(0, DRIFT_MS) : 0;
  const delayMs = Math.max(runAt.getTime() - now, driftMs);

  // Purge any prior jobs for this album (across states)
  await purgeExistingForAlbum(album_id);

  // Choose jobId strategy
  const jobId = useStableId ? stableJobId(album_id) : versionedJobId(album_id, runAtIso);

  // BullMQ does not add a job whose id already exists in the queue. The purge
  // above covers pending states and the stable id only, so a retained
  // completed/failed job with this exact versioned id would make the add()
  // below a silent no-op. Remove it first (best effort).
  const sameIdJob = await imageAlbumPublishQueue.getJob(jobId);
  if (sameIdJob) {
    try {
      await sameIdJob.remove();
    } catch (e) {
      if (DEBUG) console.warn(`[imageAlbumPublishQueue] failed to remove existing job ${sameIdJob.id}: ${e.message}`);
    }
  }

  const job = await imageAlbumPublishQueue.add(
    'imageAlbum:publish',
    { album_id: String(album_id), runAtUtc: runAtIso, extra },
    { jobId, delay: delayMs }
  );

  if (DEBUG) {
    console.info('[imageAlbumPublishQueue] upsert', {
      album_id: String(album_id),
      jobId,
      runAtIso,
      delayMs,
      nowIso: new Date(now).toISOString(),
    });
  }

  return job;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Cancel any scheduled publish for a given image album (if present).
 *
 * @param {string|number} album_id
 * @returns {Promise<boolean>} true when at least one job was removed.
 */
async function cancelImageAlbumPublishJob(album_id) {
  const removedCount = await purgeExistingForAlbum(album_id);
  const anyRemoved = removedCount > 0;
  return anyRemoved;
}
|
|
155
|
+
|
|
156
|
+
module.exports = {
|
|
157
|
+
imageAlbumPublishQueue,
|
|
158
|
+
addImageAlbumPublishJob,
|
|
159
|
+
cancelImageAlbumPublishJob,
|
|
160
|
+
// helpers for tooling/tests
|
|
161
|
+
stableJobId,
|
|
162
|
+
versionedJobId,
|
|
163
|
+
normalizeToUtcDate,
|
|
164
|
+
};
|