bctranslate 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -0
- package/bin/bctranslate.js +373 -0
- package/package.json +50 -0
- package/python/translator.py +110 -0
- package/src/bridges/python.js +189 -0
- package/src/config.js +28 -0
- package/src/detect.js +58 -0
- package/src/generators/locales.js +105 -0
- package/src/generators/setup.js +445 -0
- package/src/index.js +233 -0
- package/src/parsers/html.js +161 -0
- package/src/parsers/js.js +156 -0
- package/src/parsers/json.js +93 -0
- package/src/parsers/react.js +148 -0
- package/src/parsers/vue.js +282 -0
- package/src/utils.js +227 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import * as compiler from '@vue/compiler-dom';
|
|
2
|
+
import { parse as babelParse } from '@babel/parser';
|
|
3
|
+
import _traverse from '@babel/traverse';
|
|
4
|
+
const traverse = _traverse.default;
|
|
5
|
+
import MagicString from 'magic-string';
|
|
6
|
+
import { contextKey, isTranslatable } from '../utils.js';
|
|
7
|
+
|
|
8
|
+
// Attribute names that walkTemplate refuses to rewrite even when they pass
// the whitelist check. Grouped by purpose; insertion order is unchanged.
const ATTR_BLACKLIST = new Set([
  // Identity, styling and linking
  'id', 'class', 'style', 'src', 'href', 'ref', 'key', 'is',
  // Vue directives
  'v-model', 'v-bind', 'v-on', 'v-if', 'v-else', 'v-else-if', 'v-for', 'v-show',
  'v-html', 'v-text', 'v-slot', 'v-pre', 'v-cloak', 'v-once', 'v-memo',
  // Form and document attributes
  'name', 'type', 'value', 'action', 'method', 'target', 'rel', 'media', 'lang',
  'charset', 'content', 'http-equiv', 'for', 'tabindex', 'role',
  // SVG
  'xmlns', 'viewBox', 'fill', 'stroke', 'd', 'cx', 'cy', 'r', 'rx', 'ry', 'x', 'y',
  'width', 'height', 'transform', 'xmlns:xlink', 'xlink:href',
  // Test hooks
  'data-testid', 'data-cy',
]);
|
|
19
|
+
|
|
20
|
+
// Static attributes whose values walkTemplate considers for translation.
const ATTR_WHITELIST = new Set([
  'title',
  'placeholder',
  'label',
  'alt',
  'aria-label',
  'aria-placeholder',
  'aria-description',
  'aria-roledescription',
]);
|
|
24
|
+
|
|
25
|
+
/**
 * How parseVue chooses the translation call it emits.
 *
 * Templates:
 *   - the template already calls `$t(...)`             → emit $t('key')
 *   - the template calls `t(...)` and never `$t(...)`  → emit t('key')
 *   - the template calls neither                       → emit $t('key')
 *
 * Scripts:
 *   - <script setup>  → emit t('key'). When at least one string was extracted
 *     and the file does not already destructure `t`, `const { t } = useI18n()`
 *     is injected (plus the vue-i18n import if `useI18n` is not mentioned).
 *   - plain <script>  → emit this.$t('key')
 */
|
|
36
|
+
|
|
37
|
+
/**
 * Parse a .vue single-file component, replace translatable strings with i18n
 * calls, and report what was extracted.
 *
 * @param {string} source   Full text of the .vue file.
 * @param {string} filePath Path of the file; forwarded to contextKey() to build keys.
 * @returns {{ source: string, extracted: Array<{ key: string, text: string, context: string }>, modified: boolean }}
 *   `source` is the rewritten text (the untouched input when nothing was
 *   extracted); `modified` is true when at least one string was extracted.
 */
export function parseVue(source, filePath) {
  const extracted = [];
  const s = new MagicString(source);

  const scriptSetupMatch = source.match(/(<script\b[^>]*\bsetup\b[^>]*>)([\s\S]*?)<\/script>/i);
  const hasScriptSetup = scriptSetupMatch !== null;

  // Locate the root <template> once, honouring nested <template v-if> /
  // <template #slot> tags, and reuse it for both detection and rewriting.
  const rootTemplate = findRootTemplate(source);
  const templateContent = rootTemplate ? rootTemplate.content : '';

  // Prefer what the file already uses in templates: `$t()` vs `t()`.
  const templatePrefersDollarT = /\$t\s*\(/.test(templateContent);
  const templatePrefersT = !templatePrefersDollarT && /\bt\s*\(/.test(templateContent);

  const tpl = (key) => (templatePrefersT ? `t('${key}')` : `$t('${key}')`);
  const scr = (key) => (hasScriptSetup ? `t('${key}')` : `this.$t('${key}')`);

  // ── Template ────────────────────────────────────────────────────────────────
  if (rootTemplate) {
    try {
      const ast = compiler.parse(templateContent, { comments: true, getTextMode: () => 0 });
      walkTemplate(ast.children, s, rootTemplate.offset, extracted, filePath, tpl);
    } catch {
      // The compiler rejected the template: fall back to the regex scanner.
      extractTemplateRegex(source, s, extracted, filePath, tpl);
    }
  }

  // ── Script (prefer <script setup>, else first non-setup <script>) ────────────
  const scriptMatch =
    scriptSetupMatch ??
    source.match(/(<script\b(?![^>]*\bsetup\b)[^>]*>)([\s\S]*?)<\/script>/i);
  if (scriptMatch) {
    // Content starts right after the opening tag; derive the offset from the
    // match position instead of searching for the content text, which could
    // also occur inside the opening tag.
    const scriptOffset = scriptMatch.index + scriptMatch[1].length;
    extractScriptStrings(scriptMatch[2], s, scriptOffset, extracted, filePath, scr);
  }

  // ── Inject `const { t } = useI18n()` if not yet declared ────────
  if (extracted.length > 0 && hasScriptSetup) {
    const hasT = /const\s*\{[^}]*\bt\b[^}]*\}\s*=/.test(source);

    if (!hasT) {
      const insertAt = scriptSetupMatch.index + scriptSetupMatch[1].length;
      const needsImport = !source.includes('useI18n');
      const importLine = needsImport ? `import { useI18n } from 'vue-i18n';\n` : '';
      s.appendRight(insertAt, `\n${importLine}const { t } = useI18n();\n`);
    }
  }

  const modified = extracted.length > 0;
  return {
    source: modified ? s.toString() : source,
    extracted,
    modified,
  };
}

/**
 * Find the first <template> block and its matching closing tag, counting
 * nested <template> elements so the block is not cut at an inner </template>.
 *
 * @param {string} source Full SFC text.
 * @returns {{ content: string, offset: number } | null} Inner content and the
 *   index in `source` where it starts, or null when there is no complete block.
 */
function findRootTemplate(source) {
  const open = /<template\b[^>]*>/.exec(source);
  if (open === null || open[0].endsWith('/>')) return null;

  const offset = open.index + open[0].length;
  const tagPattern = /<template\b[^>]*>|<\/template\s*>/g;
  tagPattern.lastIndex = offset;

  let depth = 1;
  let tag;
  while ((tag = tagPattern.exec(source)) !== null) {
    if (tag[0].startsWith('</')) {
      depth -= 1;
    } else if (!tag[0].endsWith('/>')) {
      // Self-closing <template /> does not open a new level.
      depth += 1;
    }
    if (depth === 0) {
      return { content: source.slice(offset, tag.index), offset };
    }
  }
  return null;
}
|
|
105
|
+
|
|
106
|
+
// ── Template walker ───────────────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
/**
 * Recursively rewrite translatable content in a parsed template.
 *
 * Text nodes (type 2) become `{{ <call> }}` and whitelisted static attributes
 * (prop type 6) become `:attr="<call>"`, where <call> is `tpl(key)`. Each
 * rewrite is recorded in `extracted`.
 *
 * @param {Array<object>} nodes      AST nodes returned by compiler.parse().
 * @param {object} s                 MagicString over the whole file; `s.original`
 *                                   is read and `s.overwrite()` is called.
 * @param {number} baseOffset        Added to every node offset to address the file.
 * @param {Array<object>} extracted  Receives `{ key, text, context }` entries.
 * @param {string} filePath          Forwarded to contextKey().
 * @param {(key: string) => string} tpl Builds the call expression for a key.
 */
function walkTemplate(nodes, s, baseOffset, extracted, filePath, tpl) {
  for (const node of nodes) {
    // Text node (type 2)
    if (node.type === 2) {
      const text = node.content.trim();
      if (isTranslatable(text)) {
        const start = baseOffset + node.loc.start.offset;
        const end = baseOffset + node.loc.end.offset;

        if (!isAlreadyWrapped(s.original, start, end)) {
          const key = contextKey(text, filePath);
          // Read the surrounding whitespace from the original source span,
          // not from node.content: the parser may normalise whitespace in
          // `content`, which would collapse newlines and indentation.
          const raw = s.original.slice(start, end);
          const lws = raw.match(/^\s*/)[0];
          const tws = raw.length > lws.length ? raw.match(/\s*$/)[0] : '';
          s.overwrite(start, end, `${lws}{{ ${tpl(key)} }}${tws}`);
          extracted.push({ key, text, context: 'template-text' });
        }
      }
    }

    // Element (type 1) — check translatable attributes, then recurse
    if (node.type === 1) {
      for (const prop of node.props ?? []) {
        // Only static attributes (type 6) that carry a value.
        if (prop.type !== 6 || !prop.value) continue;

        const attrName = prop.name.toLowerCase();
        const text = prop.value.content;
        if (!ATTR_WHITELIST.has(attrName) || !isTranslatable(text)) continue;
        if (ATTR_BLACKLIST.has(attrName)) continue;

        const key = contextKey(text, filePath);
        const attrStart = baseOffset + prop.loc.start.offset;
        const attrEnd = baseOffset + prop.loc.end.offset;
        // Replace the whole `name="value"` span with a bound attribute.
        s.overwrite(attrStart, attrEnd, `:${attrName}="${tpl(key)}"`);
        extracted.push({ key, text, context: `template-attr-${attrName}` });
      }
      if (node.children) walkTemplate(node.children, s, baseOffset, extracted, filePath, tpl);
    }

    // ForNode (type 11)
    if (node.type === 11 && node.children) {
      walkTemplate(node.children, s, baseOffset, extracted, filePath, tpl);
    }

    // IfNode (type 9) — walk branches
    if (node.type === 9 && node.branches) {
      for (const branch of node.branches) {
        if (branch.children) walkTemplate(branch.children, s, baseOffset, extracted, filePath, tpl);
      }
    }
  }
}
|
|
161
|
+
|
|
162
|
+
// ── Script string extractor ───────────────────────────────────────────────────
|
|
163
|
+
|
|
164
|
+
/**
 * Replace translatable string literals in a script block with `scr(key)`.
 *
 * The script is parsed with Babel; plain string literals and template literals
 * without expressions are candidates. If parsing or rewriting throws, the
 * error is logged and extractScriptStringsRegex() is used instead.
 *
 * @param {string} scriptContent     Text between the <script> tags.
 * @param {object} s                 MagicString over the whole file.
 * @param {number} baseOffset        Offset of `scriptContent` within the file.
 * @param {Array<object>} extracted  Receives `{ key, text, context: 'script' }`.
 * @param {string} filePath          Forwarded to contextKey().
 * @param {(key: string) => string} scr Builds the call expression for a key.
 */
function extractScriptStrings(scriptContent, s, baseOffset, extracted, filePath, scr) {
  // Parents under which a string is never UI copy, or where a call expression
  // would be syntactically invalid (type positions, bare JSX attribute values).
  const skippedParents = new Set([
    'ImportDeclaration',
    'ExportNamedDeclaration',
    'ExportAllDeclaration',
    'TSLiteralType',
    'JSXAttribute',
  ]);
  const translateFns = new Set(['t', '$t']);

  // True for t('…'), $t('…') and member calls such as this.$t('…') / i18n.t('…').
  const isTranslateCall = (parent) => {
    if (parent.type !== 'CallExpression') return false;
    const { callee } = parent;
    if (callee.type === 'Identifier') return translateFns.has(callee.name);
    return (
      callee.type === 'MemberExpression' &&
      !callee.computed &&
      translateFns.has(callee.property.name)
    );
  };

  // True when `node` is the value of a `name: '…'` property. Babel emits
  // `ObjectProperty`; `Property` is kept for ESTree-shaped ASTs.
  const isNameValue = (parent, node) => {
    if (parent.type !== 'ObjectProperty' && parent.type !== 'Property') return false;
    if (parent.value !== node || parent.computed) return false;
    return parent.key.name === 'name' || parent.key.value === 'name';
  };

  try {
    const ast = babelParse(scriptContent, {
      sourceType: 'module',
      plugins: ['typescript', 'jsx'], // Enable TS and JSX support
    });

    const replacements = [];

    traverse(ast, {
      enter(path) {
        const { node, parent } = path;
        let text;

        if (path.isStringLiteral()) {
          text = node.value;
        } else if (path.isTemplateLiteral()) {
          // Only template literals that are a single static chunk.
          if (node.expressions.length > 0 || node.quasis.length !== 1) return;
          text = node.quasis[0].value.cooked;
        } else {
          return;
        }

        if (!isTranslatable(text)) return;

        // --- Parent checks to avoid replacing the wrong strings ---
        if (isTranslateCall(parent)) return;
        if (skippedParents.has(parent.type)) return;
        if (parent.type === 'ObjectProperty' && parent.key === node) return;
        if (isNameValue(parent, node)) return;

        // --- Passed all checks, schedule replacement ---
        replacements.push({
          start: baseOffset + node.start,
          end: baseOffset + node.end,
          key: contextKey(text, filePath),
          text,
        });
      },
    });

    // Record an entry only together with its edit, so a failed traversal
    // cannot leave entries in `extracted` that were never written to the file.
    for (const { start, end, key, text } of replacements) {
      s.overwrite(start, end, scr(key));
      extracted.push({ key, text, context: 'script' });
    }
  } catch (e) {
    console.error('Babel parsing failed:', e);
    // If babel parsing fails, fallback to regex.
    extractScriptStringsRegex(scriptContent, s, baseOffset, extracted, filePath, scr);
  }
}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
/**
 * Regex-based fallback for extracting script strings, used when Babel fails.
 *
 * Recognises string arguments of common notification calls, values of common
 * UI-text properties, and literals passed to `ref()` / `computed(() => …)`.
 *
 * @param {string} scriptContent     Text between the <script> tags.
 * @param {object} s                 MagicString over the whole file.
 * @param {number} baseOffset        Offset of `scriptContent` within the file.
 * @param {Array<object>} extracted  Receives `{ key, text, context: 'script-regex' }`.
 * @param {string} filePath          Forwarded to contextKey().
 * @param {(key: string) => string} scr Builds the call expression for a key.
 */
function extractScriptStringsRegex(scriptContent, s, baseOffset, extracted, filePath, scr) {
  // The first two patterns capture (prefix, quote, text); the last two capture
  // only (quote, text).
  const patterns = [
    /\b(alert|confirm|toast|notify|message\.(?:success|error|warning|info))\s*\(\s*(['"`])((?:(?!\2).)+)\2\s*\)/g,
    /\b(title|label|placeholder|message|text|description|tooltip|hint|caption|header|subtitle|errorMessage|successMessage|emptyText|noData|loadingText|buttonText|confirmText|cancelText|successText|failText|warningText|helperText|hintText)\s*:\s*(['"`])((?:(?!\2).)+)\2/g,
    /\bref\s*\(\s*(['"`])((?:(?!\1)[^\\]|\\.)+)\1\s*\)/g,
    /\bcomputed\s*\(\s*\(\s*\)\s*=>\s*(['"`])((?:(?!\1)[^\\]|\\.)+)\1\s*\)/g,
  ];

  // Literal start positions already rewritten, so two patterns never edit the
  // same literal twice.
  const rewritten = new Set();

  for (const pattern of patterns) {
    for (const match of scriptContent.matchAll(pattern)) {
      // Pick the groups by pattern shape. Falling back with `||` would treat
      // the text of a two-group match as its quote character.
      const hasPrefixGroup = match[3] !== undefined;
      const quoteChar = hasPrefixGroup ? match[2] : match[1];
      const text = hasPrefixGroup ? match[3] : match[2];

      if (!isTranslatable(text) || text.length <= 1) continue;
      // A template literal with `${…}` is dynamic; replacing it with a static
      // key would drop the interpolated values.
      if (quoteChar === '`' && text.includes('${')) continue;

      const innerStr = quoteChar + text + quoteChar;
      const relPos = match.index + match[0].lastIndexOf(innerStr);

      if (rewritten.has(relPos)) continue;
      if (isAlreadyWrappedScript(scriptContent, relPos)) continue;

      const key = contextKey(text, filePath);
      const textStart = baseOffset + relPos;
      const textEnd = textStart + innerStr.length;

      s.overwrite(textStart, textEnd, scr(key));
      rewritten.add(relPos);
      extracted.push({ key, text, context: 'script-regex' });
    }
  }
}
|
|
249
|
+
|
|
250
|
+
/**
 * Report whether the literal starting at `pos` is the first argument of a
 * `t(` / `$t(` call (including member calls such as `this.$t(`).
 *
 * @param {string} scriptContent Script text being scanned.
 * @param {number} pos           Index of the literal's opening quote.
 * @returns {boolean} True when the up-to-30 characters before `pos` end with
 *   an opening translate call.
 */
function isAlreadyWrappedScript(scriptContent, pos) {
  const before = scriptContent.slice(Math.max(0, pos - 30), pos);
  // The lookbehind keeps identifiers that merely end in "t" (alert, toast,
  // format, …) from being mistaken for the translate function.
  return /(?<![\w$])\$?t\s*\(\s*$/.test(before);
}
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
257
|
+
|
|
258
|
+
/**
 * Report whether `start` lies inside the string argument of a still-open
 * `t('…` / `$t('…` call.
 *
 * @param {string} source Text being scanned.
 * @param {number} start  Index where the candidate text begins.
 * @param {number} end    Index where the candidate text ends (not used).
 * @returns {boolean} True when the up-to-25 characters before `start` contain
 *   a translate call whose string argument has not been closed yet.
 */
function isAlreadyWrapped(source, start, end) {
  const before = source.slice(Math.max(0, start - 25), start);
  // Two guards against false positives:
  //  - the lookbehind rejects identifiers that only end in "t" (alert, toast…);
  //  - `[^'"`)]*$` requires that no quote or `)` follows the opening quote,
  //    so text after a completed call is not treated as wrapped.
  return /(?<![\w$])\$?t\s*\(\s*['"`][^'"`)]*$/.test(before);
}
|
|
263
|
+
|
|
264
|
+
/**
 * Regex-based fallback for template text, used when the template cannot be
 * parsed. Wraps text found between a `>` and the next `<` in `{{ tpl(key) }}`.
 *
 * When `source` contains a <template> block, only text between its opening
 * tag and the last `</template>` is considered, so script and style content
 * is never rewritten. Without a <template> block the whole input is scanned.
 *
 * @param {string} source            Text to scan; offsets address the MagicString.
 * @param {object} s                 MagicString over the same text.
 * @param {Array<object>} extracted  Receives `{ key, text, context: 'template-text' }`.
 * @param {string} filePath          Forwarded to contextKey().
 * @param {(key: string) => string} tpl Builds the call expression for a key.
 */
function extractTemplateRegex(source, s, extracted, filePath, tpl) {
  const open = /<template\b[^>]*>/.exec(source);
  const close = source.lastIndexOf('</template>');
  const bounded = open !== null && close > open.index;
  // Start on the `>` of the opening tag so the first text run can match.
  const scanFrom = bounded ? open.index + open[0].length - 1 : 0;
  const scanTo = bounded ? close : source.length;

  const pattern = />([^<]+)</g;
  pattern.lastIndex = scanFrom;

  let match;
  while ((match = pattern.exec(source)) !== null) {
    if (match.index >= scanTo) break;

    const raw = match[1];
    const text = raw.trim();
    if (!isTranslatable(text)) continue;
    // Text mixed with `{{ … }}` cannot be replaced by one static key without
    // losing the interpolated expression, so leave it untouched.
    if (raw.includes('{{')) continue;

    const textStart = match.index + 1;
    const textEnd = textStart + raw.length;
    if (isAlreadyWrapped(source, textStart, textEnd)) continue;

    const key = contextKey(text, filePath);
    const lws = raw.match(/^(\s*)/)[1];
    const tws = raw.match(/(\s*)$/)[1];
    s.overwrite(textStart, textEnd, `${lws}{{ ${tpl(key)} }}${tws}`);
    extracted.push({ key, text, context: 'template-text' });
  }
}
|
package/src/utils.js
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
import { basename, extname } from 'path';
|
|
3
|
+
|
|
4
|
+
// Words too generic to use as the sole semantic component of a key.
// contextKey() drops them when picking the words that form a key's slug.
const STOP_WORDS = new Set([
  // Articles and forms of "to be"
  'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  // Auxiliaries and modals
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'shall',
  'should', 'may', 'might', 'must', 'can', 'could',
  // Prepositions and conjunctions
  'to', 'for', 'and', 'or', 'but', 'of', 'in', 'on', 'at', 'by', 'as', 'if',
  // Pronouns and determiners
  'its', 'it', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
  // Remaining function words
  'with', 'from', 'up', 'about', 'no', 'not', 'so',
]);
|
|
14
|
+
|
|
15
|
+
/**
 * Build a readable snake_case i18n key from a piece of text.
 *
 *   "Submit"                  → "submit"
 *   "Please enter your email" → "please_enter_your_email"
 *
 * The slug is capped at 40 characters. When no usable ASCII slug remains
 * (non-Latin text, symbols only, a single character) the result is `key_`
 * followed by the first 8 hex digits of the text's SHA-256 digest.
 *
 * @param {string} text Source string.
 * @returns {string} The generated key.
 */
export function textKey(text) {
  const trimmed = text.trim();

  // Ordered rewrite rules applied to the lower-cased text.
  const rules = [
    [/[^\w\s]/g, ' '], // punctuation → space
    [/\s+/g, '_'], // whitespace runs → underscore
    [/^[^a-z]+/, ''], // drop any non-letter prefix
    [/[^a-z0-9_]/g, ''], // drop what is still outside [a-z0-9_]
    [/_+/g, '_'], // collapse repeated underscores
    [/^_|_$/g, ''], // trim one underscore at either end
  ];
  const normalised = rules.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    trimmed.toLowerCase()
  );

  // Truncation can expose a trailing underscore, so strip again afterwards.
  const slug = normalised.slice(0, 40).replace(/_+$/, '');

  const usable = slug.length >= 2 && /[a-z]/.test(slug);
  if (usable) return slug;

  // Fallback: hash (for Chinese, Arabic, emoji, symbols, etc.)
  const digest = createHash('sha256').update(trimmed).digest('hex');
  return `key_${digest.slice(0, 8)}`;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Generate a context-aware i18n key: `<namespace>.<slug>`.
 *
 * The namespace is the file name without extension, with one common component
 * suffix (View, Page, Card, …) removed and the first letter lower-cased. The
 * slug is the first one to three non-stop-words of the text in camelCase.
 *
 *   "Notes"          in HomeView.vue   → home.notes
 *   "Quick Note"     in HomeView.vue   → home.quickNote
 *   "View livestock" in HomeView.vue   → home.viewLivestock
 *   "Submit"         in LoginForm.vue  → loginForm.submit
 *   "你好"            in App.vue        → app.key<first 6 hex digits of SHA-256>
 *
 * @param {string} text     The source string to key
 * @param {string} filePath Absolute or relative path of the source file. When
 *                          it is missing or yields no file name, the namespace
 *                          `common` is used.
 * @returns {string} The generated key.
 */
export function contextKey(text, filePath) {
  const trimmed = text.trim();

  // ── Namespace: derive from filename ──────────────────────────────────────
  const safePath = typeof filePath === 'string' ? filePath : '';
  const fileName = basename(safePath, extname(safePath));
  // Strip common Vue/React suffixes: HomeView → Home, UserCard → User, etc.
  // A name that consists only of a suffix ("View") is kept as-is.
  const stripped =
    fileName.replace(
      /(?:View|Component|Page|Screen|Modal|Dialog|Card|Panel|Widget|Layout|Container)$/,
      ''
    ) || fileName;
  // camelCase namespace: "UserProfile" → "userProfile", "home" → "home".
  // An empty file name would otherwise crash on stripped[0].
  const ns = stripped ? stripped[0].toLowerCase() + stripped.slice(1) : 'common';

  // ── Slug: 1-3 meaningful words in camelCase ───────────────────────────────
  const words = trimmed
    .replace(/[^\w\s]/g, ' ')
    .split(/\s+/)
    .map((w) => w.toLowerCase())
    .filter((w) => w.length >= 2 && /[a-z]/.test(w) && !STOP_WORDS.has(w));

  if (words.length === 0) {
    // Non-Latin scripts, emoji, or all stop words — fall back to hash suffix
    const hash = createHash('sha256').update(trimmed).digest('hex').slice(0, 6);
    return `${ns}.key${hash}`;
  }

  const [first, ...rest] = words.slice(0, 3);
  const slug = first + rest.map((w) => w[0].toUpperCase() + w.slice(1)).join('');

  return `${ns}.${slug}`;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Alias of textKey(): `hashKey` refers to the same function.
 *
 * @deprecated Use contextKey() for new code. textKey() kept for non-file contexts.
 */
export const hashKey = textKey;
|
|
93
|
+
|
|
94
|
+
/**
 * Determine if a string is "translatable" — contains actual human-readable
 * text rather than whitespace, numbers, symbols, or code identifiers.
 *
 * Letters are detected with the Unicode `Letter` property, so text in any
 * script (Latin, Greek, Cyrillic, Arabic, CJK, Hangul, …) qualifies.
 *
 * @param {*} text Candidate value; anything that is not a non-empty string is rejected.
 * @returns {boolean} True when the text looks like user-facing copy.
 */
export function isTranslatable(text) {
  if (!text || typeof text !== 'string') return false;

  const trimmed = text.trim();
  // Skip pure whitespace / newlines
  if (trimmed.length === 0) return false;

  // Skip pure numbers
  if (/^[\d.,]+$/.test(trimmed)) return false;

  // Skip code identifiers (camelCase, snake_case, dotted paths — no spaces)
  if (trimmed.length > 1 && /^[a-zA-Z_$][a-zA-Z0-9_$.]*$/.test(trimmed)) {
    // Allow capitalised real words: "Submit", "Cancel", "Home"
    const isCapitalisedWord =
      trimmed.length > 2 && /^[A-Z][a-z]+$/.test(trimmed) && /[aeiouAEIOU]/.test(trimmed);
    if (isCapitalisedWord) return true;
    // Allow ALL-CAPS short labels: "OK", "FAQ"
    return /^[A-Z]{2,12}$/.test(trimmed);
  }

  // Skip URLs, paths, emails
  if (/^(https?:\/\/|\/|\.\/|\.\.\/)/.test(trimmed)) return false;
  if (/^[\w.+-]+@[\w.-]+\.\w+$/.test(trimmed)) return false;

  // Skip pure template expressions {{ something }}
  if (/^\{\{[^}]+\}\}$/.test(trimmed)) return false;

  // Must contain at least one letter from any script. This also rejects
  // single punctuation/symbol characters.
  return /\p{L}/u.test(trimmed);
}
|
|
145
|
+
|
|
146
|
+
/**
 * Shield interpolation variables before sending text to machine translation.
 *
 * Replaces `{{ name }}`, `${name}`, `%{name}` and `{name}` with XML-like
 * tokens `<x0/>`, `<x1/>`, … numbered in order of appearance.
 *
 * All forms are matched in a single left-to-right pass. This keeps the `%`
 * sigil of `%{name}` inside its token, and prevents a placeholder from being
 * swallowed into a later, larger token.
 *
 * @param {string} text Text that may contain interpolation placeholders.
 * @returns {{ shielded: string, tokens: string[] }} The text with tokens
 *   substituted and the original placeholders, indexed by token number —
 *   pass both to unshieldInterpolations() to restore.
 */
export function shieldInterpolations(text) {
  // Alternation order matters: the more specific forms come first so the
  // generic `{name}` form never takes part of a longer placeholder.
  const placeholder = new RegExp(
    [
      '\\{\\{[^}]*\\}\\}', // Vue {{ expr }}
      '\\$\\{[^}]*\\}', // template literal ${expr}
      '%\\{[^}]+\\}', // Ruby / Rails %{varName}
      '\\{[^{}\\s][^{}]*\\}', // i18next / vue-i18n {varName} or {0}
    ].join('|'),
    'g'
  );

  const tokens = [];
  const shielded = text.replace(placeholder, (found) => {
    tokens.push(found);
    return `<x${tokens.length - 1}/>`;
  });

  return { shielded, tokens };
}
|
|
187
|
+
|
|
188
|
+
/**
 * Restore interpolation variables after translation.
 *
 * Tokens are matched case-insensitively, and whitespace is tolerated anywhere
 * inside the tag (`<x0/>`, `<X0 />`, `< x0 / >`). A token whose index has no
 * entry in `tokens` is removed.
 *
 * @param {string} text       Translated text containing `<xN/>` tokens.
 * @param {string[]} tokens   Placeholders returned by shieldInterpolations().
 * @returns {string} The text with tokens replaced by their placeholders; the
 *   input unchanged when `tokens` is empty or missing.
 */
export function unshieldInterpolations(text, tokens) {
  if (!tokens || tokens.length === 0) return text;
  return text.replace(
    /<\s*x\s*(\d+)\s*\/\s*>/gi,
    (_, idx) => tokens[Number.parseInt(idx, 10)] ?? ''
  );
}
|
|
196
|
+
|
|
197
|
+
/**
 * Split a string into static text and Vue `{{ expr }}` interpolation parts.
 *
 * @param {string} text Text to inspect.
 * @returns {{ hasInterpolation: boolean, parts: Array<{ type: 'static' | 'expr', value: string }> }}
 *   `parts` is empty when the text has no interpolation. Static parts keep
 *   their original whitespace but whitespace-only parts are omitted;
 *   expression parts hold the trimmed expression.
 */
export function parseInterpolation(text) {
  const parts = [];
  const addStatic = (chunk) => {
    if (chunk.trim()) parts.push({ type: 'static', value: chunk });
  };

  const expression = /\{\{\s*([^}]+?)\s*\}\}/g;
  let hit = expression.exec(text);
  const hasInterpolation = hit !== null;

  let cursor = 0;
  while (hit !== null) {
    // Static text between the previous expression and this one.
    if (hit.index > cursor) addStatic(text.slice(cursor, hit.index));
    parts.push({ type: 'expr', value: hit[1].trim() });
    cursor = hit.index + hit[0].length;
    hit = expression.exec(text);
  }

  // Trailing static text is only collected when an expression was found.
  if (hasInterpolation && cursor < text.length) addStatic(text.slice(cursor));

  return { hasInterpolation, parts };
}
|