bctranslate 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/bctranslate.js +132 -132
- package/package.json +1 -1
- package/python/translator.py +7 -0
- package/src/bridges/python.js +61 -55
- package/src/index.js +158 -97
- package/src/parsers/html.js +3 -3
- package/src/parsers/js.js +4 -4
- package/src/parsers/json.js +1 -1
- package/src/parsers/react.js +55 -60
- package/src/parsers/vue.js +5 -5
- package/src/utils.js +102 -23
package/src/parsers/react.js
CHANGED
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
import * as babelParser from '@babel/parser';
|
|
2
2
|
import _traverse from '@babel/traverse';
|
|
3
|
-
import _generate from '@babel/generator';
|
|
4
|
-
import * as t from '@babel/types';
|
|
5
3
|
import MagicString from 'magic-string';
|
|
6
|
-
import {
|
|
4
|
+
import { textKey, isTranslatable } from '../utils.js';
|
|
7
5
|
|
|
8
6
|
// Handle ESM default export quirks
|
|
9
7
|
const traverse = _traverse.default || _traverse;
|
|
10
|
-
const generate = _generate.default || _generate;
|
|
11
8
|
|
|
12
9
|
/**
|
|
13
10
|
* Non-translatable JSX attribute names.
|
|
@@ -25,8 +22,31 @@ const ATTR_WHITELIST = new Set([
|
|
|
25
22
|
'aria-placeholder', 'aria-description',
|
|
26
23
|
]);
|
|
27
24
|
|
|
25
|
+
/** Babel node types that can host a React function component. */
const FUNCTION_NODE_TYPES = new Set([
  'FunctionDeclaration',
  'FunctionExpression',
  'ArrowFunctionExpression',
]);

/** Wrapper calls that are looked through when inferring a component's name. */
const COMPONENT_WRAPPERS = new Set(['memo', 'forwardRef']);

/** Matches names that look like components (`App`) or custom hooks (`useThing`). */
const HOOK_HOST_NAME = /^(?:[A-Z]|use[A-Z0-9])/;

/**
 * Best-effort name of a function path: its own `id`, or the identifier of the
 * variable it is assigned to (optionally through `memo(...)` / `forwardRef(...)`).
 *
 * @param {object} fnPath - Babel path of a function node.
 * @returns {string|undefined} The inferred name, or `undefined` when anonymous.
 */
function inferFunctionName(fnPath) {
  const declaredName = fnPath.node.id?.name;
  if (declaredName) return declaredName;

  let holder = fnPath.parentPath;
  while (holder?.node?.type === 'CallExpression') {
    const { callee } = holder.node;
    const wrapperName = callee?.name ?? callee?.property?.name;
    // A callback passed to an arbitrary call (`.map(...)`, `useMemo(...)`) is
    // not the thing being declared, so it has no name of its own.
    if (!COMPONENT_WRAPPERS.has(wrapperName)) return undefined;
    holder = holder.parentPath;
  }

  const target = holder?.node;
  return target?.type === 'VariableDeclarator' && target.id?.type === 'Identifier'
    ? target.id.name
    : undefined;
}

/**
 * Walk up the Babel path tree to find the function whose body should receive
 * the injected `useTranslation()` hook call.
 *
 * Hooks must be called at the top level of a component or custom hook, so the
 * nearest enclosing block-bodied function that is *named* like a component
 * (capitalised) or a custom hook (`useXxx`) is preferred. This skips nested
 * callbacks such as `items.map((item) => { ... })`. When no such named
 * function exists, the nearest enclosing block-bodied function is used.
 *
 * Functions with an expression body (`() => <div />`) have no `{` to insert
 * after and are ignored.
 *
 * @param {object} jsxPath - Babel path of the JSX node that was rewritten.
 * @returns {number} Character offset right after the opening `{` of the chosen
 *   function body, or -1 when there is no enclosing block-bodied function.
 */
function findComponentBodyStart(jsxPath) {
  let nearest = -1;

  for (let p = jsxPath.parentPath; p; p = p.parentPath) {
    const { node } = p;
    if (!FUNCTION_NODE_TYPES.has(node.type) || node.body?.type !== 'BlockStatement') {
      continue;
    }

    const bodyStart = node.body.start + 1; // right after opening {
    if (HOOK_HOST_NAME.test(inferFunctionName(p) ?? '')) return bodyStart;
    if (nearest === -1) nearest = bodyStart;
  }

  return nearest;
}
|
|
46
|
+
|
|
28
47
|
/**
|
|
29
48
|
* Parse a JSX/TSX file and extract translatable strings.
|
|
49
|
+
* Hook injection uses AST-derived character positions — no regex.
|
|
30
50
|
*/
|
|
31
51
|
export function parseReact(source, filePath) {
|
|
32
52
|
const extracted = [];
|
|
@@ -45,13 +65,12 @@ export function parseReact(source, filePath) {
|
|
|
45
65
|
'decorators-legacy',
|
|
46
66
|
].filter(Boolean),
|
|
47
67
|
});
|
|
48
|
-
} catch
|
|
49
|
-
// If Babel fails, return unmodified
|
|
68
|
+
} catch {
|
|
50
69
|
return { source, extracted: [], modified: false };
|
|
51
70
|
}
|
|
52
71
|
|
|
53
72
|
const s = new MagicString(source);
|
|
54
|
-
let
|
|
73
|
+
let hookInsertPos = -1;
|
|
55
74
|
|
|
56
75
|
traverse(ast, {
|
|
57
76
|
// JSX text children: <div>Hello World</div>
|
|
@@ -59,28 +78,23 @@ export function parseReact(source, filePath) {
|
|
|
59
78
|
const text = path.node.value.trim();
|
|
60
79
|
if (!isTranslatable(text)) return;
|
|
61
80
|
|
|
62
|
-
const key =
|
|
63
|
-
const start = path.node
|
|
64
|
-
const end = path.node.end;
|
|
65
|
-
|
|
66
|
-
// Preserve whitespace
|
|
81
|
+
const key = textKey(text);
|
|
82
|
+
const { start, end } = path.node;
|
|
67
83
|
const original = path.node.value;
|
|
68
84
|
const leadingWs = original.match(/^(\s*)/)[1];
|
|
69
85
|
const trailingWs = original.match(/(\s*)$/)[1];
|
|
70
86
|
|
|
71
87
|
s.overwrite(start, end, `${leadingWs}{t('${key}')}${trailingWs}`);
|
|
72
88
|
extracted.push({ key, text, context: 'jsx-text' });
|
|
73
|
-
|
|
89
|
+
|
|
90
|
+
if (hookInsertPos === -1) hookInsertPos = findComponentBodyStart(path);
|
|
74
91
|
},
|
|
75
92
|
|
|
76
93
|
// JSX string attributes: <input placeholder="Enter name" />
|
|
77
94
|
JSXAttribute(path) {
|
|
78
95
|
const name = path.node.name?.name;
|
|
79
|
-
if (!name) return;
|
|
80
|
-
|
|
81
|
-
if (ATTR_BLACKLIST.has(name)) return;
|
|
96
|
+
if (!name || ATTR_BLACKLIST.has(name)) return;
|
|
82
97
|
|
|
83
|
-
// Only translate whitelisted attrs, or unknown attrs if they have translatable values
|
|
84
98
|
const value = path.node.value;
|
|
85
99
|
if (!value || value.type !== 'StringLiteral') return;
|
|
86
100
|
|
|
@@ -88,60 +102,41 @@ export function parseReact(source, filePath) {
|
|
|
88
102
|
if (!isTranslatable(text)) return;
|
|
89
103
|
if (!ATTR_WHITELIST.has(name) && text.length < 3) return;
|
|
90
104
|
|
|
91
|
-
const key =
|
|
92
|
-
|
|
93
|
-
const attrEnd = path.node.end;
|
|
94
|
-
|
|
95
|
-
s.overwrite(attrStart, attrEnd, `${name}={t('${key}')}`);
|
|
105
|
+
const key = textKey(text);
|
|
106
|
+
s.overwrite(path.node.start, path.node.end, `${name}={t('${key}')}`);
|
|
96
107
|
extracted.push({ key, text, context: `jsx-attr-${name}` });
|
|
97
|
-
|
|
108
|
+
|
|
109
|
+
if (hookInsertPos === -1) hookInsertPos = findComponentBodyStart(path);
|
|
98
110
|
},
|
|
99
111
|
|
|
100
|
-
//
|
|
112
|
+
// alert('...'), confirm('...')
|
|
101
113
|
CallExpression(path) {
|
|
102
114
|
const callee = path.node.callee;
|
|
103
|
-
const calleeName = callee.name ||
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
extracted.push({ key, text: arg.value, context: 'call-arg' });
|
|
112
|
-
needsImport = true;
|
|
113
|
-
}
|
|
115
|
+
const calleeName = callee.name || callee.property?.name;
|
|
116
|
+
if (!['alert', 'confirm'].includes(calleeName)) return;
|
|
117
|
+
|
|
118
|
+
const arg = path.node.arguments[0];
|
|
119
|
+
if (arg?.type === 'StringLiteral' && isTranslatable(arg.value)) {
|
|
120
|
+
const key = textKey(arg.value);
|
|
121
|
+
s.overwrite(arg.start, arg.end, `t('${key}')`);
|
|
122
|
+
extracted.push({ key, text: arg.value, context: 'call-arg' });
|
|
114
123
|
}
|
|
115
124
|
},
|
|
116
125
|
});
|
|
117
126
|
|
|
118
|
-
//
|
|
119
|
-
if (
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if (importMatch) {
|
|
126
|
-
// Insert after all imports
|
|
127
|
-
const lastImportMatch = [...source.matchAll(/^import\s.+$/gm)];
|
|
128
|
-
if (lastImportMatch.length > 0) {
|
|
129
|
-
const last = lastImportMatch[lastImportMatch.length - 1];
|
|
130
|
-
insertPos = last.index + last[0].length;
|
|
131
|
-
}
|
|
127
|
+
// ── Inject import and hook using AST-derived positions ───────────────────────
|
|
128
|
+
if (extracted.length > 0) {
|
|
129
|
+
if (!source.includes('useTranslation')) {
|
|
130
|
+
// Find the last ImportDeclaration node — safe even with 'use client' directives
|
|
131
|
+
let lastImportEnd = 0;
|
|
132
|
+
for (const node of ast.program.body) {
|
|
133
|
+
if (node.type === 'ImportDeclaration') lastImportEnd = node.end;
|
|
132
134
|
}
|
|
133
|
-
|
|
134
|
-
s.appendRight(insertPos, `\nimport { useTranslation } from 'react-i18next';\n`);
|
|
135
|
+
s.appendRight(lastImportEnd, `\nimport { useTranslation } from 'react-i18next';`);
|
|
135
136
|
}
|
|
136
137
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
// Find the function body
|
|
140
|
-
const funcMatch = source.match(/(?:function\s+\w+|const\s+\w+\s*=\s*(?:\([^)]*\)\s*=>|\w+\s*=>))\s*\{/);
|
|
141
|
-
if (funcMatch) {
|
|
142
|
-
const insertAt = funcMatch.index + funcMatch[0].length;
|
|
143
|
-
s.appendRight(insertAt, `\n const { t } = useTranslation();\n`);
|
|
144
|
-
}
|
|
138
|
+
if (!source.includes('useTranslation()') && hookInsertPos > 0) {
|
|
139
|
+
s.appendRight(hookInsertPos, `\n const { t } = useTranslation();\n`);
|
|
145
140
|
}
|
|
146
141
|
}
|
|
147
142
|
|
|
@@ -150,4 +145,4 @@ export function parseReact(source, filePath) {
|
|
|
150
145
|
extracted,
|
|
151
146
|
modified: extracted.length > 0,
|
|
152
147
|
};
|
|
153
|
-
}
|
|
148
|
+
}
|
package/src/parsers/vue.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as compiler from '@vue/compiler-dom';
|
|
2
2
|
import MagicString from 'magic-string';
|
|
3
|
-
import {
|
|
3
|
+
import { textKey, isTranslatable, parseInterpolation } from '../utils.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Non-translatable attribute names.
|
|
@@ -80,7 +80,7 @@ function walkTemplate(nodes, s, baseOffset, extracted) {
|
|
|
80
80
|
if (node.type === 2) {
|
|
81
81
|
const text = node.content.trim();
|
|
82
82
|
if (isTranslatable(text)) {
|
|
83
|
-
const key =
|
|
83
|
+
const key = textKey(text);
|
|
84
84
|
const start = baseOffset + node.loc.start.offset;
|
|
85
85
|
const end = baseOffset + node.loc.end.offset;
|
|
86
86
|
|
|
@@ -108,7 +108,7 @@ function walkTemplate(nodes, s, baseOffset, extracted) {
|
|
|
108
108
|
|
|
109
109
|
if (ATTR_WHITELIST.has(attrName) && isTranslatable(prop.value.content)) {
|
|
110
110
|
const text = prop.value.content;
|
|
111
|
-
const key =
|
|
111
|
+
const key = textKey(text);
|
|
112
112
|
const attrStart = baseOffset + prop.loc.start.offset;
|
|
113
113
|
const attrEnd = baseOffset + prop.loc.end.offset;
|
|
114
114
|
|
|
@@ -168,7 +168,7 @@ function extractScriptStrings(scriptContent, s, baseOffset, extracted, isSetup)
|
|
|
168
168
|
while ((match = pattern.exec(scriptContent)) !== null) {
|
|
169
169
|
const text = match[3];
|
|
170
170
|
if (isTranslatable(text) && text.length > 1) {
|
|
171
|
-
const key =
|
|
171
|
+
const key = textKey(text);
|
|
172
172
|
const fullMatchStart = baseOffset + match.index;
|
|
173
173
|
const quoteChar = match[2];
|
|
174
174
|
const textStart = baseOffset + match.index + match[0].indexOf(quoteChar + text);
|
|
@@ -196,7 +196,7 @@ function extractTemplateRegex(source, s, extracted) {
|
|
|
196
196
|
while ((match = textPattern.exec(source)) !== null) {
|
|
197
197
|
const text = match[1].trim();
|
|
198
198
|
if (isTranslatable(text)) {
|
|
199
|
-
const key =
|
|
199
|
+
const key = textKey(text);
|
|
200
200
|
const textStart = match.index + 1;
|
|
201
201
|
const textEnd = textStart + match[1].length;
|
|
202
202
|
if (!isAlreadyWrapped(source, textStart, textEnd)) {
|
package/src/utils.js
CHANGED
|
@@ -1,18 +1,42 @@
|
|
|
1
1
|
import { createHash } from 'crypto';
|
|
2
2
|
|
|
3
3
|
/**
 * Generate a readable, slug-based i18n key from a string.
 *   "Submit"                  → "submit"
 *   "Please enter your email" → "please_enter_your_email"
 *   "Café"                    → "cafe"
 *
 * Accented Latin letters are folded to their base letter before slugging so
 * they are not silently dropped from the key. Strings that yield no usable
 * slug (non-Latin scripts, emoji, symbols only) fall back to `key_<hash>`,
 * where the hash is the first 8 hex digits of the SHA-256 of the trimmed text.
 *
 * @param {string} text - Source text to derive a key from. Non-string values
 *   are coerced with `String()`; `null`/`undefined` are treated as "".
 * @returns {string} A slug of at most 40 characters, or a `key_<hash>` fallback.
 */
export function textKey(text) {
  const trimmed = String(text ?? '').trim();

  const slug = trimmed
    .normalize('NFKD') // split "é" into "e" + combining accent
    .replace(/[\u0300-\u036f]/g, '') // drop the combining accents
    .toLowerCase()
    .replace(/[^\w\s]/g, ' ') // punctuation → space
    .replace(/\s+/g, '_') // spaces → underscores
    .replace(/^[^a-z]+/, '') // strip non-alpha prefix
    .replace(/[^a-z0-9_]/g, '') // remove remaining non-ASCII
    .replace(/_+/g, '_') // collapse multiple underscores
    .replace(/^_|_$/g, '') // trim underscores
    .slice(0, 40)
    .replace(/_+$/, ''); // truncation may expose a trailing underscore

  if (slug.length >= 2 && /[a-z]/.test(slug)) {
    return slug;
  }

  // Fallback: hash (for Chinese, Arabic, emoji, symbols, etc.)
  const hash = createHash('sha256').update(trimmed).digest('hex').slice(0, 8);
  return `key_${hash}`;
}

/**
 * @deprecated Use textKey() instead. Kept for internal backward compat.
 */
export const hashKey = textKey;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Determine if a string is "translatable" — contains actual human-readable
|
|
39
|
+
* text rather than whitespace, numbers, symbols, or code identifiers.
|
|
16
40
|
*/
|
|
17
41
|
export function isTranslatable(text) {
|
|
18
42
|
if (!text || typeof text !== 'string') return false;
|
|
@@ -27,15 +51,19 @@ export function isTranslatable(text) {
|
|
|
27
51
|
if (/^[\d.,]+$/.test(trimmed)) return false;
|
|
28
52
|
|
|
29
53
|
// Skip single characters that are punctuation/symbols
|
|
30
|
-
if (
|
|
54
|
+
if (
|
|
55
|
+
trimmed.length === 1 &&
|
|
56
|
+
/[^a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]/.test(trimmed)
|
|
57
|
+
)
|
|
58
|
+
return false;
|
|
31
59
|
|
|
32
|
-
// Skip
|
|
60
|
+
// Skip code identifiers (camelCase, snake_case with no spaces)
|
|
33
61
|
if (/^[a-zA-Z_$][a-zA-Z0-9_$.]*$/.test(trimmed) && !trimmed.includes(' ') && trimmed.length > 1) {
|
|
34
|
-
//
|
|
62
|
+
// Allow capitalised real words: "Submit", "Cancel", "Home"
|
|
35
63
|
if (/[aeiouAEIOU]/.test(trimmed) && trimmed.length > 2 && /^[A-Z][a-z]+$/.test(trimmed)) {
|
|
36
|
-
return true;
|
|
64
|
+
return true;
|
|
37
65
|
}
|
|
38
|
-
// Allow ALL
|
|
66
|
+
// Allow ALL-CAPS short labels: "OK", "FAQ"
|
|
39
67
|
if (/^[A-Z]{2,12}$/.test(trimmed)) return true;
|
|
40
68
|
return false;
|
|
41
69
|
}
|
|
@@ -44,29 +72,80 @@ export function isTranslatable(text) {
|
|
|
44
72
|
if (/^(https?:\/\/|\/|\.\/|\.\.\/)/.test(trimmed)) return false;
|
|
45
73
|
if (/^[\w.+-]+@[\w.-]+\.\w+$/.test(trimmed)) return false;
|
|
46
74
|
|
|
47
|
-
// Skip template expressions
|
|
75
|
+
// Skip pure template expressions {{ something }}
|
|
48
76
|
if (/^\{\{[^}]+\}\}$/.test(trimmed)) return false;
|
|
49
77
|
|
|
50
78
|
// Must contain at least one letter from any script
|
|
51
|
-
if (
|
|
79
|
+
if (
|
|
80
|
+
!/[a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uAC00-\uD7AF]/.test(
|
|
81
|
+
trimmed
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
return false;
|
|
52
85
|
|
|
53
86
|
return true;
|
|
54
87
|
}
|
|
55
88
|
|
|
56
89
|
/**
 * Shield interpolation variables before sending text to the translator.
 * Replaces {{ name }}, ${name}, %{name} and {name} with XML-like tokens
 * `<x0/>`, `<x1/>`, … so the placeholders are not translated or mangled.
 *
 * Patterns are applied one after another, most specific first. In particular
 * `%{name}` is handled before the bare `{name}` form; otherwise the bare form
 * would consume the braces and leave a stray `%` in the shielded text.
 *
 * @param {string} text - Text that may contain interpolation placeholders.
 * @returns {{ shielded: string, tokens: string[] }} The shielded text and the
 *   original placeholders, where `tokens[i]` corresponds to `<xi/>`. Pass both
 *   to unshieldInterpolations() to restore the placeholders.
 */
export function shieldInterpolations(text) {
  const tokens = [];

  // Record the placeholder and return the token that stands in for it.
  const stash = (match) => {
    tokens.push(match);
    return `<x${tokens.length - 1}/>`;
  };

  const patterns = [
    /\{\{[^}]*\}\}/g, // Vue {{ expr }} — must come first to avoid matching inner {
    /\$\{[^}]*\}/g, // Template literal ${expr}
    /%\{[^}]+\}/g, // Ruby / Rails %{varName} — before bare braces
    /\{[^{}\s][^{}]*\}/g, // i18next / vue-i18n {varName} or {0}
  ];

  let shielded = text;
  for (const pattern of patterns) {
    shielded = shielded.replace(pattern, stash);
  }

  return { shielded, tokens };
}
|
|
130
|
+
|
|
131
|
+
/**
 * Restore interpolation variables after translation.
 *
 * Tolerates the whitespace and letter-case changes a translation model may
 * introduce inside a token: `<x0/>`, `<X0 />` and `< x0 / >` all resolve to
 * `tokens[0]`. A token whose index has no entry in `tokens` is removed.
 *
 * @param {string} text - Translated text containing `<xN/>` tokens.
 * @param {string[]} [tokens] - Placeholders captured by shieldInterpolations().
 * @returns {string} The text with every token replaced by its placeholder;
 *   returned unchanged when `tokens` is missing or empty.
 */
export function unshieldInterpolations(text, tokens) {
  if (!tokens || tokens.length === 0) return text;

  // A replacer function is used so `$` sequences inside a placeholder
  // (e.g. "${name}") are inserted literally rather than interpreted.
  return text.replace(
    /<\s*x\s*(\d+)\s*\/\s*>/gi,
    (_, idx) => tokens[Number.parseInt(idx, 10)] ?? '',
  );
}
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Check if a string contains interpolation expressions.
|
|
59
142
|
*/
|
|
60
143
|
export function parseInterpolation(text) {
|
|
61
|
-
// Vue-style {{ expr }}
|
|
62
144
|
const vuePattern = /\{\{\s*([^}]+?)\s*\}\}/g;
|
|
63
|
-
// React/JS-style {expr} or ${expr}
|
|
64
|
-
const jsPattern = /\$?\{([^}]+)\}/g;
|
|
65
145
|
|
|
66
146
|
const parts = [];
|
|
67
147
|
let hasInterpolation = false;
|
|
68
148
|
|
|
69
|
-
// Check for Vue interpolation
|
|
70
149
|
if (vuePattern.test(text)) {
|
|
71
150
|
hasInterpolation = true;
|
|
72
151
|
vuePattern.lastIndex = 0;
|
|
@@ -88,4 +167,4 @@ export function parseInterpolation(text) {
|
|
|
88
167
|
}
|
|
89
168
|
|
|
90
169
|
return { hasInterpolation, parts };
|
|
91
|
-
}
|
|
170
|
+
}
|