bctranslate 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -0
- package/bin/bctranslate.js +367 -0
- package/package.json +50 -0
- package/python/t2.py +76 -0
- package/python/translator.py +103 -0
- package/src/bridges/python.js +183 -0
- package/src/config.js +28 -0
- package/src/detect.js +58 -0
- package/src/generators/locales.js +62 -0
- package/src/generators/setup.js +188 -0
- package/src/index.js +155 -0
- package/src/parsers/html.js +118 -0
- package/src/parsers/js.js +128 -0
- package/src/parsers/json.js +93 -0
- package/src/parsers/react.js +153 -0
- package/src/parsers/vue.js +211 -0
- package/src/utils.js +91 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import * as compiler from '@vue/compiler-dom';
|
|
2
|
+
import MagicString from 'magic-string';
|
|
3
|
+
import { hashKey, isTranslatable, parseInterpolation } from '../utils.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Attribute names that are treated as non-translatable.
 *
 * The entries are grouped by purpose below; the resulting Set keeps them in
 * the listed order.
 *
 * NOTE(review): no code visible in this part of the file reads this set —
 * confirm whether it is used elsewhere before relying on it.
 */
const ATTR_BLACKLIST = (() => {
  // Element identity, styling and resource references.
  const identity = ['id', 'class', 'style', 'src', 'href', 'ref', 'key', 'is'];

  // Vue directives.
  const directives = [
    'v-model', 'v-bind', 'v-on', 'v-if', 'v-else', 'v-else-if',
    'v-for', 'v-show', 'v-html', 'v-text', 'v-slot', 'v-pre',
    'v-cloak', 'v-once', 'v-memo',
  ];

  // Form, link and document plumbing.
  const plumbing = [
    'name', 'type', 'value',
    'action', 'method', 'target', 'rel', 'media', 'lang',
    'charset', 'content', 'http-equiv', 'for', 'tabindex',
    'role',
  ];

  // SVG namespaces, paint and geometry.
  const geometry = [
    'xmlns', 'viewBox', 'fill', 'stroke', 'd', 'cx', 'cy',
    'r', 'rx', 'ry', 'x', 'y', 'width', 'height', 'transform',
    'xmlns:xlink', 'xlink:href',
  ];

  // Test-automation hooks.
  const testHooks = ['data-testid', 'data-cy'];

  return new Set([
    ...identity,
    ...directives,
    ...plumbing,
    ...geometry,
    ...testHooks,
  ]);
})();
|
|
19
|
+
|
|
20
|
+
/**
 * Attribute names whose static values are treated as translatable text.
 *
 * Lookups in this file are done with the lower-cased attribute name, so every
 * entry is lower-case.
 */
const ATTR_WHITELIST = new Set([
  // Plain attributes.
  'title',
  'placeholder',
  'label',
  'alt',
  // ARIA attributes.
  'aria-label',
  'aria-placeholder',
  'aria-description',
  'aria-roledescription',
]);
|
|
27
|
+
|
|
28
|
+
/**
 * Parse a .vue single-file component and replace translatable strings with
 * i18n lookups.
 *
 * The root `<template>` is parsed with `@vue/compiler-dom` and rewritten by
 * `walkTemplate`; if that throws, `extractTemplateRegex` is used instead.
 * Every `<script>` block is then scanned by `extractScriptStrings`, using
 * `t()` for `<script setup>` blocks and `this.$t()` otherwise.
 *
 * @param {string} source - Full text of the .vue file.
 * @param {string} filePath - Path of the file; accepted for interface
 *   compatibility and not read by this function.
 * @returns {{source: string, extracted: Array<{key: string, text: string, context: string}>, modified: boolean}}
 *   The rewritten source (the input string itself when nothing was
 *   extracted), the extracted strings, and whether anything was extracted.
 */
export function parseVue(source, filePath) {
  const extracted = []; // {key, text, context}
  let s = new MagicString(source);

  // The body is matched greedily so the match ends at the LAST </template>:
  // the root template routinely contains nested <template v-if> / <template #slot>
  // tags, and a lazy match would cut the content at the first of those.
  const templateMatch = /(<template\b[^>]*>)([\s\S]*)<\/template>/.exec(source);
  if (templateMatch) {
    const openTag = templateMatch[1];
    const templateContent = templateMatch[2];
    // Derive the offset from the opening tag's length. Searching for the
    // content string can hit an earlier occurrence inside the tag itself.
    const templateOffset = templateMatch.index + openTag.length;

    // Templates written in another language (e.g. lang="pug") are not HTML
    // and must not be fed to the HTML parser or rewritten with mustaches.
    const langAttr = /\blang\s*=\s*["']?([\w-]+)/.exec(openTag);
    const isHtml = langAttr === null || langAttr[1].toLowerCase() === 'html';

    if (isHtml) {
      try {
        const ast = compiler.parse(templateContent, {
          comments: true,
          getTextMode: () => 0,
        });

        walkTemplate(ast.children, s, templateOffset, extracted);
      } catch (err) {
        // Deliberate best-effort: the error is not reported and the regex
        // extractor takes over. Any edits recorded before the failure are
        // discarded first so the fallback starts from the pristine source
        // instead of overwriting ranges that were already edited.
        s = new MagicString(source);
        extracted.length = 0;
        extractTemplateRegex(source, s, extracted);
      }
    }
  }

  // Scan every <script> block. The setup flag is taken from each block's own
  // opening tag so a file that has both <script> and <script setup> gets the
  // right call style in each.
  const scriptPattern = /<script\b([^>]*)>([\s\S]*?)<\/script>/g;
  for (const scriptMatch of source.matchAll(scriptPattern)) {
    const attrs = scriptMatch[1];
    const scriptContent = scriptMatch[2];
    // `[^>]*` guarantees the first '>' of the match closes the opening tag.
    const scriptOffset = scriptMatch.index + scriptMatch[0].indexOf('>') + 1;
    // Quoted attribute values are blanked so src="./setup.js" is not mistaken
    // for the bare `setup` attribute.
    const bareAttrs = attrs.replace(/(["']).*?\1/g, '""');
    const isSetup = /(?:^|\s)setup(?=[\s=/]|$)/.test(bareAttrs);

    extractScriptStrings(scriptContent, s, scriptOffset, extracted, isSetup);
  }

  const modified = extracted.length > 0;
  return {
    source: modified ? s.toString() : source,
    extracted,
    modified,
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Recursively walk template AST nodes and rewrite translatable content in the
 * MagicString `s`.
 *
 * - Text nodes (type 2) become `{{ $t('<key>') }}`.
 * - Static attributes (type 6) listed in ATTR_WHITELIST become
 *   `:<name>="$t('<key>')"`.
 * - Elements (type 1), ForNodes (type 11) and IfNode branches (type 9) are
 *   recursed into. Interpolations (type 5), compound expressions (type 8)
 *   and every other node type are left untouched.
 *
 * @param {Array<object>} nodes - AST nodes whose `loc` offsets are relative to
 *   the template content.
 * @param {object} s - MagicString over the whole file; edited in place.
 * @param {number} baseOffset - Offset of the template content within the file.
 * @param {Array<{key: string, text: string, context: string}>} extracted -
 *   Output list; one entry is appended per rewritten string.
 */
function walkTemplate(nodes, s, baseOffset, extracted) {
  for (const node of nodes) {
    // Type 2 = Text
    if (node.type === 2) {
      const text = node.content.trim();
      if (isTranslatable(text)) {
        const key = hashKey(text);
        const start = baseOffset + node.loc.start.offset;
        const end = baseOffset + node.loc.end.offset;

        // Take the surrounding whitespace from the raw source rather than
        // from node.content: the parser may hand back normalised content
        // (e.g. condensed whitespace), which would collapse the author's
        // line breaks and indentation.
        const raw = s.original.slice(start, end);
        const leadingWs = raw.match(/^\s*/)[0];
        const trailingWs = raw.match(/\s*$/)[0];

        // No "already wrapped" lookbehind here. A text node is never part of
        // an expression, so such a check could only misfire on a neighbouring
        // `{{ $t('x') }}` or `@click="alert('..')"` and skip real text.
        s.overwrite(start, end, `${leadingWs}{{ $t('${key}') }}${trailingWs}`);
        extracted.push({ key, text, context: 'template-text' });
      }
      continue;
    }

    // Type 1 = Element
    if (node.type === 1) {
      if (node.props) {
        for (const prop of node.props) {
          // Type 6 = Attribute (static); directives and bindings are skipped.
          if (prop.type !== 6 || !prop.value) continue;

          const attrName = prop.name.toLowerCase();
          const text = prop.value.content;
          if (!ATTR_WHITELIST.has(attrName) || !isTranslatable(text)) continue;

          const key = hashKey(text);
          const attrStart = baseOffset + prop.loc.start.offset;
          const attrEnd = baseOffset + prop.loc.end.offset;

          // Convert the static attribute to a binding. The author's casing
          // of the name is kept so a component prop is not renamed.
          s.overwrite(attrStart, attrEnd, `:${prop.name}="$t('${key}')"`);
          extracted.push({ key, text, context: `template-attr-${attrName}` });
        }
      }

      if (node.children) {
        walkTemplate(node.children, s, baseOffset, extracted);
      }
      continue;
    }

    // Type 11 = ForNode — recurse into children
    if (node.type === 11 && node.children) {
      walkTemplate(node.children, s, baseOffset, extracted);
      continue;
    }

    // Type 9 = IfNode — recurse into each branch
    if (node.type === 9 && node.branches) {
      for (const branch of node.branches) {
        if (branch.children) {
          walkTemplate(branch.children, s, baseOffset, extracted);
        }
      }
    }
  }
}
|
|
151
|
+
|
|
152
|
+
/**
 * Find string literals in a script block that look like user-facing text and
 * replace them with a translation call.
 *
 * Two source patterns are recognised:
 *  - a sole string argument to alert / confirm / toast / notify /
 *    message.success|error|warning|info
 *  - a string value of a property named title, label, placeholder, message,
 *    text, description, tooltip, hint, caption, header, subtitle,
 *    errorMessage or successMessage
 *
 * Literals are skipped when they are template literals containing `${`
 * (replacing them would drop the dynamic part), when they use an escape
 * sequence other than \n \t \r \\ \' \" \`, or when they overlap a range this
 * call has already rewritten.
 *
 * @param {string} scriptContent - Text between the <script> tags.
 * @param {object} s - MagicString over the whole file; edited in place.
 * @param {number} baseOffset - Offset of scriptContent within the file.
 * @param {Array<{key: string, text: string, context: string}>} extracted -
 *   Output list; one entry is appended per rewritten literal.
 * @param {boolean} isSetup - When true emit `t('key')`, otherwise
 *   `this.$t('key')`.
 */
function extractScriptStrings(scriptContent, s, baseOffset, extracted, isSetup) {
  // Literal body: an escape sequence, or any same-line character that is
  // neither a backslash nor the opening quote. Handling `\\.` explicitly keeps
  // an escaped quote such as 'Don\'t' from ending the match in mid-string.
  const patterns = [
    // Function calls: alert('...'), confirm('...'), toast('...'), notify('...')
    /\b(alert|confirm|toast|notify|message\.(?:success|error|warning|info))\s*\(\s*(['"`])((?:\\.|(?!\2)[^\\\r\n])+)\2\s*\)/g,
    // Object properties: title: '...', label: '...', message: '...'
    /\b(title|label|placeholder|message|text|description|tooltip|hint|caption|header|subtitle|errorMessage|successMessage)\s*:\s*(['"`])((?:\\.|(?!\2)[^\\\r\n])+)\2/g,
  ];

  // [start, end) ranges already overwritten by this call. The second pattern
  // can match inside a literal the first one replaced; overwriting the same
  // range twice would corrupt the output.
  const rewritten = [];

  for (const pattern of patterns) {
    for (const match of scriptContent.matchAll(pattern)) {
      const quoteChar = match[2];
      const raw = match[3];

      // `Hello ${name}` is an expression, not a constant string.
      if (quoteChar === '`' && raw.includes('${')) continue;

      const text = decodeSimpleEscapes(raw);
      if (text === null || !isTranslatable(text) || text.length <= 1) continue;

      // The prefix of each pattern contains no quote characters, so the first
      // occurrence of the quote in the match is the start of the literal.
      const textStart = baseOffset + match.index + match[0].indexOf(quoteChar);
      const textEnd = textStart + raw.length + 2; // +2 for quotes
      if (rewritten.some(([from, to]) => textStart < to && from < textEnd)) continue;

      const key = hashKey(text);
      const tCall = isSetup ? `t('${key}')` : `this.$t('${key}')`;

      s.overwrite(textStart, textEnd, tCall);
      rewritten.push([textStart, textEnd]);
      extracted.push({ key, text, context: 'script' });
    }
  }
}

/**
 * Turn the source form of a string-literal body into its runtime value for
 * the common escapes. Returns null when the body uses any other escape
 * (\u…, \x…, octal, …) so the caller can leave that literal untouched.
 */
function decodeSimpleEscapes(raw) {
  if (!raw.includes('\\')) return raw;

  const simple = new Map([
    ['n', '\n'], ['t', '\t'], ['r', '\r'],
    ['\\', '\\'], ["'", "'"], ['"', '"'], ['`', '`'],
  ]);
  let unsupported = false;
  const decoded = raw.replace(/\\(.)/g, (whole, ch) => {
    if (simple.has(ch)) return simple.get(ch);
    unsupported = true;
    return whole;
  });
  return unsupported ? null : decoded;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Report whether the text starting at `start` is the string argument of a
 * `$t(...)` / `t(...)` call, i.e. whether the call's opening quote sits
 * immediately before `start`.
 *
 * The match is anchored to the end of the lookbehind window and requires the
 * `t` to be a whole identifier. A loose search of the window would also fire
 * on `alert('`, `format("`, or on a neighbouring call that has already been
 * closed, such as the `{{ $t('a') }}` in front of unrelated text.
 *
 * @param {string} source - Text the offsets refer to.
 * @param {number} start - Offset where the candidate text begins.
 * @param {number} end - Offset where the candidate text ends; not used by the
 *   check, kept so existing callers keep working.
 * @returns {boolean} True when an open translation call directly precedes the text.
 */
function isAlreadyWrapped(source, start, end) {
  // 20 characters is enough for `this.$t( '` with some spacing.
  const before = source.slice(Math.max(0, start - 20), start);
  return /(?:^|[^\w$])\$?t\s*\(\s*['"`]$/.test(before);
}
|
|
191
|
+
|
|
192
|
+
/**
 * Fallback extraction for when the Vue parser fails: replace text found
 * between a `>` and the next `<` with `{{ $t('<key>') }}`.
 *
 * When `source` contains a `<template ...>` ... `</template>` pair, only the
 * text inside it is considered. Otherwise comparisons in a script block
 * (`a > b && c < d`) or CSS child selectors would be treated as markup text
 * and rewritten. A source without such a pair is scanned in full.
 *
 * Text that contains a `{{` interpolation is left alone, because replacing it
 * wholesale would discard the binding.
 *
 * @param {string} source - Text to scan; match offsets are relative to it.
 * @param {object} s - MagicString addressed with the same offsets; edited in place.
 * @param {Array<{key: string, text: string, context: string}>} extracted -
 *   Output list; one entry is appended per rewritten text run.
 */
function extractTemplateRegex(source, s, extracted) {
  // Region to scan: from the '>' that closes the opening <template> tag up to
  // and including the '<' that starts the last </template>.
  const openTag = /<template\b[^>]*>/.exec(source);
  const closeIndex = source.lastIndexOf('</template>');
  const hasTemplate = openTag !== null && closeIndex >= openTag.index + openTag[0].length;
  const regionStart = hasTemplate ? openTag.index + openTag[0].length - 1 : 0;
  const regionEnd = hasTemplate ? closeIndex + 1 : source.length;

  const textPattern = />([^<]+)</g;
  textPattern.lastIndex = regionStart;

  let match;
  while ((match = textPattern.exec(source)) !== null) {
    // Matches are found left to right, so the first one that runs past the
    // region ends the scan.
    if (match.index + match[0].length > regionEnd) break;

    const original = match[1];
    const text = original.trim();
    if (text.includes('{{') || !isTranslatable(text)) continue;

    const textStart = match.index + 1;
    const textEnd = textStart + original.length;
    if (isAlreadyWrapped(source, textStart, textEnd)) continue;

    const key = hashKey(text);
    // Keep the whitespace around the text exactly as written.
    const leadingWs = original.match(/^\s*/)[0];
    const trailingWs = original.match(/\s*$/)[0];
    s.overwrite(textStart, textEnd, `${leadingWs}{{ $t('${key}') }}${trailingWs}`);
    extracted.push({ key, text, context: 'template-text' });
  }
}
|
package/src/utils.js
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
|
|
3
|
+
/**
 * Derive a short, deterministic translation key from a piece of text.
 *
 * The text is trimmed, lower-cased and has every whitespace run reduced to a
 * single space before hashing, so inputs that differ only in case or spacing
 * yield the same key.
 *
 * @param {string} text - Text to derive the key from.
 * @returns {string} `key_` followed by the first 8 hex digits of the SHA-256
 *   digest of the normalised text.
 */
export function hashKey(text) {
  // After trim() there is no edge whitespace, so splitting on whitespace runs
  // and re-joining with one space collapses the interior runs.
  const canonical = text.trim().toLowerCase().split(/\s+/).join(' ');

  const hasher = createHash('sha256');
  hasher.update(canonical);
  const shortDigest = hasher.digest('hex').substring(0, 8);

  return `key_${shortDigest}`;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Decide whether a string is "translatable" — i.e. it contains actual
 * human-readable text and not just whitespace, numbers, symbols, or code.
 *
 * Heuristics, in order:
 *  1. It must be a non-empty string with at least one letter of any script
 *     (Unicode `\p{L}`), which rules out pure numbers and punctuation.
 *  2. A bare identifier or dotted path (`userName`, `user.name`, `snake_case`)
 *     is code, except a capitalised word with a vowel ("Submit") or a 2–12
 *     letter ALL-CAPS label ("OK").
 *  3. URLs, relative or absolute paths and e-mail addresses are skipped.
 *  4. A string that is exactly one `{{ ... }}` expression is skipped.
 *
 * @param {*} text - Candidate value; anything that is not a string yields false.
 * @returns {boolean} True when the text should be extracted for translation.
 */
export function isTranslatable(text) {
  if (!text || typeof text !== 'string') return false;

  const trimmed = text.trim();
  if (trimmed.length === 0) return false;

  // Must contain at least one letter from any script. Using the Unicode
  // Letter property instead of a list of ranges covers Arabic, Hebrew, Greek,
  // Thai, Hangul, etc., and also rejects pure numbers and lone symbols.
  if (!/\p{L}/u.test(trimmed)) return false;

  // Things that look like code identifiers: identifier segments joined by
  // single dots. Requiring a segment after every dot keeps sentence text such
  // as "Loading..." or "Done." from being mistaken for a member path.
  const looksLikeIdentifier = /^[a-zA-Z_$][\w$]*(?:\.[\w$]+)*$/.test(trimmed);
  if (looksLikeIdentifier && trimmed.length > 1) {
    // Likely a real word like "Submit", "Cancel", "Home".
    const isCapitalisedWord = /^[A-Z][a-z]+$/.test(trimmed)
      && trimmed.length > 2
      && /[aeiou]/i.test(trimmed);
    if (isCapitalisedWord) return true;

    // ALL CAPS short words are likely labels.
    return /^[A-Z]{2,12}$/.test(trimmed);
  }

  // Skip URLs, paths, emails
  if (/^(https?:\/\/|\/|\.\/|\.\.\/)/.test(trimmed)) return false;
  if (/^[\w.+-]+@[\w.-]+\.\w+$/.test(trimmed)) return false;

  // Skip template expressions that are purely code ({{ something }})
  if (/^\{\{[^}]+\}\}$/.test(trimmed)) return false;

  return true;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Split a string around Vue-style `{{ expr }}` interpolations.
 *
 * Only the double-brace form is recognised by this function; `{expr}` and
 * `${expr}` are not detected.
 *
 * @param {string} text - Text to inspect.
 * @returns {{hasInterpolation: boolean, parts: Array<{type: 'static'|'expr', value: string}>}}
 *   `parts` lists, in source order, each expression (trimmed) and each
 *   non-blank static run (verbatim). It is empty when no interpolation is
 *   present.
 */
export function parseInterpolation(text) {
  const hits = Array.from(String(text).matchAll(/\{\{\s*([^}]+?)\s*\}\}/g));
  const parts = [];

  if (hits.length === 0) {
    return { hasInterpolation: false, parts };
  }

  // Whitespace-only static runs carry no text and are dropped.
  const pushStatic = (chunk) => {
    if (chunk.trim()) parts.push({ type: 'static', value: chunk });
  };

  let cursor = 0;
  for (const hit of hits) {
    if (hit.index > cursor) {
      pushStatic(text.slice(cursor, hit.index));
    }
    parts.push({ type: 'expr', value: hit[1].trim() });
    cursor = hit.index + hit[0].length;
  }

  if (cursor < text.length) {
    pushStatic(text.slice(cursor));
  }

  return { hasInterpolation: true, parts };
}
|