bctranslate 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/bctranslate.js +132 -132
- package/package.json +1 -1
- package/python/translator.py +7 -0
- package/src/bridges/python.js +61 -55
- package/src/index.js +158 -97
- package/src/parsers/html.js +3 -3
- package/src/parsers/js.js +4 -4
- package/src/parsers/json.js +1 -1
- package/src/parsers/react.js +55 -60
- package/src/parsers/vue.js +99 -105
- package/src/utils.js +159 -23
package/src/utils.js
CHANGED
|
@@ -1,18 +1,99 @@
|
|
|
1
1
|
import { createHash } from 'crypto';
|
|
2
|
+
import { basename, extname } from 'path';
|
|
3
|
+
|
|
4
|
+
// Function words (articles, auxiliaries, prepositions, pronouns, negations)
// that carry too little meaning to appear in a key; contextKey() filters
// them out when choosing slug words.
const STOP_WORDS = new Set(
  [
    'a an the is are was were be been being',
    'have has had do does did will would shall',
    'should may might must can could',
    'to for and or but of in on at by',
    'as if its it this that these those',
    'my your our their with from up about',
    'no not so',
  ].flatMap((row) => row.split(' '))
);
|
|
2
14
|
|
|
3
15
|
/**
|
|
4
|
-
* Generate a
|
|
5
|
-
*
|
|
16
|
+
* Generate a readable, slug-based i18n key from a string.
|
|
17
|
+
* "Submit" → "submit"
|
|
18
|
+
* "Please enter your email" → "please_enter_your_email"
|
|
19
|
+
* Falls back to "key_" plus the first 8 hex characters of a SHA-256 hash for non-Latin or symbol-only strings.
|
|
6
20
|
*/
|
|
7
|
-
export function
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
21
|
+
export function textKey(text) {
  // Contract: `text` is a string. Returns either a lowercase snake_case slug
  // (at most 40 characters, at least 2, containing a letter) or, when no such
  // slug can be derived, "key_" followed by 8 hex characters of the SHA-256
  // of the trimmed text.
  const trimmed = text.trim();

  const slug = trimmed
    // `\w` below is ASCII-only, so an accented letter would otherwise be
    // treated as a separator ("Café" → "caf"). Decompose to base letter +
    // combining mark and drop the marks so "Café" → "cafe".
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^\w\s]/g, ' ')      // punctuation and non-ASCII → space
    .replace(/\s+/g, '_')          // whitespace runs → underscore
    .replace(/^[^a-z]+/, '')       // key must start with a letter
    .replace(/[^a-z0-9_]/g, '')    // safety net: keep only [a-z0-9_]
    .replace(/_+/g, '_')           // collapse repeated underscores
    .replace(/^_|_$/g, '')         // trim one leading/trailing underscore
    .slice(0, 40)                  // cap key length
    .replace(/_+$/, '');           // truncation may leave a dangling "_"

  if (slug && slug.length >= 2 && /[a-z]/.test(slug)) {
    return slug;
  }

  // Fallback for Chinese, Arabic, emoji, symbols, etc. The hash is taken over
  // the un-normalised trimmed text so distinct inputs keep distinct keys.
  const hash = createHash('sha256').update(trimmed).digest('hex').slice(0, 8);
  return `key_${hash}`;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Generate a context-aware i18n key using the component name as namespace
 * and up to three content words (stop words removed) as a camelCase slug.
 *
 *   "Notes"          in HomeView.vue  → home.notes
 *   "Quick Note"     in HomeView.vue  → home.quickNote
 *   "View livestock" in HomeView.vue  → home.viewLivestock
 *   "Submit"         in LoginForm.vue → loginForm.submit
 *   "你好"           in App.vue       → app.key<6 hex chars of SHA-256>
 *
 * When `filePath` is missing or empty no namespace can be derived and the
 * key is returned without a "namespace." prefix.
 *
 * @param {string} text      The source string to key
 * @param {string} filePath  Absolute or relative path of the source file
 * @returns {string} "<namespace>.<slug>", or "<namespace>.key<hash>" when the
 *   text yields no usable words
 */
export function contextKey(text, filePath) {
  const trimmed = text.trim();

  // ── Namespace: derive from filename ──────────────────────────────────────
  const fileName =
    typeof filePath === 'string' ? basename(filePath, extname(filePath)) : '';
  // Strip common Vue/React suffixes: HomeView → Home, UserCard → User, etc.
  // If stripping would leave nothing (file literally named "View"), keep it.
  const stripped =
    fileName.replace(
      /(?:View|Component|Page|Screen|Modal|Dialog|Card|Panel|Widget|Layout|Container)$/,
      ''
    ) || fileName;
  // camelCase namespace: "UserProfile" → "userProfile", "home" → "home".
  // Guarded so an empty file name no longer crashes on stripped[0].
  const ns = stripped ? stripped[0].toLowerCase() + stripped.slice(1) : '';
  const prefix = ns ? `${ns}.` : '';

  // ── Slug: 1-3 meaningful words in camelCase ───────────────────────────────
  const words = trimmed
    // `\w` is ASCII-only; fold accents first so "Créer" stays one word
    // ("creer") instead of splitting into "cr" + "er".
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^\w\s]/g, ' ')
    .split(/\s+/)
    .map((w) => w.toLowerCase())
    .filter((w) => w.length >= 2 && /[a-z]/.test(w) && !STOP_WORDS.has(w));

  if (!words.length) {
    // Non-Latin scripts, emoji, or all stop words — fall back to hash suffix.
    // Hash the original trimmed text so distinct inputs keep distinct keys.
    const hash = createHash('sha256').update(trimmed).digest('hex').slice(0, 6);
    return `${prefix}key${hash}`;
  }

  const [first, ...rest] = words.slice(0, 3);
  const slug = first + rest.map((w) => w[0].toUpperCase() + w.slice(1)).join('');

  return `${prefix}${slug}`;
}
|
|
12
88
|
|
|
13
89
|
/**
|
|
14
|
-
*
|
|
15
|
-
|
|
90
|
+
* @deprecated Use contextKey() for new code. textKey() kept for non-file contexts.
|
|
91
|
+
*/
|
|
92
|
+
export const hashKey = textKey; // same function object as textKey — both names yield identical keys
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Determine if a string is "translatable" — contains actual human-readable
|
|
96
|
+
* text rather than whitespace, numbers, symbols, or code identifiers.
|
|
16
97
|
*/
|
|
17
98
|
export function isTranslatable(text) {
|
|
18
99
|
if (!text || typeof text !== 'string') return false;
|
|
@@ -27,15 +108,19 @@ export function isTranslatable(text) {
|
|
|
27
108
|
if (/^[\d.,]+$/.test(trimmed)) return false;
|
|
28
109
|
|
|
29
110
|
// Skip single characters that are punctuation/symbols
|
|
30
|
-
if (
|
|
111
|
+
if (
|
|
112
|
+
trimmed.length === 1 &&
|
|
113
|
+
/[^a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]/.test(trimmed)
|
|
114
|
+
)
|
|
115
|
+
return false;
|
|
31
116
|
|
|
32
|
-
// Skip
|
|
117
|
+
// Skip code identifiers (camelCase, snake_case with no spaces)
|
|
33
118
|
if (/^[a-zA-Z_$][a-zA-Z0-9_$.]*$/.test(trimmed) && !trimmed.includes(' ') && trimmed.length > 1) {
|
|
34
|
-
//
|
|
119
|
+
// Allow capitalised real words: "Submit", "Cancel", "Home"
|
|
35
120
|
if (/[aeiouAEIOU]/.test(trimmed) && trimmed.length > 2 && /^[A-Z][a-z]+$/.test(trimmed)) {
|
|
36
|
-
return true;
|
|
121
|
+
return true;
|
|
37
122
|
}
|
|
38
|
-
// Allow ALL
|
|
123
|
+
// Allow ALL-CAPS short labels: "OK", "FAQ"
|
|
39
124
|
if (/^[A-Z]{2,12}$/.test(trimmed)) return true;
|
|
40
125
|
return false;
|
|
41
126
|
}
|
|
@@ -44,29 +129,80 @@ export function isTranslatable(text) {
|
|
|
44
129
|
if (/^(https?:\/\/|\/|\.\/|\.\.\/)/.test(trimmed)) return false;
|
|
45
130
|
if (/^[\w.+-]+@[\w.-]+\.\w+$/.test(trimmed)) return false;
|
|
46
131
|
|
|
47
|
-
// Skip template expressions
|
|
132
|
+
// Skip pure template expressions {{ something }}
|
|
48
133
|
if (/^\{\{[^}]+\}\}$/.test(trimmed)) return false;
|
|
49
134
|
|
|
50
135
|
// Must contain at least one letter from any script
|
|
51
|
-
if (
|
|
136
|
+
if (
|
|
137
|
+
!/[a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uAC00-\uD7AF]/.test(
|
|
138
|
+
trimmed
|
|
139
|
+
)
|
|
140
|
+
)
|
|
141
|
+
return false;
|
|
52
142
|
|
|
53
143
|
return true;
|
|
54
144
|
}
|
|
55
145
|
|
|
56
146
|
/**
|
|
57
|
-
*
|
|
58
|
-
*
|
|
147
|
+
* Shield interpolation variables before sending to Argos Translate.
|
|
148
|
+
* Replaces {{ name }}, {name}, ${name}, %{name} with XML-like tokens <xi/>
|
|
149
|
+
* that NMT models are trained to preserve verbatim.
|
|
150
|
+
*
|
|
151
|
+
* Returns { shielded, tokens } — call unshieldInterpolations() to restore.
|
|
152
|
+
*/
|
|
153
|
+
export function shieldInterpolations(text) {
  // Replaces every interpolation expression in `text` with a numbered
  // placeholder `<xN/>` and records the original expression at tokens[N].
  // Returns { shielded, tokens }; pass both to unshieldInterpolations().
  //
  // All syntaxes are matched in ONE left-to-right pass. Alternation order
  // matters: the prefixed / doubled forms must be tried before the bare
  // `{var}` form, otherwise `%{name}` would be shielded as `%<x0/>` (leaving
  // the `%` exposed to the translator) and a `{{ expr }}` nested inside
  // braces could end up inside another token and never be restored.
  const INTERPOLATION =
    /\{\{[^}]*\}\}|\$\{[^}]*\}|%\{[^}]+\}|\{[^{}\s][^{}]*\}/g;
  //  └ Vue {{ expr }} └ JS ${expr} └ Ruby %{var} └ i18next/vue-i18n {var} or {0}

  const tokens = [];
  const shielded = text.replace(INTERPOLATION, (match) => {
    const index = tokens.length; // placeholders are numbered by position in the text
    tokens.push(match);
    return `<x${index}/>`;
  });

  return { shielded, tokens };
}
|
|
187
|
+
|
|
188
|
+
/**
 * Restore interpolation variables after translation.
 *
 * Each placeholder `<xN/>` is replaced by `tokens[N]`. Matching is
 * case-insensitive and tolerates whitespace the MT model may insert anywhere
 * inside the tag (`< x0 />`, `<x 0/>`, `<X0 / >`). A placeholder whose index
 * has no entry in `tokens` is replaced with an empty string.
 *
 * @param {string} text      Translated text containing placeholders
 * @param {string[]} tokens  Token list returned by shieldInterpolations()
 * @returns {string} The text with placeholders restored; returned unchanged
 *   when `tokens` is missing or empty
 */
export function unshieldInterpolations(text, tokens) {
  if (!tokens || tokens.length === 0) return text;
  return text.replace(
    /<\s*x\s*(\d+)\s*\/\s*>/gi,
    (_match, idx) => tokens[Number.parseInt(idx, 10)] ?? ''
  );
}
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Check if a string contains interpolation expressions.
|
|
59
199
|
*/
|
|
60
200
|
export function parseInterpolation(text) {
|
|
61
|
-
// Vue-style {{ expr }}
|
|
62
201
|
const vuePattern = /\{\{\s*([^}]+?)\s*\}\}/g;
|
|
63
|
-
// React/JS-style {expr} or ${expr}
|
|
64
|
-
const jsPattern = /\$?\{([^}]+)\}/g;
|
|
65
202
|
|
|
66
203
|
const parts = [];
|
|
67
204
|
let hasInterpolation = false;
|
|
68
205
|
|
|
69
|
-
// Check for Vue interpolation
|
|
70
206
|
if (vuePattern.test(text)) {
|
|
71
207
|
hasInterpolation = true;
|
|
72
208
|
vuePattern.lastIndex = 0;
|
|
@@ -88,4 +224,4 @@ export function parseInterpolation(text) {
|
|
|
88
224
|
}
|
|
89
225
|
|
|
90
226
|
return { hasInterpolation, parts };
|
|
91
|
-
}
|
|
227
|
+
}
|