bctranslate 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,10 @@
1
1
  import * as babelParser from '@babel/parser';
2
2
  import _traverse from '@babel/traverse';
3
- import _generate from '@babel/generator';
4
- import * as t from '@babel/types';
5
3
  import MagicString from 'magic-string';
6
- import { hashKey, isTranslatable } from '../utils.js';
4
+ import { textKey, isTranslatable } from '../utils.js';
7
5
 
8
6
  // Handle ESM default export quirks
9
7
  const traverse = _traverse.default || _traverse;
10
- const generate = _generate.default || _generate;
11
8
 
12
9
  /**
13
10
  * Non-translatable JSX attribute names.
@@ -25,8 +22,31 @@ const ATTR_WHITELIST = new Set([
25
22
  'aria-placeholder', 'aria-description',
26
23
  ]);
27
24
 
25
+ /**
26
+ * Walk up the Babel path tree to find the nearest enclosing function
27
+ * with a BlockStatement body — that is the React component function.
28
+ * Returns the character offset right after the opening `{`.
29
+ */
30
+ function findComponentBodyStart(jsxPath) {
31
+ let p = jsxPath.parentPath;
32
+ while (p) {
33
+ const { node } = p;
34
+ if (
35
+ (node.type === 'FunctionDeclaration' ||
36
+ node.type === 'FunctionExpression' ||
37
+ node.type === 'ArrowFunctionExpression') &&
38
+ node.body?.type === 'BlockStatement'
39
+ ) {
40
+ return node.body.start + 1; // right after opening {
41
+ }
42
+ p = p.parentPath;
43
+ }
44
+ return -1;
45
+ }
46
+
28
47
  /**
29
48
  * Parse a JSX/TSX file and extract translatable strings.
49
+ * Hook injection uses AST-derived character positions — no regex.
30
50
  */
31
51
  export function parseReact(source, filePath) {
32
52
  const extracted = [];
@@ -45,13 +65,12 @@ export function parseReact(source, filePath) {
45
65
  'decorators-legacy',
46
66
  ].filter(Boolean),
47
67
  });
48
- } catch (err) {
49
- // If Babel fails, return unmodified
68
+ } catch {
50
69
  return { source, extracted: [], modified: false };
51
70
  }
52
71
 
53
72
  const s = new MagicString(source);
54
- let needsImport = false;
73
+ let hookInsertPos = -1;
55
74
 
56
75
  traverse(ast, {
57
76
  // JSX text children: <div>Hello World</div>
@@ -59,28 +78,23 @@ export function parseReact(source, filePath) {
59
78
  const text = path.node.value.trim();
60
79
  if (!isTranslatable(text)) return;
61
80
 
62
- const key = hashKey(text);
63
- const start = path.node.start;
64
- const end = path.node.end;
65
-
66
- // Preserve whitespace
81
+ const key = textKey(text);
82
+ const { start, end } = path.node;
67
83
  const original = path.node.value;
68
84
  const leadingWs = original.match(/^(\s*)/)[1];
69
85
  const trailingWs = original.match(/(\s*)$/)[1];
70
86
 
71
87
  s.overwrite(start, end, `${leadingWs}{t('${key}')}${trailingWs}`);
72
88
  extracted.push({ key, text, context: 'jsx-text' });
73
- needsImport = true;
89
+
90
+ if (hookInsertPos === -1) hookInsertPos = findComponentBodyStart(path);
74
91
  },
75
92
 
76
93
  // JSX string attributes: <input placeholder="Enter name" />
77
94
  JSXAttribute(path) {
78
95
  const name = path.node.name?.name;
79
- if (!name) return;
80
-
81
- if (ATTR_BLACKLIST.has(name)) return;
96
+ if (!name || ATTR_BLACKLIST.has(name)) return;
82
97
 
83
- // Only translate whitelisted attrs, or unknown attrs if they have translatable values
84
98
  const value = path.node.value;
85
99
  if (!value || value.type !== 'StringLiteral') return;
86
100
 
@@ -88,60 +102,41 @@ export function parseReact(source, filePath) {
88
102
  if (!isTranslatable(text)) return;
89
103
  if (!ATTR_WHITELIST.has(name) && text.length < 3) return;
90
104
 
91
- const key = hashKey(text);
92
- const attrStart = path.node.start;
93
- const attrEnd = path.node.end;
94
-
95
- s.overwrite(attrStart, attrEnd, `${name}={t('${key}')}`);
105
+ const key = textKey(text);
106
+ s.overwrite(path.node.start, path.node.end, `${name}={t('${key}')}`);
96
107
  extracted.push({ key, text, context: `jsx-attr-${name}` });
97
- needsImport = true;
108
+
109
+ if (hookInsertPos === -1) hookInsertPos = findComponentBodyStart(path);
98
110
  },
99
111
 
100
- // String literals in common patterns (not JSX)
112
+ // alert('...'), confirm('...')
101
113
  CallExpression(path) {
102
114
  const callee = path.node.callee;
103
- const calleeName = callee.name || (callee.property && callee.property.name);
104
-
105
- // alert('...'), confirm('...'), toast('...')
106
- if (['alert', 'confirm'].includes(calleeName)) {
107
- const arg = path.node.arguments[0];
108
- if (arg && arg.type === 'StringLiteral' && isTranslatable(arg.value)) {
109
- const key = hashKey(arg.value);
110
- s.overwrite(arg.start, arg.end, `t('${key}')`);
111
- extracted.push({ key, text: arg.value, context: 'call-arg' });
112
- needsImport = true;
113
- }
115
+ const calleeName = callee.name || callee.property?.name;
116
+ if (!['alert', 'confirm'].includes(calleeName)) return;
117
+
118
+ const arg = path.node.arguments[0];
119
+ if (arg?.type === 'StringLiteral' && isTranslatable(arg.value)) {
120
+ const key = textKey(arg.value);
121
+ s.overwrite(arg.start, arg.end, `t('${key}')`);
122
+ extracted.push({ key, text: arg.value, context: 'call-arg' });
114
123
  }
115
124
  },
116
125
  });
117
126
 
118
- // Add useTranslation import if needed
119
- if (needsImport) {
120
- const hasUseTranslation = source.includes('useTranslation');
121
- if (!hasUseTranslation) {
122
- // Find the first import statement or top of file
123
- let insertPos = 0;
124
- const importMatch = source.match(/^import\s.+$/m);
125
- if (importMatch) {
126
- // Insert after all imports
127
- const lastImportMatch = [...source.matchAll(/^import\s.+$/gm)];
128
- if (lastImportMatch.length > 0) {
129
- const last = lastImportMatch[lastImportMatch.length - 1];
130
- insertPos = last.index + last[0].length;
131
- }
127
+ // ── Inject import and hook using AST-derived positions ───────────────────────
128
+ if (extracted.length > 0) {
129
+ if (!source.includes('useTranslation')) {
130
+ // Find the last ImportDeclaration node — safe even with 'use client' directives
131
+ let lastImportEnd = 0;
132
+ for (const node of ast.program.body) {
133
+ if (node.type === 'ImportDeclaration') lastImportEnd = node.end;
132
134
  }
133
-
134
- s.appendRight(insertPos, `\nimport { useTranslation } from 'react-i18next';\n`);
135
+ s.appendRight(lastImportEnd, `\nimport { useTranslation } from 'react-i18next';`);
135
136
  }
136
137
 
137
- // Add const { t } = useTranslation() if not present
138
- if (!source.includes('useTranslation()')) {
139
- // Find the function body
140
- const funcMatch = source.match(/(?:function\s+\w+|const\s+\w+\s*=\s*(?:\([^)]*\)\s*=>|\w+\s*=>))\s*\{/);
141
- if (funcMatch) {
142
- const insertAt = funcMatch.index + funcMatch[0].length;
143
- s.appendRight(insertAt, `\n const { t } = useTranslation();\n`);
144
- }
138
+ if (!source.includes('useTranslation()') && hookInsertPos > 0) {
139
+ s.appendRight(hookInsertPos, `\n const { t } = useTranslation();\n`);
145
140
  }
146
141
  }
147
142
 
@@ -150,4 +145,4 @@ export function parseReact(source, filePath) {
150
145
  extracted,
151
146
  modified: extracted.length > 0,
152
147
  };
153
- }
148
+ }
@@ -1,6 +1,6 @@
1
1
  import * as compiler from '@vue/compiler-dom';
2
2
  import MagicString from 'magic-string';
3
- import { hashKey, isTranslatable, parseInterpolation } from '../utils.js';
3
+ import { textKey, isTranslatable, parseInterpolation } from '../utils.js';
4
4
 
5
5
  /**
6
6
  * Non-translatable attribute names.
@@ -80,7 +80,7 @@ function walkTemplate(nodes, s, baseOffset, extracted) {
80
80
  if (node.type === 2) {
81
81
  const text = node.content.trim();
82
82
  if (isTranslatable(text)) {
83
- const key = hashKey(text);
83
+ const key = textKey(text);
84
84
  const start = baseOffset + node.loc.start.offset;
85
85
  const end = baseOffset + node.loc.end.offset;
86
86
 
@@ -108,7 +108,7 @@ function walkTemplate(nodes, s, baseOffset, extracted) {
108
108
 
109
109
  if (ATTR_WHITELIST.has(attrName) && isTranslatable(prop.value.content)) {
110
110
  const text = prop.value.content;
111
- const key = hashKey(text);
111
+ const key = textKey(text);
112
112
  const attrStart = baseOffset + prop.loc.start.offset;
113
113
  const attrEnd = baseOffset + prop.loc.end.offset;
114
114
 
@@ -168,7 +168,7 @@ function extractScriptStrings(scriptContent, s, baseOffset, extracted, isSetup)
168
168
  while ((match = pattern.exec(scriptContent)) !== null) {
169
169
  const text = match[3];
170
170
  if (isTranslatable(text) && text.length > 1) {
171
- const key = hashKey(text);
171
+ const key = textKey(text);
172
172
  const fullMatchStart = baseOffset + match.index;
173
173
  const quoteChar = match[2];
174
174
  const textStart = baseOffset + match.index + match[0].indexOf(quoteChar + text);
@@ -196,7 +196,7 @@ function extractTemplateRegex(source, s, extracted) {
196
196
  while ((match = textPattern.exec(source)) !== null) {
197
197
  const text = match[1].trim();
198
198
  if (isTranslatable(text)) {
199
- const key = hashKey(text);
199
+ const key = textKey(text);
200
200
  const textStart = match.index + 1;
201
201
  const textEnd = textStart + match[1].length;
202
202
  if (!isAlreadyWrapped(source, textStart, textEnd)) {
package/src/utils.js CHANGED
@@ -1,18 +1,42 @@
1
1
  import { createHash } from 'crypto';
2
2
 
3
3
  /**
4
- * Generate a deterministic, short key from a string.
5
- * Same input always produces the same key — idempotent across runs.
4
+ * Generate a readable, slug-based i18n key from a string.
5
+ * "Submit" → "submit"
6
+ * "Please enter your email" → "please_enter_your_email"
7
+ * Falls back to a hash prefix for non-Latin or symbol-only strings.
6
8
  */
7
- export function hashKey(text) {
8
- const normalized = text.trim().toLowerCase().replace(/\s+/g, ' ');
9
- const hash = createHash('sha256').update(normalized).digest('hex').slice(0, 8);
10
- return `key_${hash}`;
11
- }
9
+ export function textKey(text) {
10
+ const trimmed = text.trim();
11
+
12
+ const slug = trimmed
13
+ .toLowerCase()
14
+ .replace(/[^\w\s]/g, ' ') // punctuation → space
15
+ .replace(/\s+/g, '_') // spaces → underscores
16
+ .replace(/^[^a-z]+/, '') // strip non-alpha prefix
17
+ .replace(/[^a-z0-9_]/g, '') // remove remaining non-ASCII
18
+ .replace(/_+/g, '_') // collapse multiple underscores
19
+ .replace(/^_|_$/g, '') // trim underscores
20
+ .slice(0, 40)
21
+ .replace(/_+$/, '');
22
+
23
+ if (slug && slug.length >= 2 && /[a-z]/.test(slug)) {
24
+ return slug;
25
+ }
26
+
27
+ // Fallback: hash (for Chinese, Arabic, emoji, symbols, etc.)
28
+ const hash = createHash('sha256').update(trimmed).digest('hex').slice(0, 8);
29
+ return `key_${hash}`;
30
+ }
12
31
 
13
32
  /**
14
- * Determine if a string is "translatable" i.e. it contains actual
15
- * human-readable text and not just whitespace, numbers, symbols, or code.
33
+ * @deprecated Use textKey() instead. Kept for internal backward compat.
34
+ */
35
+ export const hashKey = textKey;
36
+
37
+ /**
38
+ * Determine if a string is "translatable" — contains actual human-readable
39
+ * text rather than whitespace, numbers, symbols, or code identifiers.
16
40
  */
17
41
  export function isTranslatable(text) {
18
42
  if (!text || typeof text !== 'string') return false;
@@ -27,15 +51,19 @@ export function isTranslatable(text) {
27
51
  if (/^[\d.,]+$/.test(trimmed)) return false;
28
52
 
29
53
  // Skip single characters that are punctuation/symbols
30
- if (trimmed.length === 1 && /[^a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]/.test(trimmed)) return false;
54
+ if (
55
+ trimmed.length === 1 &&
56
+ /[^a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]/.test(trimmed)
57
+ )
58
+ return false;
31
59
 
32
- // Skip things that look like code identifiers (camelCase, snake_case with no spaces)
60
+ // Skip code identifiers (camelCase, snake_case with no spaces)
33
61
  if (/^[a-zA-Z_$][a-zA-Z0-9_$.]*$/.test(trimmed) && !trimmed.includes(' ') && trimmed.length > 1) {
34
- // But allow single real words (check if it has vowels or is a common word)
62
+ // Allow capitalised real words: "Submit", "Cancel", "Home"
35
63
  if (/[aeiouAEIOU]/.test(trimmed) && trimmed.length > 2 && /^[A-Z][a-z]+$/.test(trimmed)) {
36
- return true; // Likely a real word like "Submit", "Cancel", "Home"
64
+ return true;
37
65
  }
38
- // Allow ALL CAPS short words (likely labels)
66
+ // Allow ALL-CAPS short labels: "OK", "FAQ"
39
67
  if (/^[A-Z]{2,12}$/.test(trimmed)) return true;
40
68
  return false;
41
69
  }
@@ -44,29 +72,80 @@ export function isTranslatable(text) {
44
72
  if (/^(https?:\/\/|\/|\.\/|\.\.\/)/.test(trimmed)) return false;
45
73
  if (/^[\w.+-]+@[\w.-]+\.\w+$/.test(trimmed)) return false;
46
74
 
47
- // Skip template expressions that are purely code ({{ something }})
75
+ // Skip pure template expressions {{ something }}
48
76
  if (/^\{\{[^}]+\}\}$/.test(trimmed)) return false;
49
77
 
50
78
  // Must contain at least one letter from any script
51
- if (!/[a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uAC00-\uD7AF]/.test(trimmed)) return false;
79
+ if (
80
+ !/[a-zA-Z\u00C0-\u024F\u0400-\u04FF\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uAC00-\uD7AF]/.test(
81
+ trimmed
82
+ )
83
+ )
84
+ return false;
52
85
 
53
86
  return true;
54
87
  }
55
88
 
56
89
  /**
57
- * Check if a string contains interpolation expressions ({{ }}, {}, ${}).
58
- * Returns the parts if so.
90
+ * Shield interpolation variables before sending to Argos Translate.
91
+ * Replaces {{ name }}, {name}, ${name}, %{name} with indexed XML-like tokens (<x0/>, <x1/>, …)
92
+ * that NMT models are trained to preserve verbatim.
93
+ *
94
+ * Returns { shielded, tokens } — call unshieldInterpolations() to restore.
95
+ */
96
+ export function shieldInterpolations(text) {
97
+ const tokens = [];
98
+ let shielded = text;
99
+
100
+ // Vue {{ expr }} — must come first to avoid matching inner {
101
+ shielded = shielded.replace(/\{\{[^}]*\}\}/g, (m) => {
102
+ const i = tokens.length;
103
+ tokens.push(m);
104
+ return `<x${i}/>`;
105
+ });
106
+
107
+ // Template literal ${expr}
108
+ shielded = shielded.replace(/\$\{[^}]*\}/g, (m) => {
109
+ const i = tokens.length;
110
+ tokens.push(m);
111
+ return `<x${i}/>`;
112
+ });
113
+
114
+ // i18next / vue-i18n {varName} or {0}
115
+ shielded = shielded.replace(/\{[^{}\s][^{}]*\}/g, (m) => {
116
+ const i = tokens.length;
117
+ tokens.push(m);
118
+ return `<x${i}/>`;
119
+ });
120
+
121
+ // Ruby / Rails %{varName}
122
+ shielded = shielded.replace(/%\{[^}]+\}/g, (m) => {
123
+ const i = tokens.length;
124
+ tokens.push(m);
125
+ return `<x${i}/>`;
126
+ });
127
+
128
+ return { shielded, tokens };
129
+ }
130
+
131
+ /**
132
+ * Restore interpolation variables after translation.
133
+ * Tolerates minor whitespace changes the MT model may introduce.
134
+ */
135
+ export function unshieldInterpolations(text, tokens) {
136
+ if (!tokens || tokens.length === 0) return text;
137
+ return text.replace(/<x(\d+)\s*\/>/gi, (_, idx) => tokens[parseInt(idx, 10)] ?? '');
138
+ }
139
+
140
+ /**
141
+ * Check if a string contains interpolation expressions.
59
142
  */
60
143
  export function parseInterpolation(text) {
61
- // Vue-style {{ expr }}
62
144
  const vuePattern = /\{\{\s*([^}]+?)\s*\}\}/g;
63
- // React/JS-style {expr} or ${expr}
64
- const jsPattern = /\$?\{([^}]+)\}/g;
65
145
 
66
146
  const parts = [];
67
147
  let hasInterpolation = false;
68
148
 
69
- // Check for Vue interpolation
70
149
  if (vuePattern.test(text)) {
71
150
  hasInterpolation = true;
72
151
  vuePattern.lastIndex = 0;
@@ -88,4 +167,4 @@ export function parseInterpolation(text) {
88
167
  }
89
168
 
90
169
  return { hasInterpolation, parts };
91
- }
170
+ }