i18ntk 3.0.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
/**
 * Percentage of recognised English words above which a translated value is
 * reported, when the caller does not supply a threshold of its own.
 */
const DEFAULT_ENGLISH_THRESHOLD_PERCENT = 10;

// ---------------------------------------------------------------------------
// Content patterns. None of them carries the `g` flag, so `.test()` keeps no
// state between calls and `.replace()` touches the first match only.
// ---------------------------------------------------------------------------

/** An http:// or https:// URL; it ends at whitespace, a quote or an angle bracket. */
const URL_PATTERN = /https?:\/\/[^\s"'<>]+/i;

/** An e-mail address whose domain ends in a dot followed by two or more letters. */
const EMAIL_PATTERN = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/;

// ---------------------------------------------------------------------------
// Credential patterns.
// ---------------------------------------------------------------------------

/** A credential-like name (api key, token, secret, password, ...) standing as a whole word. */
const CREDENTIAL_KEY_PATTERN = /\b(api[_-]?key|access[_-]?token|auth[_-]?token|refresh[_-]?token|secret|password|private[_-]?key|client[_-]?secret)\b/i;

/** The same credential-like names followed by `:` or `=` and a value of 16+ token characters. */
const CREDENTIAL_ASSIGNMENT_PATTERN = /\b(api[_-]?key|access[_-]?token|auth[_-]?token|refresh[_-]?token|secret|password|private[_-]?key|client[_-]?secret)\b\s*[:=]\s*["']?[A-Za-z0-9._~+/=-]{16,}/i;

/** The word "Bearer" followed by a token of 16+ characters. */
const BEARER_TOKEN_PATTERN = /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}/i;

/** Three dot-separated base64url segments, the first starting with `eyJ`. */
const JWT_PATTERN = /\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b/;

/** A PEM header announcing a private key, with an optional algorithm label. */
const PRIVATE_KEY_PATTERN = /-----BEGIN [A-Z ]*PRIVATE KEY-----/;

/**
 * Token shapes with a fixed prefix (AKIA/ASIA, AIza, sk_live_, xox?-, gh?_),
 * or any unbroken run of 32+ token characters.
 */
const OPAQUE_SECRET_PATTERN = /\b(AKIA[0-9A-Z]{16}|ASIA[0-9A-Z]{16}|AIza[0-9A-Za-z_-]{35}|sk_live_[0-9A-Za-z]{16,}|xox[baprs]-[0-9A-Za-z-]{16,}|gh[pousr]_[0-9A-Za-z_]{20,}|[A-Za-z0-9._~+/=-]{32,})\b/;
11
+
12
/**
 * Lower-case English vocabulary used to recognise untranslated text.
 * Lookups are done with lower-cased tokens, so every entry is lower-case.
 * Entries of one or two letters are listed for completeness, but the token
 * filter in this file discards tokens that short before the lookup happens.
 */
const ENGLISH_WORDS = new Set([
  // a - b
  'a', 'about', 'above', 'after', 'again', 'all', 'also', 'an', 'and', 'any', 'are', 'as', 'at',
  'available', 'back', 'based', 'be', 'because', 'before', 'below', 'best', 'book', 'build', 'by',
  // c - d
  'can', 'cash', 'challenge', 'change', 'check', 'choose', 'click', 'close', 'coming', 'complete',
  'confirm', 'continue', 'copy', 'create', 'current', 'data', 'delete', 'depth', 'details', 'done',
  'down',
  // e - h
  'earn', 'edit', 'email', 'empty', 'enter', 'error', 'exchange', 'failed', 'file', 'filter', 'for',
  'from', 'governance', 'has', 'have', 'help', 'hide', 'history', 'home',
  // i - m
  'if', 'in', 'into', 'is', 'it', 'its', 'key', 'keys', 'language', 'last', 'latest', 'learn', 'live',
  'loading', 'log', 'login', 'logout', 'market', 'markets', 'message', 'missing', 'more',
  // n - p
  'new', 'next', 'no', 'not', 'of', 'off', 'on', 'open', 'or', 'order', 'other', 'out', 'over',
  'overview', 'page', 'participate', 'players', 'please', 'platform', 'predict', 'prediction',
  'previous', 'public',
  // r - s
  'real', 'record', 'remove', 'required', 'reset', 'result', 'results', 'rewards', 'save', 'search',
  'select', 'settings', 'show', 'soon', 'start', 'status', 'structured', 'submit', 'success',
  // t - w
  'the', 'their', 'this', 'through', 'to', 'track', 'try', 'up', 'update', 'use', 'used', 'using',
  'value', 'view', 'warning', 'when', 'with', 'without'
]);

/**
 * Terms that are always acceptable inside a translation and therefore never
 * counted as words. Caller-supplied terms are merged into a copy of this set.
 */
const DEFAULT_ALLOWED_ENGLISH_TERMS = new Set(['api']);
40
+
41
/**
 * Reduce a locale tag to its lower-cased first subtag.
 *
 * The tag is split on the first `-` or `_`, so "en-US" and "pt_BR" become
 * "en" and "pt". Falsy input yields an empty string.
 *
 * @param {*} language - Locale tag; coerced to a string.
 * @returns {string} Text before the first `-` or `_`, lower-cased.
 */
function normalizeLanguage(language) {
  const tag = String(language || '').toLowerCase();
  const [primarySubtag] = tag.split(/[-_]/);
  return primarySubtag;
}
44
+
45
/**
 * Build the set of terms that must never be counted as words.
 *
 * Starts from a copy of DEFAULT_ALLOWED_ENGLISH_TERMS and adds every
 * non-blank string from `terms`, trimmed and lower-cased. Anything that is
 * not an array is ignored, as are non-string or blank entries.
 *
 * @param {*} terms - Extra allowed terms; only arrays are honoured.
 * @returns {Set<string>} A fresh set; the shared defaults are never mutated.
 */
function toAllowedTermSet(terms) {
  const candidates = Array.isArray(terms) ? terms : [];
  const cleaned = candidates
    .filter(entry => typeof entry === 'string')
    .map(entry => entry.trim().toLowerCase())
    .filter(entry => entry !== '');

  return new Set([...DEFAULT_ALLOWED_ENGLISH_TERMS, ...cleaned]);
}
56
+
57
/**
 * Blank out everything in a translation value that is not natural-language
 * text, so that it cannot be mistaken for words later on.
 *
 * Each of the following is replaced by a single space:
 *   - every URL and every e-mail address,
 *   - markup tags (`<...>`),
 *   - placeholders: `{{name}}`, `{name}` and printf-style `%s %d %i %f %j %o %O`.
 *
 * @param {*} value - Raw translation value; falsy values are treated as ''.
 * @returns {string} The value with non-language tokens replaced by spaces.
 */
function stripNonLanguageTokens(value) {
  // URL_PATTERN and EMAIL_PATTERN are declared without the `g` flag, and
  // String.prototype.replace with a non-global regex replaces only the first
  // match. Derive global twins from the shared patterns so that ALL URLs and
  // e-mail addresses are removed, while the patterns stay defined in one place.
  const matchingAll = pattern => new RegExp(
    pattern.source,
    pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`
  );

  return String(value || '')
    .replace(matchingAll(URL_PATTERN), ' ')
    .replace(matchingAll(EMAIL_PATTERN), ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\{\{[^}]+\}\}|\{[^}]+\}|%[sdifjoO]/g, ' ');
}
64
+
65
/**
 * Decide whether a token should be left out of the word statistics.
 *
 * A token is ignored when, after lower-casing and trimming leading/trailing
 * apostrophes and hyphens, it has at most two characters or is an allowed
 * term; or when the raw token is written entirely in capitals, digits, `_`
 * and `-`, or contains a digit.
 *
 * @param {string} word - Raw token as it appears in the text.
 * @param {Set<string>} allowedTerms - Lower-case terms that are always accepted.
 * @returns {boolean} True when the token must not be counted.
 */
function isIgnoredEnglishToken(word, allowedTerms) {
  const bare = word.toLowerCase().replace(/^['-]+|['-]+$/g, '');

  // Evaluated lazily and in order; the first rule that applies wins.
  const ignoreRules = [
    () => bare.length <= 2, // also covers the empty string
    () => allowedTerms.has(bare),
    () => /^[A-Z0-9_-]{2,}$/.test(word),
    () => /\d/.test(word)
  ];

  return ignoreRules.some(rule => rule());
}
73
+
74
/**
 * One word written in Latin script: a letter followed by letters, combining
 * accents, apostrophes or hyphens.
 *
 * Besides A-Z/a-z the class covers the accented Latin letters U+00C0-U+024F
 * (minus the multiplication and division signs) and U+1E00-U+1EFF, plus the
 * combining marks U+0300-U+036F inside a word. An ASCII-only class cuts words
 * at every accented letter, and the fragments can look English: "allé" would
 * yield "all", "noté" would yield "not", "hasło" would yield "has".
 *
 * Other scripts are deliberately not matched, so English words embedded in
 * unspaced CJK text are still found.
 *
 * The `g` flag is safe here because the pattern is only used with
 * String.prototype.match, which restarts from index 0 on every call.
 */
const LATIN_WORD_PATTERN = /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u024F\u1E00-\u1EFF][A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u024F\u1E00-\u1EFF\u0300-\u036F'-]*/g;

/**
 * Measure how much of a translation value consists of known English words.
 *
 * URLs, e-mail addresses, markup and placeholders are stripped first. Tokens
 * rejected by isIgnoredEnglishToken (very short tokens, allowed terms,
 * all-caps identifiers, tokens with digits) are not counted at all.
 *
 * @param {*} value - Translation value to analyse.
 * @param {object} [options]
 * @param {string[]} [options.allowedEnglishTerms] - Extra terms that never count as words.
 * @returns {{englishPercentage: number, englishWordCount: number, totalWordCount: number, englishWords: string[]}}
 *   `englishPercentage` is rounded to two decimals and is 0 when nothing was
 *   counted; `englishWords` holds at most the first 8 distinct English words.
 */
function analyzeEnglishContent(value, options = {}) {
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const settings = options || {};
  const allowedTerms = toAllowedTermSet(settings.allowedEnglishTerms);
  const words = stripNonLanguageTokens(value).match(LATIN_WORD_PATTERN) || [];

  let totalWordCount = 0;
  const englishWords = [];

  for (const word of words) {
    if (isIgnoredEnglishToken(word, allowedTerms)) continue;

    const normalized = word.toLowerCase().replace(/^['-]+|['-]+$/g, '');
    totalWordCount += 1;
    if (ENGLISH_WORDS.has(normalized)) {
      englishWords.push(normalized);
    }
  }

  const englishWordCount = englishWords.length;
  const englishPercentage = totalWordCount === 0
    ? 0
    : Number(((englishWordCount / totalWordCount) * 100).toFixed(2));

  return {
    englishPercentage,
    englishWordCount,
    totalWordCount,
    englishWords: [...new Set(englishWords)].slice(0, 8)
  };
}
103
+
104
/**
 * Tell whether a translation value looks like a credential rather than text.
 *
 * The value alone is enough when it contains a PEM private-key header, a
 * Bearer token, a `name = value` credential assignment, or a JWT. Otherwise
 * it is reported only when the key path names a credential AND the value
 * matches OPAQUE_SECRET_PATTERN.
 *
 * @param {*} value - Translation value; falsy values are treated as ''.
 * @param {string} [keyPath=''] - Key under which the value is stored.
 * @returns {boolean} True when the value resembles a secret.
 */
function hasSecretLikeValue(value, keyPath = '') {
  const text = String(value || '');

  // Shapes that are suspicious whatever key they are stored under.
  const selfEvidentPatterns = [
    PRIVATE_KEY_PATTERN,
    BEARER_TOKEN_PATTERN,
    CREDENTIAL_ASSIGNMENT_PATTERN,
    JWT_PATTERN
  ];
  if (selfEvidentPatterns.some(pattern => pattern.test(text))) {
    return true;
  }

  // An opaque token only counts when the key itself names a credential.
  if (!CREDENTIAL_KEY_PATTERN.test(keyPath)) {
    return false;
  }
  return OPAQUE_SECRET_PATTERN.test(text);
}
117
+
118
/**
 * Work out which English-percentage threshold applies.
 *
 * `Number(null)`, `Number('')` and `Number('   ')` all evaluate to 0, so an
 * unset option would silently become a 0% threshold if it were passed
 * straight to Number(). Unset and blank values therefore use the default.
 * An explicit numeric 0 is still honoured.
 *
 * @param {*} rawThreshold - Caller-supplied `englishThresholdPercent`.
 * @returns {number} A finite threshold in percent.
 */
function resolveEnglishThresholdPercent(rawThreshold) {
  const isBlankString = typeof rawThreshold === 'string' && rawThreshold.trim() === '';
  if (rawThreshold == null || isBlankString) {
    return DEFAULT_ENGLISH_THRESHOLD_PERCENT;
  }

  const parsed = Number(rawThreshold);
  return Number.isFinite(parsed) ? parsed : DEFAULT_ENGLISH_THRESHOLD_PERCENT;
}

/**
 * Inspect one translation value and list everything a reviewer should check.
 *
 * Possible issue types, in the order they are reported:
 *   - `url`             the value contains a URL,
 *   - `email`           the value contains an e-mail address,
 *   - `secret`          the value looks like a credential,
 *   - `english_content` the value appears to be untranslated English.
 *
 * The English check runs only when a target language is given, it differs
 * from the source language, and it is not English itself (English text is
 * exactly what an English locale should contain). It reports when the share
 * of English words exceeds the threshold and at least 3 English words were
 * found.
 *
 * @param {*} value - Translation value; falsy values are treated as ''.
 * @param {object} [options]
 * @param {string} [options.keyPath] - Key of the value, used by the secret check.
 * @param {string} [options.sourceLanguage='en'] - Locale of the source text.
 * @param {string} [options.targetLanguage] - Locale of the value being checked.
 * @param {number|string} [options.englishThresholdPercent] - Overrides the default threshold.
 * @param {string[]} [options.allowedEnglishTerms] - Forwarded to the English analysis.
 * @returns {Array<object>} Issues, each with `type` and `reason`; `english_content`
 *   issues also carry the measured statistics. Empty when nothing was found.
 */
function detectTranslationContentRisks(value, options = {}) {
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const settings = options || {};
  const valueStr = String(value || '');
  const issues = [];

  if (URL_PATTERN.test(valueStr)) {
    issues.push({
      type: 'url',
      reason: 'Contains a URL; verify it is intentional and localized where needed.'
    });
  }

  if (EMAIL_PATTERN.test(valueStr)) {
    issues.push({
      type: 'email',
      reason: 'Contains an email address; verify it is intentional public contact content.'
    });
  }

  if (hasSecretLikeValue(valueStr, settings.keyPath)) {
    issues.push({
      type: 'secret',
      reason: 'Looks like a credential or secret value, not ordinary translated content.'
    });
  }

  const sourceLanguage = normalizeLanguage(settings.sourceLanguage || 'en');
  const targetLanguage = normalizeLanguage(settings.targetLanguage);
  const isTranslationIntoNonEnglish =
    targetLanguage !== '' &&
    targetLanguage !== sourceLanguage &&
    targetLanguage !== 'en';
  if (!isTranslationIntoNonEnglish) {
    return issues;
  }

  const threshold = resolveEnglishThresholdPercent(settings.englishThresholdPercent);
  const english = analyzeEnglishContent(valueStr, settings);

  if (english.englishPercentage > threshold && english.englishWordCount >= 3) {
    issues.push({
      type: 'english_content',
      reason: `Possible untranslated English content (${english.englishPercentage}% English words, threshold ${threshold}%).`,
      englishPercentage: english.englishPercentage,
      englishThresholdPercent: threshold,
      englishWordCount: english.englishWordCount,
      totalWordCount: english.totalWordCount,
      englishWords: english.englishWords
    });
  }

  return issues;
}
169
+
170
// Public surface of this module: the default threshold and the three
// analysis entry points. Every other name in the file stays module-private.
const publicApi = {
  DEFAULT_ENGLISH_THRESHOLD_PERCENT,
  analyzeEnglishContent,
  detectTranslationContentRisks,
  hasSecretLikeValue
};

module.exports = publicApi;