i18ntk 3.3.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -109,49 +109,64 @@ class I18nTextScanner {
109
109
  const args = process.argv.slice(2);
110
110
  const parsed = {};
111
111
 
112
- args.forEach(arg => {
113
- if (arg.startsWith('--')) {
114
- const [key, ...valueParts] = arg.substring(2).split('=');
115
- const value = valueParts.join('=');
116
-
117
- switch (key) {
118
- case 'source-dir':
119
- parsed.sourceDir = value || '';
120
- break;
121
- case 'framework':
122
- parsed.framework = value || '';
123
- break;
124
- case 'patterns':
125
- parsed.patterns = value ? value.split(',').map(p => p.trim()).filter(Boolean) : [];
126
- break;
127
- case 'exclude':
128
- parsed.exclude = value ? value.split(',').map(e => e.trim()).filter(Boolean) : [];
129
- break;
130
- case 'output-dir':
131
- parsed.outputDir = value || '';
132
- break;
133
- case 'min-length':
134
- parsed.minLength = parseInt(value) || 3;
135
- break;
136
- case 'max-length':
137
- parsed.maxLength = parseInt(value) || 100;
138
- break;
139
- case 'output-report':
140
- parsed.outputReport = true;
112
+ for (let i = 0; i < args.length; i++) {
113
+ const arg = args[i];
114
+ if (arg.startsWith('--')) {
115
+ const [key, ...valueParts] = arg.substring(2).split('=');
116
+ let value = valueParts.join('=');
117
+ if (!value && args[i + 1] && !args[i + 1].startsWith('--')) {
118
+ value = args[i + 1];
119
+ }
120
+
121
+ switch (key) {
122
+ case 'source-dir':
123
+ parsed.sourceDir = value || '';
124
+ if (value === args[i + 1]) i++;
125
+ break;
126
+ case 'framework':
127
+ parsed.framework = value || '';
128
+ if (value === args[i + 1]) i++;
129
+ break;
130
+ case 'patterns':
131
+ parsed.patterns = value ? value.split(',').map(p => p.trim()).filter(Boolean) : [];
132
+ if (value === args[i + 1]) i++;
133
+ break;
134
+ case 'exclude':
135
+ parsed.exclude = value ? value.split(',').map(e => e.trim()).filter(Boolean) : [];
136
+ if (value === args[i + 1]) i++;
137
+ break;
138
+ case 'output-dir':
139
+ parsed.outputDir = value || '';
140
+ if (value === args[i + 1]) i++;
141
+ break;
142
+ case 'min-length':
143
+ parsed.minLength = parseInt(value) || 3;
144
+ if (value === args[i + 1]) i++;
145
+ break;
146
+ case 'max-length':
147
+ parsed.maxLength = parseInt(value) || 100;
148
+ if (value === args[i + 1]) i++;
149
+ break;
150
+ case 'output-report':
151
+ parsed.outputReport = true;
141
152
  break;
142
153
  case 'include-tests':
143
- parsed.includeTests = true;
144
- break;
145
- case 'help':
146
- case 'h':
147
- parsed.help = true;
148
- break;
149
- }
150
- }
151
- });
152
-
153
- return parsed;
154
- }
154
+ parsed.includeTests = true;
155
+ break;
156
+ case 'source-language':
157
+ parsed.sourceLanguage = value || '';
158
+ if (value === args[i + 1]) i++;
159
+ break;
160
+ case 'help':
161
+ case 'h':
162
+ parsed.help = true;
163
+ break;
164
+ }
165
+ }
166
+ }
167
+
168
+ return parsed;
169
+ }
155
170
 
156
171
  detectFramework(projectRoot) {
157
172
  const packagePath = path.join(projectRoot, 'package.json');
@@ -293,43 +308,147 @@ class I18nTextScanner {
293
308
  }
294
309
 
295
310
  isEnglishText(text) {
296
- // Enhanced text detection for Unicode and multilingual support
297
311
  const trimmed = text.trim();
298
312
  if (trimmed.length < 3) return false;
299
313
 
300
- // Skip if it's just numbers or special characters
301
314
  if (/^\d+$/.test(trimmed)) return false;
302
315
  if (/^[!@#$%^&*()_+\-=\[\]{};':"\\|,.<>?]+$/.test(trimmed)) return false;
303
316
 
304
- // Allow Unicode characters including CJK, Cyrillic, etc.
305
317
  const validChars = trimmed.match(/[\p{L}\p{N}\s\-,.!?':"()\[\]{}]/gu) || [];
306
318
  const validRatio = validChars.length / trimmed.length;
307
319
 
308
- // Must have at least 50% valid characters and some alphabetic characters
309
320
  const hasAlpha = /[a-zA-Z\u00C0-\u024F\u1E00-\u1EFF\u0400-\u04FF\u4E00-\u9FFF\uAC00-\uD7AF]/u.test(trimmed);
310
321
 
311
322
  return validRatio >= 0.5 && hasAlpha;
312
323
  }
313
324
 
325
+ getLanguageProfile(langCode) {
326
+ const profiles = {
327
+ en: {
328
+ name: 'English',
329
+ charRegex: /[a-zA-Z\u00C0-\u024F]/u,
330
+ stopwords: ['the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'has', 'have', 'from', 'they', 'that', 'with', 'this', 'will', 'your', 'which', 'their', 'them', 'than', 'then', 'been', 'being', 'would', 'should', 'could', 'about', 'after'],
331
+ minLength: 3,
332
+ maxLength: 150
333
+ },
334
+ de: {
335
+ name: 'German',
336
+ charRegex: /[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u00DF\u1E00-\u1EFF]/u,
337
+ stopwords: ['der', 'die', 'das', 'und', 'ist', 'von', 'mit', 'sich', 'des', 'auf', 'dem', 'nicht', 'ein', 'eine', 'auch', 'als', 'aus', 'bei', 'nach', 'wie', 'oder', 'war', 'hat', 'ich', 'sie', 'einem', 'um', 'am', 'im', 'es'],
338
+ minLength: 3,
339
+ maxLength: 180
340
+ },
341
+ fr: {
342
+ name: 'French',
343
+ charRegex: /[a-zA-Z\u00C0-\u00FF\u0152\u0153]/u,
344
+ stopwords: ['le', 'la', 'les', 'des', 'est', 'pas', 'que', 'une', 'dans', 'sur', 'plus', 'par', 'pour', 'avec', 'aux', 'ces', 'ses', 'mes', 'tes', 'notre', 'votre', 'leur', 'dont', 'sont', 'comme', 'mais', 'alors', 'peut', 'tout', 'tous', 'fait'],
345
+ minLength: 3,
346
+ maxLength: 170
347
+ },
348
+ es: {
349
+ name: 'Spanish',
350
+ charRegex: /[a-zA-Z\u00C0-\u00FF\u00F1\u00D1]/u,
351
+ stopwords: ['que', 'los', 'las', 'del', 'como', 'por', 'para', 'con', 'una', 'sus', 'muy', 'más', 'pero', 'este', 'esta', 'hay', 'son', 'eran', 'fue', 'han', 'será', 'está', 'todo', 'otro', 'otra'],
352
+ minLength: 3,
353
+ maxLength: 150
354
+ },
355
+ ja: {
356
+ name: 'Japanese',
357
+ charRegex: /[\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\uFF66-\uFF9F]/u,
358
+ stopwords: ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て', 'と', 'し', 'れ', 'さ', 'る', 'す', 'ん', 'な', 'い', 'か', 'ま', 'も', 'こ', 'り', 'ち', 'き', 'ょ', 'う'],
359
+ minLength: 2,
360
+ maxLength: 80
361
+ },
362
+ zh: {
363
+ name: 'Chinese',
364
+ charRegex: /[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF]/u,
365
+ stopwords: ['的', '是', '在', '不', '了', '有', '和', '人', '这', '中', '大', '为', '上', '个', '国', '我', '以', '要', '他', '时', '来', '用', '们', '生', '到', '作', '地'],
366
+ minLength: 1,
367
+ maxLength: 50
368
+ },
369
+ ru: {
370
+ name: 'Russian',
371
+ charRegex: /[\u0400-\u04FF\u0500-\u052F]/u,
372
+ stopwords: ['и', 'в', 'не', 'на', 'что', 'как', 'по', 'к', 'от', 'это', 'за', 'то', 'для', 'все', 'его', 'она', 'так', 'же', 'но', 'был', 'быть', 'еще', 'уже', 'кто', 'мой', 'ее', 'их', 'из'],
373
+ minLength: 2,
374
+ maxLength: 200
375
+ },
376
+ ko: {
377
+ name: 'Korean',
378
+ charRegex: /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/u,
379
+ stopwords: ['이', '그', '저', '것', '수', '등', '들', '및', '년', '월', '일', '에서', '에게', '으로', '보다', '에게서', '의', '에', '는', '은', '가', '를', '과', '와', '도', '만', '까지', '부터'],
380
+ minLength: 1,
381
+ maxLength: 70
382
+ },
383
+ ar: {
384
+ name: 'Arabic',
385
+ charRegex: /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]/u,
386
+ stopwords: ['في', 'من', 'على', 'عن', 'مع', 'هو', 'هي', 'كان', 'هذا', 'ذلك', 'بين', 'بعد', 'قبل', 'عند', 'حتى', 'الى', 'او', 'لا', 'ما', 'لم', 'لن', 'كل', 'بعض', 'أي'],
387
+ minLength: 2,
388
+ maxLength: 150
389
+ },
390
+ hi: {
391
+ name: 'Hindi',
392
+ charRegex: /[\u0900-\u097F]/u,
393
+ stopwords: ['का', 'की', 'के', 'है', 'हैं', 'था', 'थे', 'होगा', 'होगी', 'में', 'से', 'पर', 'को', 'तक', 'और', 'या', 'लेकिन', 'जब', 'तब', 'कि', 'यह', 'वह', 'एक', 'दो'],
394
+ minLength: 2,
395
+ maxLength: 160
396
+ },
397
+ vanilla: {
398
+ name: 'Generic Latin',
399
+ charRegex: /[a-zA-Z\u00C0-\u024F]/u,
400
+ stopwords: [],
401
+ minLength: 3,
402
+ maxLength: 150
403
+ }
404
+ };
405
+ return profiles[langCode] || profiles.en;
406
+ }
407
+
408
+ isTextInLanguage(text, langCode) {
409
+ const profile = this.getLanguageProfile(langCode);
410
+ const trimmed = text.trim();
411
+
412
+ if (trimmed.length < profile.minLength) return false;
413
+ if (trimmed.length > profile.maxLength) return false;
414
+
415
+ if (/^\d+$/.test(trimmed)) return false;
416
+ if (/^[!@#$%^&*()_+\-=\[\]{};':"\\|,.<>?]+$/.test(trimmed)) return false;
417
+
418
+ const hasScriptChar = profile.charRegex.test(trimmed);
419
+ if (!hasScriptChar) return false;
420
+
421
+ if (profile.stopwords.length > 0) {
422
+ const words = trimmed.toLowerCase().split(/\s+/);
423
+ for (const word of words) {
424
+ if (profile.stopwords.includes(word)) return true;
425
+ }
426
+ }
427
+
428
+ const validChars = trimmed.match(/[\p{L}\p{N}\s\-,.!?':"()\[\]{}]/gu) || [];
429
+ const validRatio = validChars.length / trimmed.length;
430
+ return validRatio >= 0.5;
431
+ }
432
+
314
433
  scanFile(filePath, patterns, minLength, maxLength) {
315
434
  try {
316
435
  const content = SecurityUtils.safeReadFileSync(filePath, path.dirname(filePath), 'utf8');
317
436
  const lines = content.split('\n');
318
437
  const results = [];
438
+ const sourceLang = this.sourceLanguage || 'en';
319
439
 
320
440
  patterns.forEach(pattern => {
321
441
  let match;
322
442
  while ((match = pattern.exec(content)) !== null) {
323
443
  const text = match[1] || match[0];
324
444
 
325
- // Skip translation function calls
326
445
  const beforeMatch = content.substring(Math.max(0, match.index - 20), match.index);
327
446
  if (beforeMatch.includes('t(') || beforeMatch.includes('i18next.t(') ||
328
447
  beforeMatch.includes('$t(') || beforeMatch.includes('translate(')) {
329
448
  continue;
330
449
  }
331
450
 
332
- if (text && this.isEnglishText(text) &&
451
+ if (text && this.isTextInLanguage(text, sourceLang) &&
333
452
  text.length >= minLength && text.length <= maxLength) {
334
453
 
335
454
  const lineNumber = content.substring(0, match.index).split('\n').length;
@@ -355,7 +474,23 @@ class I18nTextScanner {
355
474
  }
356
475
 
357
476
  generateSuggestion(text) {
358
- const key = text.toLowerCase()
477
+ const sourceLang = this.sourceLanguage || 'en';
478
+ const transliterations = {
479
+ ja: { 'あ': 'a', 'い': 'i', 'う': 'u', 'え': 'e', 'お': 'o', 'か': 'ka', 'き': 'ki', 'く': 'ku', 'け': 'ke', 'こ': 'ko', 'さ': 'sa', 'し': 'shi', 'す': 'su', 'せ': 'se', 'そ': 'so', 'た': 'ta', 'ち': 'chi', 'つ': 'tsu', 'て': 'te', 'と': 'to', 'な': 'na', 'に': 'ni', 'ぬ': 'nu', 'ね': 'ne', 'の': 'no', 'は': 'ha', 'ひ': 'hi', 'ふ': 'fu', 'へ': 'he', 'ほ': 'ho', 'ま': 'ma', 'み': 'mi', 'む': 'mu', 'め': 'me', 'も': 'mo', 'や': 'ya', 'ゆ': 'yu', 'よ': 'yo', 'ら': 'ra', 'り': 'ri', 'る': 'ru', 'れ': 're', 'ろ': 'ro', 'わ': 'wa', 'を': 'wo', 'ん': 'n' },
480
+ ru: { 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh', 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts', 'ч': 'ch', 'ш': 'sh', 'щ': 'sch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu', 'я': 'ya' },
481
+ zh: { '的': 'de', '一': 'yi', '是': 'shi', '在': 'zai', '不': 'bu', '了': 'le', '有': 'you', '和': 'he', '人': 'ren', '这': 'zhe', '中': 'zhong', '大': 'da', '为': 'wei', '上': 'shang', '个': 'ge', '国': 'guo', '我': 'wo', '以': 'yi_t', '要': 'yao', '他': 'ta', '时': 'shi_t', '来': 'lai', '用': 'yong', '们': 'men', '生': 'sheng', '到': 'dao', '作': 'zuo', '地': 'di' }
482
+ };
483
+
484
+ let transliterated = text;
485
+ const table = transliterations[sourceLang];
486
+ if (table) {
487
+ transliterated = '';
488
+ for (const ch of text) {
489
+ transliterated += table[ch] || ch;
490
+ }
491
+ }
492
+
493
+ const key = transliterated.toLowerCase()
359
494
  .replace(/[^a-z0-9\s]/g, '')
360
495
  .replace(/\s+/g, '_')
361
496
  .substring(0, 50);
@@ -397,6 +532,9 @@ class I18nTextScanner {
397
532
  gettext: `import gettext\ngettext.gettext('${text}')`,
398
533
  underscore: `from gettext import gettext as _\n_('${text}')`,
399
534
  lazy: `from gettext import gettext_lazy as _\n_('${text}')`
535
+ },
536
+ vanilla: {
537
+ generic: `t('ui.${text.toLowerCase().replace(/[^a-z0-9\s]/g, '').replace(/\s+/g, '_')}')`
400
538
  }
401
539
  };
402
540
 
@@ -559,6 +697,9 @@ class I18nTextScanner {
559
697
 
560
698
  this.sourceDir = this.config.sourceDir || './src';
561
699
 
700
+ // Source language for multi-language detection
701
+ this.sourceLanguage = args.sourceLanguage || this.config.sourceLanguage || 'en';
702
+
562
703
  // Resolve framework with precedence: CLI arg > config.framework.preference|string > auto-detect > fallback
563
704
  const cliFramework = args.framework;
564
705
  const cfgFramework = this.config.framework;
@@ -637,4 +778,4 @@ if (require.main === module) {
637
778
  })();
638
779
  }
639
780
 
640
- module.exports = I18nTextScanner;
781
+ module.exports = I18nTextScanner;