transduck 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.d.ts CHANGED
@@ -31,6 +31,14 @@ export interface WarmOptions {
31
31
  configPath?: string;
32
32
  }
33
33
  export declare function runWarm(opts: WarmOptions): Promise<string>;
34
+ export interface ScanOptions { // options accepted by runScan
35
+ dirs: string[]; // directories to scan; runScan falls back to process.cwd() when the list is empty
36
+ warm?: boolean; // when true, runScan translates every found string that is not already stored
37
+ langs?: string[]; // target languages for warm; upper-cased by runScan, configured target languages are used when omitted or empty
38
+ outputPath?: string; // when set, the found entries (minus their 'files' field) are written here as JSON
39
+ configPath?: string; // passed straight to loadConfig
40
+ }
41
+ export declare function runScan(opts: ScanOptions): Promise<string>; // resolves to the newline-joined, human-readable scan report
34
42
  export interface StatsOptions {
35
43
  configPath?: string;
36
44
  }
package/dist/cli.js CHANGED
@@ -9,6 +9,7 @@ import { TranslationStore } from './storage.js';
9
9
  import { translate as backendTranslate, translatePlural as backendTranslatePlural } from './backend.js';
10
10
  import { validateTranslation, extractPlaceholders } from './validation.js';
11
11
  import { getPluralCategory, interpolateVars } from './plural.js';
12
+ import { scanDirectory } from './scanner.js';
12
13
  function hash(text) {
13
14
  return createHash('sha256').update(text).digest('hex');
14
15
  }
@@ -285,6 +286,138 @@ export async function runWarm(opts) {
285
286
  store.close();
286
287
  return `Translated: ${translated} | Skipped: ${skipped} | Failed: ${failed}`;
287
288
  }
289
/**
 * Scan source directories for translatable strings and build a text report.
 *
 * Optionally writes the found entries to a JSON file (`opts.outputPath`) and/or
 * translates every entry that is not yet stored (`opts.warm`).
 *
 * @param opts Scan options; an empty `dirs` list means the current working directory.
 * @returns The report, one item per line, joined with newlines.
 */
export async function runScan(opts) {
    const cfg = loadConfig(opts.configPath);
    const scanDirs = opts.dirs.length > 0 ? opts.dirs : [process.cwd()];
    const entries = scanDirectory(scanDirs);
    const regular = entries.filter(e => !e.plural);
    const plurals = entries.filter(e => e.plural);
    // Locations are "path:line"; drop the line suffix so a file with several
    // matches is counted once instead of once per matching line.
    const filesWithMatches = new Set();
    for (const e of entries) {
        for (const loc of (e.files ?? [])) {
            filesWithMatches.add(loc.replace(/:\d+$/, ''));
        }
    }
    const lines = [];
    lines.push(`Scanned files with matches: ${filesWithMatches.size}`);
    lines.push(`Found ${entries.length} strings (${regular.length} regular, ${plurals.length} plural)`);
    lines.push('');
    for (const e of entries) {
        const locations = (e.files ?? []).join(', ');
        if (e.plural) {
            lines.push(` ait_plural("${e.one}", "${e.other}") ${locations}`);
        }
        else {
            const ctx = e.context ? `, context="${e.context}"` : '';
            lines.push(` ait("${e.text}"${ctx}) ${locations}`);
        }
    }
    // Output to JSON: every field except the per-entry location list
    if (opts.outputPath) {
        const outputEntries = entries.map(e => Object.fromEntries(Object.entries(e).filter(([k]) => k !== 'files')));
        writeFileSync(opts.outputPath, JSON.stringify(outputEntries, null, 2));
        lines.push(`\nWrote ${entries.length} entries to ${opts.outputPath}`);
    }
    // Warm: translate everything that is not cached yet
    if (opts.warm) {
        const targetLangs = opts.langs && opts.langs.length > 0
            ? opts.langs.map(l => l.toUpperCase())
            : cfg.targetLangs;
        const apiKey = process.env[cfg.apiKeyEnv];
        const projectContextHash = hash(cfg.projectContext);
        let translated = 0;
        let skipped = 0;
        let failed = 0;
        const store = new TranslationStore(cfg.storagePath);
        await store.initialize();
        try {
            for (const entry of entries) {
                const stringContextHash = hash(entry.context ?? '');
                if (entry.plural) {
                    // Plural pairs are stored under a single NUL-joined key
                    const sourceKey = entry.one + '\x00' + entry.other;
                    for (const lang of targetLangs) {
                        const cachedForms = await store.lookupPlural({
                            sourceText: sourceKey, sourceLang: cfg.sourceLang, targetLang: lang,
                            projectContextHash, stringContextHash,
                        });
                        if (Object.keys(cachedForms).length > 0) {
                            skipped++;
                            continue;
                        }
                        try {
                            const forms = await backendTranslatePlural({
                                one: entry.one, other: entry.other,
                                sourceLang: cfg.sourceLang, targetLang: lang,
                                projectContext: cfg.projectContext, stringContext: entry.context ?? null,
                                apiKey: apiKey, model: cfg.backendModel,
                                timeout: cfg.backendTimeout, maxRetries: cfg.backendMaxRetries,
                            });
                            for (const [cat, translatedText] of Object.entries(forms)) {
                                await store.insertPlural({
                                    sourceText: sourceKey, sourceLang: cfg.sourceLang, targetLang: lang,
                                    projectContextHash, stringContextHash,
                                    pluralCategory: cat, translatedText: translatedText,
                                    model: cfg.backendModel, status: 'translated',
                                });
                            }
                            translated++;
                        }
                        catch {
                            // Best effort: a failed translation is counted, not fatal
                            failed++;
                        }
                    }
                }
                else {
                    for (const lang of targetLangs) {
                        const cached = await store.lookup({
                            sourceText: entry.text, sourceLang: cfg.sourceLang, targetLang: lang,
                            projectContextHash, stringContextHash,
                        });
                        if (cached !== null) {
                            skipped++;
                            continue;
                        }
                        try {
                            const result = await backendTranslate({
                                sourceText: entry.text, sourceLang: cfg.sourceLang, targetLang: lang,
                                projectContext: cfg.projectContext, stringContext: entry.context ?? null,
                                apiKey: apiKey, model: cfg.backendModel,
                                timeout: cfg.backendTimeout, maxRetries: cfg.backendMaxRetries,
                            });
                            if (validateTranslation(entry.text, result)) {
                                await store.insert({
                                    sourceText: entry.text, sourceLang: cfg.sourceLang, targetLang: lang,
                                    projectContextHash, stringContextHash,
                                    translatedText: result, model: cfg.backendModel, status: 'translated',
                                });
                                translated++;
                            }
                            else {
                                failed++;
                            }
                        }
                        catch {
                            // Best effort: a failed translation is counted, not fatal
                            failed++;
                        }
                    }
                }
            }
        }
        finally {
            // Release the store even when a lookup outside the per-call try blocks throws
            store.close();
        }
        lines.push(`\nTranslated: ${translated} | Skipped: ${skipped} | Failed: ${failed}`);
    }
    if (!opts.warm && !opts.outputPath) {
        lines.push(`\nRun 'transduck scan --warm --langs DE,ES' to translate all strings.`);
    }
    return lines.join('\n');
}
288
421
  export async function runStats(opts) {
289
422
  const cfg = loadConfig(opts.configPath);
290
423
  const store = new TranslationStore(cfg.storagePath);
@@ -375,6 +508,23 @@ program.command('stats')
375
508
  const output = await runStats({ configPath: opts.config ?? '' });
376
509
  console.log(output);
377
510
  });
511
// `scan` subcommand: report translatable strings, optionally exporting and/or translating them.
program.command('scan')
    .description('Scan source code for translatable strings')
    .option('--dir <path...>', 'Directories to scan (repeatable)')
    .option('--warm', 'Translate all found strings')
    .option('--langs <langs>', 'Comma-separated target languages (for --warm)')
    .option('--output <path>', 'Write found strings to JSON')
    .option('--config <path>', 'Path to transduck.yaml')
    .action(async (opts) => {
    // Drop empty items so "DE,,ES" or a trailing comma never yields a blank language code
    const langs = opts.langs
        ? opts.langs.split(',').map(s => s.trim()).filter(s => s.length > 0)
        : [];
    const output = await runScan({
        dirs: opts.dir ?? [],
        warm: opts.warm ?? false,
        langs: langs.length > 0 ? langs : undefined,
        outputPath: opts.output,
        configPath: opts.config,
    });
    console.log(output);
});
378
528
  export { program };
379
529
  // Run CLI when executed directly (not when imported by tests or other modules)
380
530
  if (typeof process !== 'undefined' && process.argv[1]) {
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Source code scanner for TransDuck ait() and ait_plural()/aitPlural() calls.
3
+ */
4
+ export interface ScanEntry { // one translatable string found by the scanner
5
+ text?: string; // source text of a regular ait() call or {% ait %} tag
6
+ context?: string | null; // translation context; null when the match carries none (always null for plural matches)
7
+ plural?: true; // present only on ait_plural()/aitPlural() matches
8
+ one?: string; // first string argument of a plural call
9
+ other?: string; // second string argument of a plural call
10
+ line?: number; // 1-based line of the match; set by extractStrings, removed by scanDirectory
11
+ files?: string[]; // "path:line" locations of the string; set by scanDirectory
12
+ }
13
+ /**
14
+ * Extract translatable strings from file content.
+ * The extension of `filename` selects the syntax: JS-style files use a positional
+ * context argument, other files use `context="..."`, and template files are
+ * additionally searched for `{% ait ... %}` tags.
15
+ */
16
+ export declare function extractStrings(content: string, filename: string): ScanEntry[];
17
+ /**
18
+ * Walk directories and extract all translatable strings.
19
+ * Returns deduplicated list of entries with 'files' field listing all locations.
+ * Entries are deduplicated on text (or one/other for plurals) plus context.
20
+ */
21
+ export declare function scanDirectory(dirs: string[]): ScanEntry[];
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Source code scanner for TransDuck ait() and ait_plural()/aitPlural() calls.
3
+ */
4
+ import { readdirSync, readFileSync, statSync } from 'fs';
5
+ import { join, extname } from 'path';
6
// --- Regex patterns ---
// Every string-literal group is `(quote)(body)\quote`, where the body accepts
// backslash-escaped characters so an escaped quote does not end the literal.
// The leading \b keeps identifiers that merely end in "ait" (wait, trait, ...)
// from being treated as ait() calls.
// ait("text") or ait("text", context="ctx") — Python/template keyword style
const AIT_KEYWORD_CTX = /\bait\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1(?:\s*,\s*context\s*=\s*(['"])((?:\\.|(?!\3)[^\\\r\n])*)\3)?/g;
// ait("text") or ait("text", "ctx") — JS positional style
const AIT_POSITIONAL_CTX = /\bait\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1(?:\s*,\s*(['"])((?:\\.|(?!\3)[^\\\r\n])*)\3)?/g;
// ait_plural("one", "other") or aitPlural("one", "other")
const AIT_PLURAL = /\bait(?:_p|P)lural\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1\s*,\s*(['"])((?:\\.|(?!\3)[^\\\r\n])*)\3/g;
// {% ait "text" %} or {% ait "text" context="ctx" %}
const DJANGO_TAG = /\{%\s*ait\s+(['"])(.*?)\1(?:\s+context=(['"])(.*?)\3)?\s*%\}/g;
// File extensions that use JS-style positional context
const JS_EXTENSIONS = new Set(['.js', '.ts', '.tsx', '.jsx']);
// File extensions that may contain Django template tags
const TEMPLATE_EXTENSIONS = new Set(['.html', '.jinja', '.jinja2']);
// Supported file extensions for scanning
const SCAN_EXTENSIONS = new Set(['.py', '.js', '.ts', '.tsx', '.jsx', '.html', '.jinja', '.jinja2']);
// Directories to skip
const SKIP_DIRS = new Set(['node_modules', '.venv', 'venv', '__pycache__', '.git', 'dist', 'build', '.next']);
/** True for directories the walker must not descend into. */
function shouldSkipDir(dirname) {
    return SKIP_DIRS.has(dirname) || dirname.includes('egg-info');
}
// Escape sequences decoded by unescapeLiteral, besides quotes and backslash
const SIMPLE_ESCAPES = { n: '\n', t: '\t', r: '\r' };
/** Decode the common backslash escapes of a captured string-literal body. */
function unescapeLiteral(raw) {
    return raw.replace(/\\(['"\\ntr])/g, (_m, ch) => SIMPLE_ESCAPES[ch] ?? ch);
}
/** 1-based line number of the character at `index`. */
function lineNumberAt(content, index) {
    let line = 1;
    for (let i = content.indexOf('\n'); i !== -1 && i < index; i = content.indexOf('\n', i + 1)) {
        line++;
    }
    return line;
}
/**
 * Extract translatable strings from file content.
 *
 * @param content  Full text of the file.
 * @param filename File name; only its extension is used, to pick the call syntax.
 * @returns Plural matches first, then template tags, then ait() calls, each with
 *          the 1-based line of the match.
 */
export function extractStrings(content, filename) {
    const results = [];
    const ext = extname(filename).toLowerCase();
    const isJs = JS_EXTENSIONS.has(ext);
    const isTemplate = TEMPLATE_EXTENSIONS.has(ext);
    // Spans of plural calls, so ait() text quoted inside them is not reported again
    const pluralSpans = [];
    let match;
    // 1. Plural calls
    const pluralRegex = new RegExp(AIT_PLURAL.source, 'g');
    while ((match = pluralRegex.exec(content)) !== null) {
        results.push({
            plural: true,
            one: unescapeLiteral(match[2] ?? ''),
            other: unescapeLiteral(match[4] ?? ''),
            context: null,
            line: lineNumberAt(content, match.index),
        });
        pluralSpans.push([match.index, match.index + match[0].length]);
    }
    // 2. Django template tags (only in template files)
    if (isTemplate) {
        const djangoRegex = new RegExp(DJANGO_TAG.source, 'g');
        while ((match = djangoRegex.exec(content)) !== null) {
            results.push({ text: match[2], context: match[4] || null, line: lineNumberAt(content, match.index) });
        }
    }
    // 3. ait() calls. The pattern cannot match ait_plural( / aitPlural(, so the
    //    span check is the only plural-related filter that is needed.
    const aitRegex = new RegExp((isJs ? AIT_POSITIONAL_CTX : AIT_KEYWORD_CTX).source, 'g');
    while ((match = aitRegex.exec(content)) !== null) {
        const pos = match.index;
        if (pluralSpans.some(([start, end]) => pos >= start && pos < end)) {
            continue;
        }
        results.push({
            text: unescapeLiteral(match[2] ?? ''),
            context: match[4] ? unescapeLiteral(match[4]) : null,
            line: lineNumberAt(content, pos),
        });
    }
    return results;
}
88
/**
 * Walk directories and extract all translatable strings.
 * Returns deduplicated list of entries with 'files' field listing all locations.
 *
 * @param dirs Root directories to walk; unreadable directories are skipped.
 */
export function scanDirectory(dirs) {
    const rawMatches = new Map();
    for (const scanDir of dirs) {
        walkDir(scanDir, rawMatches);
    }
    return Array.from(rawMatches.values());
}
/**
 * Recursively scan `dir`, merging every match into `rawMatches`.
 * Entries are keyed on text (or one/other for plurals) plus context; unreadable
 * directories and files are skipped silently.
 */
function walkDir(dir, rawMatches) {
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        return;
    }
    for (const name of names) {
        const fullPath = join(dir, name);
        let stat;
        try {
            stat = statSync(fullPath);
        }
        catch {
            continue;
        }
        if (stat.isDirectory()) {
            if (!shouldSkipDir(name)) {
                walkDir(fullPath, rawMatches);
            }
            continue;
        }
        if (!stat.isFile() || !SCAN_EXTENSIONS.has(extname(name).toLowerCase())) {
            continue;
        }
        let content;
        try {
            content = readFileSync(fullPath, 'utf-8');
        }
        catch {
            continue;
        }
        for (const entry of extractStrings(content, name)) {
            // Build dedup key
            const key = entry.plural
                ? `plural:${entry.one}\x00${entry.other}\x00${entry.context ?? ''}`
                : `text:${entry.text}\x00${entry.context ?? ''}`;
            const fileLoc = `${fullPath}:${entry.line}`;
            const existing = rawMatches.get(key);
            if (existing) {
                // Overlapping or repeated scan roots visit a file more than once;
                // record each location only once.
                if (!existing.files.includes(fileLoc)) {
                    existing.files.push(fileLoc);
                }
            }
            else {
                const resultEntry = { ...entry, files: [fileLoc] };
                delete resultEntry.line;
                rawMatches.set(key, resultEntry);
            }
        }
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "transduck",
3
- "version": "0.0.4",
3
+ "version": "0.0.5",
4
4
  "description": "AI-native translation tool using source text as keys",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
package/src/cli.ts CHANGED
@@ -10,6 +10,7 @@ import { TranslationStore } from './storage.js';
10
10
  import { translate as backendTranslate, translatePlural as backendTranslatePlural } from './backend.js';
11
11
  import { validateTranslation, extractPlaceholders } from './validation.js';
12
12
  import { getPluralCategory, interpolateVars } from './plural.js';
13
+ import { scanDirectory, type ScanEntry } from './scanner.js';
13
14
 
14
15
  function hash(text: string): string {
15
16
  return createHash('sha256').update(text).digest('hex');
@@ -336,6 +337,161 @@ export async function runWarm(opts: WarmOptions): Promise<string> {
336
337
  return `Translated: ${translated} | Skipped: ${skipped} | Failed: ${failed}`;
337
338
  }
338
339
 
340
/** Options accepted by `runScan`. */
export interface ScanOptions {
  /** Directories to scan; an empty list falls back to the current working directory. */
  dirs: string[];
  /** When true, translate every found string that is not already stored. */
  warm?: boolean;
  /** Target languages for `warm` (upper-cased before use); defaults to the configured target languages. */
  langs?: string[];
  /** When set, the found entries (without their locations) are written here as JSON. */
  outputPath?: string;
  /** Path passed to `loadConfig`. */
  configPath?: string;
}

/**
 * Scan source directories for translatable strings and build a text report.
 *
 * Optionally writes the found entries to a JSON file (`opts.outputPath`) and/or
 * translates every entry that is not yet stored (`opts.warm`).
 *
 * @param opts Scan options; an empty `dirs` list means the current working directory.
 * @returns The report, one item per line, joined with newlines.
 */
export async function runScan(opts: ScanOptions): Promise<string> {
  const cfg = loadConfig(opts.configPath);
  const scanDirs = opts.dirs.length > 0 ? opts.dirs : [process.cwd()];
  const entries = scanDirectory(scanDirs);

  const regular = entries.filter(e => !e.plural);
  const plurals = entries.filter(e => e.plural);

  // Locations are "path:line"; drop the line suffix so a file with several
  // matches is counted once instead of once per matching line.
  const filesWithMatches = new Set<string>();
  for (const e of entries) {
    for (const loc of (e.files ?? [])) {
      filesWithMatches.add(loc.replace(/:\d+$/, ''));
    }
  }

  const lines: string[] = [];
  lines.push(`Scanned files with matches: ${filesWithMatches.size}`);
  lines.push(`Found ${entries.length} strings (${regular.length} regular, ${plurals.length} plural)`);
  lines.push('');

  for (const e of entries) {
    const locations = (e.files ?? []).join(', ');
    if (e.plural) {
      lines.push(` ait_plural("${e.one}", "${e.other}") ${locations}`);
    } else {
      const ctx = e.context ? `, context="${e.context}"` : '';
      lines.push(` ait("${e.text}"${ctx}) ${locations}`);
    }
  }

  // Output to JSON: every field except the per-entry location list
  if (opts.outputPath) {
    const outputEntries = entries.map(
      (e): Record<string, unknown> =>
        Object.fromEntries(Object.entries(e).filter(([k]) => k !== 'files')),
    );
    writeFileSync(opts.outputPath, JSON.stringify(outputEntries, null, 2));
    lines.push(`\nWrote ${entries.length} entries to ${opts.outputPath}`);
  }

  // Warm: translate everything that is not cached yet
  if (opts.warm) {
    const targetLangs = opts.langs && opts.langs.length > 0
      ? opts.langs.map(l => l.toUpperCase())
      : cfg.targetLangs;

    const apiKey = process.env[cfg.apiKeyEnv];
    const projectContextHash = hash(cfg.projectContext);

    let translated = 0;
    let skipped = 0;
    let failed = 0;

    const store = new TranslationStore(cfg.storagePath);
    await store.initialize();
    try {
      for (const entry of entries) {
        const stringContextHash = hash(entry.context ?? '');

        if (entry.plural) {
          // Plural pairs are stored under a single NUL-joined key
          const sourceKey = entry.one + '\x00' + entry.other;

          for (const lang of targetLangs) {
            const cachedForms = await store.lookupPlural({
              sourceText: sourceKey, sourceLang: cfg.sourceLang, targetLang: lang,
              projectContextHash, stringContextHash,
            });

            if (Object.keys(cachedForms).length > 0) {
              skipped++;
              continue;
            }

            try {
              const forms = await backendTranslatePlural({
                one: entry.one!, other: entry.other!,
                sourceLang: cfg.sourceLang, targetLang: lang,
                projectContext: cfg.projectContext, stringContext: entry.context ?? null,
                apiKey: apiKey!, model: cfg.backendModel,
                timeout: cfg.backendTimeout, maxRetries: cfg.backendMaxRetries,
              });

              for (const [cat, translatedText] of Object.entries(forms)) {
                await store.insertPlural({
                  sourceText: sourceKey, sourceLang: cfg.sourceLang, targetLang: lang,
                  projectContextHash, stringContextHash,
                  pluralCategory: cat, translatedText: translatedText as string,
                  model: cfg.backendModel, status: 'translated',
                });
              }
              translated++;
            } catch {
              // Best effort: a failed translation is counted, not fatal
              failed++;
            }
          }
        } else {
          for (const lang of targetLangs) {
            const cached = await store.lookup({
              sourceText: entry.text!, sourceLang: cfg.sourceLang, targetLang: lang,
              projectContextHash, stringContextHash,
            });

            if (cached !== null) {
              skipped++;
              continue;
            }

            try {
              const result = await backendTranslate({
                sourceText: entry.text!, sourceLang: cfg.sourceLang, targetLang: lang,
                projectContext: cfg.projectContext, stringContext: entry.context ?? null,
                apiKey: apiKey!, model: cfg.backendModel,
                timeout: cfg.backendTimeout, maxRetries: cfg.backendMaxRetries,
              });

              if (validateTranslation(entry.text!, result)) {
                await store.insert({
                  sourceText: entry.text!, sourceLang: cfg.sourceLang, targetLang: lang,
                  projectContextHash, stringContextHash,
                  translatedText: result, model: cfg.backendModel, status: 'translated',
                });
                translated++;
              } else {
                failed++;
              }
            } catch {
              // Best effort: a failed translation is counted, not fatal
              failed++;
            }
          }
        }
      }
    } finally {
      // Release the store even when a lookup outside the per-call try blocks throws
      store.close();
    }

    lines.push(`\nTranslated: ${translated} | Skipped: ${skipped} | Failed: ${failed}`);
  }

  if (!opts.warm && !opts.outputPath) {
    lines.push(`\nRun 'transduck scan --warm --langs DE,ES' to translate all strings.`);
  }

  return lines.join('\n');
}
494
+
339
495
  export interface StatsOptions {
340
496
  configPath?: string;
341
497
  }
@@ -441,6 +597,24 @@ program.command('stats')
441
597
  console.log(output);
442
598
  });
443
599
 
600
// `scan` subcommand: report translatable strings, optionally exporting and/or translating them.
program.command('scan')
  .description('Scan source code for translatable strings')
  .option('--dir <path...>', 'Directories to scan (repeatable)')
  .option('--warm', 'Translate all found strings')
  .option('--langs <langs>', 'Comma-separated target languages (for --warm)')
  .option('--output <path>', 'Write found strings to JSON')
  .option('--config <path>', 'Path to transduck.yaml')
  .action(async (opts: { dir?: string[]; warm?: boolean; langs?: string; output?: string; config?: string }) => {
    // Drop empty items so "DE,,ES" or a trailing comma never yields a blank language code
    const langs = opts.langs
      ? opts.langs.split(',').map(s => s.trim()).filter(s => s.length > 0)
      : [];
    const output = await runScan({
      dirs: opts.dir ?? [],
      warm: opts.warm ?? false,
      langs: langs.length > 0 ? langs : undefined,
      outputPath: opts.output,
      configPath: opts.config,
    });
    console.log(output);
  });
617
+
444
618
  export { program };
445
619
 
446
620
  // Run CLI when executed directly (not when imported by tests or other modules)
package/src/scanner.ts ADDED
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Source code scanner for TransDuck ait() and ait_plural()/aitPlural() calls.
3
+ */
4
+
5
+ import { readdirSync, readFileSync, statSync } from 'fs';
6
+ import { join, extname } from 'path';
7
+
8
+ // --- Types ---
9
+
10
/**
 * One translatable string found by the scanner.
 *
 * Regular matches carry `text`; plural matches carry `plural: true` together
 * with `one` and `other`.
 */
export interface ScanEntry {
  /** Source text of a regular `ait()` call or `{% ait %}` tag. */
  text?: string;
  /** Translation context, or `null` when the match carries none. */
  context?: string | null;
  /** Present only on `ait_plural()` / `aitPlural()` matches. */
  plural?: true;
  /** First string argument of a plural call. */
  one?: string;
  /** Second string argument of a plural call. */
  other?: string;
  /** 1-based line of the match; set by `extractStrings`. */
  line?: number;
  /** `path:line` locations of the string; set by `scanDirectory`. */
  files?: string[];
}

// --- Regex patterns ---
// Every string-literal group is `(quote)(body)\quote`, where the body accepts
// backslash-escaped characters so an escaped quote does not end the literal.
// The leading \b keeps identifiers that merely end in "ait" (wait, trait, ...)
// from being treated as ait() calls.

// ait("text") or ait("text", context="ctx") — Python/template keyword style
const AIT_KEYWORD_CTX =
  /\bait\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1(?:\s*,\s*context\s*=\s*(['"])((?:\\.|(?!\3)[^\\\r\n])*)\3)?/g;

// ait("text") or ait("text", "ctx") — JS positional style
const AIT_POSITIONAL_CTX =
  /\bait\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1(?:\s*,\s*(['"])((?:\\.|(?!\3)[^\\\r\n])*)\3)?/g;

// ait_plural("one", "other") or aitPlural("one", "other")
const AIT_PLURAL =
  /\bait(?:_p|P)lural\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1\s*,\s*(['"])((?:\\.|(?!\3)[^\\\r\n])*)\3/g;

// {% ait "text" %} or {% ait "text" context="ctx" %}
const DJANGO_TAG = /\{%\s*ait\s+(['"])(.*?)\1(?:\s+context=(['"])(.*?)\3)?\s*%\}/g;

// File extensions that use JS-style positional context
const JS_EXTENSIONS = new Set(['.js', '.ts', '.tsx', '.jsx']);

// File extensions that may contain Django template tags
const TEMPLATE_EXTENSIONS = new Set(['.html', '.jinja', '.jinja2']);

// Supported file extensions for scanning
const SCAN_EXTENSIONS = new Set(['.py', '.js', '.ts', '.tsx', '.jsx', '.html', '.jinja', '.jinja2']);

// Directories to skip
const SKIP_DIRS = new Set(['node_modules', '.venv', 'venv', '__pycache__', '.git', 'dist', 'build', '.next']);

/** True for directories the walker must not descend into. */
function shouldSkipDir(dirname: string): boolean {
  return SKIP_DIRS.has(dirname) || dirname.includes('egg-info');
}

// Escape sequences decoded by unescapeLiteral, besides quotes and backslash
const SIMPLE_ESCAPES: Record<string, string> = { n: '\n', t: '\t', r: '\r' };

/** Decode the common backslash escapes of a captured string-literal body. */
function unescapeLiteral(raw: string): string {
  return raw.replace(/\\(['"\\ntr])/g, (_m: string, ch: string) => SIMPLE_ESCAPES[ch] ?? ch);
}

/** 1-based line number of the character at `index`. */
function lineNumberAt(content: string, index: number): number {
  let line = 1;
  for (let i = content.indexOf('\n'); i !== -1 && i < index; i = content.indexOf('\n', i + 1)) {
    line++;
  }
  return line;
}

/**
 * Extract translatable strings from file content.
 *
 * @param content  Full text of the file.
 * @param filename File name; only its extension is used, to pick the call syntax.
 * @returns Plural matches first, then template tags, then ait() calls, each with
 *          the 1-based line of the match.
 */
export function extractStrings(content: string, filename: string): ScanEntry[] {
  const results: ScanEntry[] = [];
  const ext = extname(filename).toLowerCase();
  const isJs = JS_EXTENSIONS.has(ext);
  const isTemplate = TEMPLATE_EXTENSIONS.has(ext);

  // Spans of plural calls, so ait() text quoted inside them is not reported again
  const pluralSpans: Array<[number, number]> = [];
  let match: RegExpExecArray | null;

  // 1. Plural calls
  const pluralRegex = new RegExp(AIT_PLURAL.source, 'g');
  while ((match = pluralRegex.exec(content)) !== null) {
    results.push({
      plural: true,
      one: unescapeLiteral(match[2] ?? ''),
      other: unescapeLiteral(match[4] ?? ''),
      context: null,
      line: lineNumberAt(content, match.index),
    });
    pluralSpans.push([match.index, match.index + match[0].length]);
  }

  // 2. Django template tags (only in template files)
  if (isTemplate) {
    const djangoRegex = new RegExp(DJANGO_TAG.source, 'g');
    while ((match = djangoRegex.exec(content)) !== null) {
      results.push({ text: match[2], context: match[4] || null, line: lineNumberAt(content, match.index) });
    }
  }

  // 3. ait() calls. The pattern cannot match ait_plural( / aitPlural(, so the
  //    span check is the only plural-related filter that is needed.
  const aitRegex = new RegExp((isJs ? AIT_POSITIONAL_CTX : AIT_KEYWORD_CTX).source, 'g');
  while ((match = aitRegex.exec(content)) !== null) {
    const pos = match.index;
    if (pluralSpans.some(([start, end]) => pos >= start && pos < end)) {
      continue;
    }
    const rawContext = match[4];
    results.push({
      text: unescapeLiteral(match[2] ?? ''),
      context: rawContext ? unescapeLiteral(rawContext) : null,
      line: lineNumberAt(content, pos),
    });
  }

  return results;
}
117
+
118
/**
 * Walk directories and extract all translatable strings.
 * Returns deduplicated list of entries with 'files' field listing all locations.
 *
 * @param dirs Root directories to walk; unreadable directories are skipped.
 */
export function scanDirectory(dirs: string[]): ScanEntry[] {
  const rawMatches = new Map<string, ScanEntry>();

  for (const scanDir of dirs) {
    walkDir(scanDir, rawMatches);
  }

  return Array.from(rawMatches.values());
}

/**
 * Recursively scan `dir`, merging every match into `rawMatches`.
 * Entries are keyed on text (or one/other for plurals) plus context; unreadable
 * directories and files are skipped silently.
 */
function walkDir(dir: string, rawMatches: Map<string, ScanEntry>): void {
  let names: string[];
  try {
    names = readdirSync(dir);
  } catch {
    return;
  }

  for (const name of names) {
    const fullPath = join(dir, name);
    let stat;
    try {
      stat = statSync(fullPath);
    } catch {
      continue;
    }

    if (stat.isDirectory()) {
      if (!shouldSkipDir(name)) {
        walkDir(fullPath, rawMatches);
      }
      continue;
    }
    if (!stat.isFile() || !SCAN_EXTENSIONS.has(extname(name).toLowerCase())) {
      continue;
    }

    let content: string;
    try {
      content = readFileSync(fullPath, 'utf-8');
    } catch {
      continue;
    }

    for (const entry of extractStrings(content, name)) {
      // Build dedup key
      const key = entry.plural
        ? `plural:${entry.one}\x00${entry.other}\x00${entry.context ?? ''}`
        : `text:${entry.text}\x00${entry.context ?? ''}`;

      const fileLoc = `${fullPath}:${entry.line}`;
      const existing = rawMatches.get(key);

      if (existing) {
        const files = existing.files ?? [];
        existing.files = files;
        // Overlapping or repeated scan roots visit a file more than once;
        // record each location only once.
        if (!files.includes(fileLoc)) {
          files.push(fileLoc);
        }
      } else {
        const resultEntry: ScanEntry = { ...entry, files: [fileLoc] };
        delete resultEntry.line;
        rawMatches.set(key, resultEntry);
      }
    }
  }
}
package/tests/cli.test.ts CHANGED
@@ -1,11 +1,11 @@
1
1
  import { describe, it, expect, vi, beforeEach } from 'vitest';
2
- import { writeFileSync } from 'fs';
2
+ import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs';
3
3
  import { join } from 'path';
4
4
  import { mkdtempSync } from 'fs';
5
5
  import { tmpdir } from 'os';
6
6
  import { createHash } from 'crypto';
7
7
 
8
- import { runInit, runStats, runTranslate, runTranslatePlural, runWarm } from '../src/cli.js';
8
+ import { runInit, runStats, runTranslate, runTranslatePlural, runWarm, runScan } from '../src/cli.js';
9
9
  import { TranslationStore } from '../src/storage.js';
10
10
 
11
11
  function hash(text: string): string {
@@ -159,4 +159,65 @@ describe('CLI functions', () => {
159
159
  // Since plural forms already exist, should be skipped
160
160
  expect(output).toContain('Skipped: 1');
161
161
  });
162
+
163
+ // --- scan ---
164
+
165
it('scan finds strings', async () => {
  const configPath = writeConfig(tmpDir);
  // Create source files to scan
  const srcDir = join(tmpDir, 'src');
  mkdirSync(srcDir);
  writeFileSync(join(srcDir, 'app.py'), 'from transduck import ait\nait("Hello")\nait("World", context="greeting")\n');
  const output = await runScan({
    dirs: [srcDir],
    configPath,
  });
  expect(output).toContain('ait("Hello")');
  expect(output).toContain('ait("World", context="greeting")');
  // Pin the whole summary: a bare '2' also matches the "app.py:2" location
  expect(output).toContain('Found 2 strings (2 regular, 0 plural)');
});

it('scan output JSON', async () => {
  const configPath = writeConfig(tmpDir);
  const srcDir = join(tmpDir, 'src');
  mkdirSync(srcDir);
  writeFileSync(join(srcDir, 'app.py'), 'ait("Hello")\n');
  const outputFile = join(tmpDir, 'strings.json');
  const output = await runScan({
    dirs: [srcDir],
    outputPath: outputFile,
    configPath,
  });
  expect(output).toContain('Wrote');
  expect(existsSync(outputFile)).toBe(true);
  const data = JSON.parse(readFileSync(outputFile, 'utf-8'));
  expect(data).toHaveLength(1);
  expect(data[0].text).toBe('Hello');
  // Locations are report-only and must not leak into the exported JSON
  expect(data[0]).not.toHaveProperty('files');
});

it('scan with warm', async () => {
  const configPath = writeConfig(tmpDir);
  const srcDir = join(tmpDir, 'src');
  mkdirSync(srcDir);
  writeFileSync(join(srcDir, 'app.py'), 'ait("Hello")\n');

  // Pre-populate the DB so warm skips
  const dbPath = join(tmpDir, 'translations.duckdb');
  const store = new TranslationStore(dbPath);
  await store.initialize();
  await store.insert({
    sourceText: 'Hello', sourceLang: 'EN', targetLang: 'DE',
    projectContextHash: hash('A test site'), stringContextHash: hash(''),
    translatedText: 'Hallo', model: 'gpt-4.1-mini', status: 'translated',
  });
  store.close();

  const output = await runScan({
    dirs: [srcDir],
    warm: true,
    langs: ['DE'],
    configPath,
  });
  // The only string is cached, so nothing may reach the backend
  expect(output).toContain('Translated: 0 | Skipped: 1 | Failed: 0');
});
162
223
  });
@@ -0,0 +1,167 @@
1
import { describe, it, expect, afterEach } from 'vitest';
import { writeFileSync, mkdirSync, mkdtempSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { extractStrings, scanDirectory } from '../src/scanner.js';
6
+
7
/**
 * Creates a fresh, uniquely named directory under the OS temp directory.
 *
 * @returns The path of the new directory; its name starts with
 *   `transduck-scanner-test-`.
 */
function makeTmpDir(): string {
  const prefix = join(tmpdir(), 'transduck-scanner-test-');
  const createdPath = mkdtempSync(prefix);
  return createdPath;
}
10
+
11
// Unit tests for extractStrings(source, fileName): each case feeds one
// snippet and checks the extracted entries (text, context, plural forms,
// line number).
describe('extractStrings', () => {
  it('extracts ait with double quotes', () => {
    const source = 'ait("Our Events")';
    const found = extractStrings(source, 'test.py');
    expect(found).toHaveLength(1);
    const entry = found[0];
    expect(entry.text).toBe('Our Events');
    // A plain ait() call must not be flagged as plural.
    expect(entry.plural).toBeUndefined();
  });

  it('extracts ait with single quotes', () => {
    const source = "ait('Our Events')";
    const found = extractStrings(source, 'test.py');
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe('Our Events');
  });

  it('extracts ait with keyword context', () => {
    const source = 'ait("Book Now", context="Hotel booking")';
    const found = extractStrings(source, 'test.py');
    expect(found).toHaveLength(1);
    const entry = found[0];
    expect(entry.text).toBe('Book Now');
    expect(entry.context).toBe('Hotel booking');
  });

  it('extracts ait with positional context (JS style)', () => {
    const source = 'ait("Book Now", "Hotel booking")';
    const found = extractStrings(source, 'test.js');
    expect(found).toHaveLength(1);
    const entry = found[0];
    expect(entry.text).toBe('Book Now');
    expect(entry.context).toBe('Hotel booking');
  });

  it('extracts ait with no context', () => {
    const found = extractStrings('ait("Hello")', 'test.py');
    const entry = found[0];
    expect(entry.context).toBeNull();
  });

  it('extracts ait with vars but no context', () => {
    // The vars keyword argument must not be mistaken for a context.
    const source = 'ait("Welcome {name}", vars={"name": user})';
    const found = extractStrings(source, 'test.py');
    const entry = found[0];
    expect(entry.text).toBe('Welcome {name}');
    expect(entry.context).toBeNull();
  });

  it('extracts ait_plural (Python style)', () => {
    const source = 'ait_plural("{count} message", "{count} messages", count=n)';
    const found = extractStrings(source, 'test.py');
    expect(found).toHaveLength(1);
    const entry = found[0];
    expect(entry.plural).toBe(true);
    expect(entry.one).toBe('{count} message');
    expect(entry.other).toBe('{count} messages');
  });

  it('extracts aitPlural (JS style)', () => {
    const source = "aitPlural('{count} night', '{count} nights', 5)";
    const found = extractStrings(source, 'test.ts');
    expect(found).toHaveLength(1);
    const entry = found[0];
    expect(entry.plural).toBe(true);
    expect(entry.one).toBe('{count} night');
    expect(entry.other).toBe('{count} nights');
  });

  it('extracts Django template tag', () => {
    const source = '{% ait "Our Events" %}';
    const found = extractStrings(source, 'test.html');
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe('Our Events');
  });

  it('extracts Django template tag with context', () => {
    const source = '{% ait "Book Now" context="Hotel booking" %}';
    const found = extractStrings(source, 'test.html');
    const entry = found[0];
    expect(entry.text).toBe('Book Now');
    expect(entry.context).toBe('Hotel booking');
  });

  it('extracts Jinja expression', () => {
    const source = '{{ ait("Our Events") }}';
    const found = extractStrings(source, 'test.html');
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe('Our Events');
  });

  it('extracts multiple strings', () => {
    // Two regular calls and one plural call yield three entries.
    const source = `
ait("Hello")
ait("World", context="greeting")
ait_plural("{count} item", "{count} items", count=n)
`;
    const found = extractStrings(source, 'test.py');
    expect(found).toHaveLength(3);
  });

  it('returns empty for no matches', () => {
    const found = extractStrings('print("Hello")', 'test.py');
    expect(found).toHaveLength(0);
  });

  it('includes line numbers', () => {
    // The call sits on the second line of the input; `line` is 1-based.
    const source = 'line1\nait("Hello")\nline3';
    const found = extractStrings(source, 'test.py');
    expect(found[0].line).toBe(2);
  });
});
104
+
105
// Tests for scanDirectory(dirs): walking directories, de-duplicating
// entries across files, skipping dependency/virtualenv folders and
// ignoring files with non-code extensions.
describe('scanDirectory', () => {
  // Temp directories created by the running test. They are removed after
  // each test so repeated runs do not pile up directories in the OS temp dir.
  const createdDirs: string[] = [];

  // Creates a temp directory and registers it for cleanup in afterEach.
  const trackedTmpDir = (): string => {
    const dir = makeTmpDir();
    createdDirs.push(dir);
    return dir;
  };

  afterEach(() => {
    // splice(0) hands back every tracked path and empties the list in one go.
    for (const dir of createdDirs.splice(0)) {
      // force: true keeps cleanup from throwing if the directory is already gone.
      rmSync(dir, { recursive: true, force: true });
    }
  });

  it('scans directory and deduplicates', () => {
    const tmp = trackedTmpDir();
    writeFileSync(join(tmp, 'app.py'), 'ait("Hello")\nait("World")');
    writeFileSync(join(tmp, 'views.py'), 'ait("Hello")'); // duplicate
    writeFileSync(join(tmp, 'template.html'), '{% ait "Events" %}');
    const result = scanDirectory([tmp]);
    const texts = result.filter(e => !e.plural).map(e => e.text);
    expect(texts).toContain('Hello');
    expect(texts).toContain('World');
    expect(texts).toContain('Events');
    // "Hello" appears in two files but must be reported only once.
    expect(result).toHaveLength(3);
  });

  it('dedup tracks files', () => {
    const tmp = trackedTmpDir();
    writeFileSync(join(tmp, 'a.py'), 'ait("Hello")');
    writeFileSync(join(tmp, 'b.py'), 'ait("Hello")');
    const result = scanDirectory([tmp]);
    expect(result).toHaveLength(1);
    // The single merged entry remembers both files it was found in.
    expect(result[0].files).toHaveLength(2);
  });

  it('skips node_modules', () => {
    const tmp = trackedTmpDir();
    const nm = join(tmp, 'node_modules');
    mkdirSync(nm);
    writeFileSync(join(nm, 'dep.js'), 'ait("Hidden")');
    writeFileSync(join(tmp, 'app.py'), 'ait("Visible")');
    const result = scanDirectory([tmp]);
    expect(result).toHaveLength(1);
    expect(result[0].text).toBe('Visible');
  });

  it('skips .venv', () => {
    const tmp = trackedTmpDir();
    const venv = join(tmp, '.venv');
    mkdirSync(venv);
    writeFileSync(join(venv, 'lib.py'), 'ait("Hidden")');
    writeFileSync(join(tmp, 'app.py'), 'ait("Visible")');
    const result = scanDirectory([tmp]);
    expect(result).toHaveLength(1);
    // Make sure the surviving entry is the one outside .venv, not the hidden one.
    expect(result[0].text).toBe('Visible');
  });

  it('filters extensions', () => {
    const tmp = trackedTmpDir();
    writeFileSync(join(tmp, 'readme.md'), 'ait("Not a code file")');
    writeFileSync(join(tmp, 'app.py'), 'ait("Code file")');
    const result = scanDirectory([tmp]);
    expect(result).toHaveLength(1);
    expect(result[0].text).toBe('Code file');
  });

  it('deduplicates plural entries', () => {
    const tmp = trackedTmpDir();
    writeFileSync(join(tmp, 'a.py'), 'ait_plural("{count} msg", "{count} msgs", count=n)');
    writeFileSync(join(tmp, 'b.py'), 'ait_plural("{count} msg", "{count} msgs", count=n)');
    const result = scanDirectory([tmp]);
    expect(result).toHaveLength(1);
    expect(result[0].plural).toBe(true);
    expect(result[0].files).toHaveLength(2);
  });
});