ai-localize-scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ import * as parser from '@babel/parser';
2
+ import traverse from '@babel/traverse';
3
+ import * as t from '@babel/types';
4
+
5
+ import type { DetectedText, TextContext } from '@ai-localize/shared';
6
+ import {
7
+ isHumanReadableText,
8
+ normalizeText,
9
+ TEXT_ATTRIBUTE_NAMES,
10
+ generateLocaleKey,
11
+ } from '@ai-localize/shared';
12
+
13
/**
 * Input for a single-file {@link AstScanner} run.
 */
export interface AstScanOptions {
  /** Path of the file being scanned; copied onto every finding and used for key generation. */
  filePath: string;
  /** Full source text of the file to parse. */
  content: string;
  /** Source root forwarded to locale-key generation; the scanner falls back to `'src'` when omitted. */
  sourceRoot?: string;
}
18
+
19
/**
 * Module specifiers of the i18n libraries the scanner recognises.
 * Named imports from any of these modules are treated as translation helpers.
 */
const TRANSLATION_IMPORT_SOURCES: Set<string> = new Set<string>([
  'react-i18next',
  'i18next',
  'vue-i18n',
  '@ngx-translate/core',
]);
25
+
26
/**
 * Scans a JS/TS/JSX/TSX file using Babel AST to find hardcoded text.
 *
 * Four node kinds are inspected: JSX text, string-valued JSX attributes whose
 * name is in `TEXT_ATTRIBUTE_NAMES`, plain string literals, and template
 * literals without interpolations. Anything nested inside a call to a known
 * translation function is skipped. If Babel throws while parsing, a
 * line-based regex scan for JSX text is used instead.
 */
export class AstScanner {
  private options: AstScanOptions;
  private detectedTexts: DetectedText[] = [];
  private translationFunctionNames = new Set<string>(['t', '$t', 'i18n', 'translate']);

  constructor(options: AstScanOptions) {
    this.options = options;
  }

  /**
   * Parses `options.content` and returns every hardcoded text found.
   *
   * Each call starts from an empty result list, so calling `scan()` more than
   * once on the same instance does not accumulate duplicate findings.
   *
   * @returns The findings of this run (line is 1-based, column 0-based; both
   *          are 0 when Babel provides no location).
   */
  scan(): DetectedText[] {
    const { content } = this.options;

    // Fresh array per run: previously returned arrays stay untouched and a
    // repeated scan does not append to the earlier results.
    this.detectedTexts = [];

    let ast: t.File;
    try {
      ast = parser.parse(content, {
        sourceType: 'module',
        plugins: [
          'jsx',
          'typescript',
          'decorators-legacy',
          'classProperties',
          'optionalChaining',
          'nullishCoalescingOperator',
          'dynamicImport',
          'exportDefaultFrom',
        ],
        errorRecovery: true,
      });
    } catch {
      return this.regexFallbackScan();
    }

    this.collectTranslationImports(ast);

    traverse(ast, {
      JSXText: (nodePath) => {
        const text = normalizeText(nodePath.node.value);
        if (!isHumanReadableText(text)) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        this.addDetected(
          text,
          nodePath.node.loc?.start.line ?? 0,
          nodePath.node.loc?.start.column ?? 0,
          'jsx-text',
          'JSXText'
        );
      },

      JSXAttribute: (nodePath) => {
        // Namespaced attribute names (e.g. `xlink:href`) map to '' and are ignored.
        const attrName = t.isJSXIdentifier(nodePath.node.name)
          ? nodePath.node.name.name
          : '';
        if (!TEXT_ATTRIBUTE_NAMES.has(attrName.toLowerCase())) return;
        const valueNode = nodePath.node.value;
        if (!t.isStringLiteral(valueNode)) return;
        const text = normalizeText(valueNode.value);
        if (!isHumanReadableText(text)) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        const context = this.mapAttrToContext(attrName);
        this.addDetected(
          text,
          valueNode.loc?.start.line ?? 0,
          valueNode.loc?.start.column ?? 0,
          context,
          'JSXAttribute'
        );
      },

      StringLiteral: (nodePath) => {
        const parent = nodePath.parent;
        // Module specifiers are never user-facing text. This covers
        // `import … from 'x'` as well as `export … from 'x'`.
        if (t.isImportDeclaration(parent)) return;
        if (t.isExportAllDeclaration(parent) || t.isExportNamedDeclaration(parent)) return;
        // `type Mode = 'Dark mode'` is a type, not a runtime string.
        if (t.isTSLiteralType(parent)) return;
        if (t.isObjectProperty(parent) && parent.key === nodePath.node) return;
        // Attribute values are handled (or deliberately ignored) by the JSXAttribute visitor.
        if (t.isJSXAttribute(parent)) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        // Lower-case dotted/underscored identifiers look like keys, not prose.
        if (/^[a-z][a-z0-9_.]+$/.test(nodePath.node.value)) return;
        const text = normalizeText(nodePath.node.value);
        if (!isHumanReadableText(text)) return;
        this.addDetected(
          text,
          nodePath.node.loc?.start.line ?? 0,
          nodePath.node.loc?.start.column ?? 0,
          'string-literal',
          'StringLiteral'
        );
      },

      TemplateLiteral: (nodePath) => {
        // Only static templates are considered; interpolated ones are skipped entirely.
        if (nodePath.node.expressions.length > 0) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        const text = normalizeText(nodePath.node.quasis[0]?.value.cooked ?? '');
        if (!isHumanReadableText(text)) return;
        this.addDetected(
          text,
          nodePath.node.loc?.start.line ?? 0,
          nodePath.node.loc?.start.column ?? 0,
          'template-literal',
          'TemplateLiteral'
        );
      },
    });

    return this.detectedTexts;
  }

  /**
   * Registers the local names of named imports from known i18n packages as
   * translation functions. Only top-level import declarations are inspected;
   * default and namespace imports are not registered.
   */
  private collectTranslationImports(ast: t.File): void {
    for (const node of ast.program.body) {
      if (!t.isImportDeclaration(node)) continue;
      if (!TRANSLATION_IMPORT_SOURCES.has(node.source.value)) continue;
      for (const specifier of node.specifiers) {
        if (t.isImportSpecifier(specifier) && t.isIdentifier(specifier.local)) {
          this.translationFunctionNames.add(specifier.local.name);
        }
      }
    }
  }

  /**
   * Walks up the ancestor chain and reports whether any enclosing call
   * expression invokes a known translation function, either directly
   * (`t('…')`) or as a member (`i18n.t('…')`).
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private isInsideTranslationCall(nodePath: any): boolean {
    let current = nodePath.parentPath;
    while (current) {
      const node = current.node;
      if (t.isCallExpression(node)) {
        const callee = node.callee;
        if (t.isIdentifier(callee) && this.translationFunctionNames.has(callee.name)) {
          return true;
        }
        if (
          t.isMemberExpression(callee) &&
          t.isIdentifier(callee.property) &&
          this.translationFunctionNames.has(callee.property.name)
        ) {
          return true;
        }
      }
      current = current.parentPath;
    }
    return false;
  }

  /**
   * Appends one finding to the current result list, generating its suggested
   * locale key from the file path, the text and the source root (`'src'`
   * when no source root was supplied).
   */
  private addDetected(
    text: string,
    line: number,
    column: number,
    context: TextContext,
    nodeType: string
  ): void {
    const key = generateLocaleKey(
      this.options.filePath,
      text,
      this.options.sourceRoot || 'src'
    );
    this.detectedTexts.push({
      filePath: this.options.filePath,
      line,
      column,
      text,
      suggestedKey: key,
      context,
      nodeType,
      alreadyTranslated: false,
    });
  }

  /** Maps a JSX attribute name (case-insensitive) to the context recorded on the finding. */
  private mapAttrToContext(attrName: string): TextContext {
    const lower = attrName.toLowerCase();
    if (lower === 'placeholder') return 'placeholder';
    if (lower === 'aria-label' || lower === 'aria-placeholder') return 'aria-label';
    if (lower === 'title') return 'title';
    if (lower === 'alt') return 'alt';
    return 'jsx-attribute';
  }

  /**
   * Best-effort scan used when Babel cannot parse the file: every run of
   * characters between `>` and `<` on a single line is treated as JSX text.
   *
   * Lines are 1-based. The column is 0-based and points at the first
   * character after the opening `>`, matching where a JSXText node would
   * start, rather than at the `>` delimiter itself.
   */
  private regexFallbackScan(): DetectedText[] {
    const jsxTextRegex = />([^<>{}\n]+)</g;
    const lines = this.options.content.split('\n');
    lines.forEach((line, idx) => {
      let m: RegExpExecArray | null;
      // The regex is global and reused across lines, so rewind it for each line.
      jsxTextRegex.lastIndex = 0;
      while ((m = jsxTextRegex.exec(line)) !== null) {
        const text = normalizeText(m[1] ?? '');
        if (!isHumanReadableText(text)) continue;
        this.addDetected(text, idx + 1, m.index + 1, 'jsx-text', 'regex-fallback');
      }
    });
    return this.detectedTexts;
  }
}
@@ -0,0 +1,52 @@
1
import { execFileSync, execSync } from 'child_process';
import * as path from 'path';
3
+
4
/**
 * Lists files touched in a git working copy, filtered by file extension.
 *
 * Every query is best-effort: if git is missing, the directory is not a
 * repository, or the revision does not exist, an empty list is returned.
 * Only added, copied and modified files are reported (`--diff-filter=ACM`).
 */
export class GitScanner {
  /** Extensions scanned when the caller does not supply a list. */
  private static readonly DEFAULT_EXTENSIONS: readonly string[] = ['ts', 'tsx', 'js', 'jsx', 'vue'];

  private cwd: string;

  /**
   * @param cwd Directory git commands are run in; defaults to the process working directory.
   */
  constructor(cwd = process.cwd()) {
    this.cwd = cwd;
  }

  /**
   * Returns the staged (index) files with one of the given extensions.
   *
   * @param extensions Extensions without the leading dot.
   * @returns Absolute paths, or `[]` on any git failure.
   */
  getStagedFiles(extensions: readonly string[] = GitScanner.DEFAULT_EXTENSIONS): string[] {
    return this.diffNames(['--cached'], extensions);
  }

  /**
   * Returns files changed on HEAD since it diverged from `base` (`base...HEAD`).
   *
   * @param base Branch, tag or commit to compare against.
   * @param extensions Extensions without the leading dot.
   * @returns Absolute paths, or `[]` on any git failure or unusable `base`.
   */
  getChangedFiles(
    base = 'main',
    extensions: readonly string[] = GitScanner.DEFAULT_EXTENSIONS
  ): string[] {
    // A revision starting with '-' would be parsed by git as an option.
    if (base === '' || base.startsWith('-')) return [];
    return this.diffNames([`${base}...HEAD`], extensions);
  }

  /**
   * Returns files changed in the last `commits` commits (`HEAD~N...HEAD`).
   *
   * @param commits Number of commits to look back; must be a non-negative integer.
   * @param extensions Extensions without the leading dot.
   * @returns Absolute paths, or `[]` on any git failure or invalid `commits`.
   */
  getRecentlyChangedFiles(
    commits = 1,
    extensions: readonly string[] = GitScanner.DEFAULT_EXTENSIONS
  ): string[] {
    if (!Number.isInteger(commits) || commits < 0) return [];
    return this.diffNames([`HEAD~${commits}...HEAD`], extensions);
  }

  /**
   * Runs `git diff --name-only` with the given revision arguments.
   *
   * git is invoked without a shell and with an argument array, so
   * caller-supplied values cannot inject additional commands. `-z` makes git
   * emit NUL-separated, unquoted names, which keeps paths containing
   * non-ASCII or special characters intact.
   */
  private diffNames(revArgs: string[], extensions: readonly string[]): string[] {
    try {
      const out = execFileSync(
        'git',
        ['diff', '--name-only', '-z', '--diff-filter=ACM', ...revArgs, '--'],
        { cwd: this.cwd, encoding: 'utf-8' }
      );
      return this.filter(out.split('\0'), extensions);
    } catch {
      return [];
    }
  }

  /**
   * Returns the top-level directory of the repository containing `cwd`,
   * falling back to `cwd` itself when it cannot be determined.
   */
  private repoRoot(): string {
    try {
      // Constant command string with no interpolation, so running it through a shell is safe.
      const top = execSync('git rev-parse --show-toplevel', {
        cwd: this.cwd,
        encoding: 'utf-8',
      }).trim();
      return top || this.cwd;
    } catch {
      return this.cwd;
    }
  }

  /**
   * Keeps names ending in one of `extensions` and makes them absolute.
   *
   * `git diff --name-only` prints paths relative to the repository root, not
   * to the directory git was run in, so names are joined onto the repository
   * root rather than onto `cwd`.
   */
  private filter(files: string[], extensions: readonly string[]): string[] {
    const matching = files.filter((f) => f && extensions.some((e) => f.endsWith(`.${e}`)));
    if (matching.length === 0) return [];
    const root = this.repoRoot();
    return matching.map((f) => path.join(root, f));
  }
}
@@ -0,0 +1,58 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import * as crypto from 'crypto';
4
+
5
+ import type { IncrementalCache, DetectedText } from '@ai-localize/shared';
6
+ import { readJsonSafe, writeJson, ensureDir } from '@ai-localize/shared';
7
+
8
/**
 * On-disk cache of per-file scan results, keyed by file path and validated
 * by a SHA-256 hash of the file's current contents.
 *
 * The cache lives in `<cacheDir>/scan-cache.json` and is only written when
 * {@link persist} (or {@link clear}) is called.
 */
export class IncrementalScanCache {
  private cachePath: string;
  private cache: IncrementalCache;

  /**
   * @param cacheDir Directory holding the cache file; it is created if missing.
   */
  constructor(cacheDir: string) {
    ensureDir(cacheDir);
    this.cachePath = path.join(cacheDir, 'scan-cache.json');
    this.cache = this.load();
  }

  /** Builds an empty version-1 cache stamped with the current time. */
  private static emptyCache(): IncrementalCache {
    return { version: '1', lastRun: new Date().toISOString(), fileHashes: {}, processedFiles: {} };
  }

  /** True for plain (non-null, non-array) objects. */
  private static isPlainObject(value: unknown): boolean {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /**
   * Reads the cache file. A missing file, a different version, or a file
   * whose lookup maps are absent or malformed yields a fresh empty cache, so
   * a corrupt cache degrades to a full rescan instead of a crash.
   */
  private load(): IncrementalCache {
    const existing = readJsonSafe<IncrementalCache>(this.cachePath);
    if (
      existing?.version === '1' &&
      IncrementalScanCache.isPlainObject(existing.fileHashes) &&
      IncrementalScanCache.isPlainObject(existing.processedFiles)
    ) {
      return existing;
    }
    return IncrementalScanCache.emptyCache();
  }

  /**
   * Reports whether the file differs from what was last cached.
   * A file that cannot be read is always reported as changed.
   */
  isFileChanged(filePath: string): boolean {
    const hash = this.hashFile(filePath);
    if (hash === null) return true;
    return hash !== this.cache.fileHashes[filePath];
  }

  /**
   * Returns the cached findings for the file, or `null` when there is no
   * entry, the contents changed since the entry was stored, or the file can
   * no longer be read.
   */
  getCachedResult(filePath: string): DetectedText[] | null {
    const entry = this.cache.processedFiles[filePath];
    if (!entry) return null;
    const hash = this.hashFile(filePath);
    if (hash === null || entry.hash !== hash) return null;
    return entry.detectedTexts;
  }

  /**
   * Stores the findings for the file together with its current content hash.
   * If the file cannot be hashed, nothing is stored and any previous entry
   * is dropped, so a later lookup cannot match on a placeholder hash.
   */
  setCachedResult(filePath: string, texts: DetectedText[]): void {
    const hash = this.hashFile(filePath);
    if (hash === null) {
      delete this.cache.fileHashes[filePath];
      delete this.cache.processedFiles[filePath];
      return;
    }
    this.cache.fileHashes[filePath] = hash;
    this.cache.processedFiles[filePath] = { hash, detectedTexts: texts, lastModified: Date.now() };
  }

  /** Stamps `lastRun` with the current time and writes the cache to disk. */
  persist(): void {
    this.cache.lastRun = new Date().toISOString();
    writeJson(this.cachePath, this.cache);
  }

  /** Returns the hex SHA-256 of the file's bytes, or `null` if it cannot be read. */
  private hashFile(filePath: string): string | null {
    try {
      return crypto.createHash('sha256').update(fs.readFileSync(filePath)).digest('hex');
    } catch {
      return null;
    }
  }

  /** Discards every entry and immediately persists the empty cache. */
  clear(): void {
    this.cache = IncrementalScanCache.emptyCache();
    this.persist();
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,5 @@
1
+ export * from './ast-scanner.js';
2
+ export * from './asset-scanner.js';
3
+ export * from './incremental-scanner.js';
4
+ export * from './project-scanner.js';
5
+ export * from './git-scanner.js';
@@ -0,0 +1,114 @@
1
+ import * as path from 'path';
2
+ import * as os from 'os';
3
+
4
+ import type {
5
+ DetectedText,
6
+ AssetReference,
7
+ LegacyCdnUrl,
8
+ ScanResult,
9
+ LocalizationConfig,
10
+ } from '@ai-localize/shared';
11
+ import { collectFiles, DEFAULT_IGNORE_DIRS, SOURCE_EXTENSIONS } from '@ai-localize/shared';
12
+
13
+ import { AstScanner } from './ast-scanner.js';
14
+ import { AssetScanner } from './asset-scanner.js';
15
+ import { IncrementalScanCache } from './incremental-scanner.js';
16
+
17
/**
 * Per-call options for a project scan.
 */
export interface ScanOptions {
  /** Explicit list of files to scan; when absent or empty the configured source directory is walked. */
  files?: string[];
  /**
   * Incremental-scan toggle.
   * NOTE(review): not read by the scanner code visible in this package; cache
   * usage appears to be driven by the `incrementalCache` config flag. Confirm
   * the intended semantics.
   */
  incremental?: boolean;
}
21
+
22
/**
 * Scans a whole project (or an explicit file list) for hardcoded texts,
 * asset references and legacy CDN URLs, optionally reusing cached text
 * findings for files whose contents have not changed.
 */
export class ProjectScanner {
  private config: LocalizationConfig;
  private sourceRoot: string;
  private cache?: IncrementalScanCache;
  private assetScanner: AssetScanner;

  /**
   * @param config Project configuration. `sourceDir` and `cacheDir` may be
   *   relative (resolved against the process working directory) or absolute.
   *   The text cache is only created when `incrementalCache` is truthy.
   */
  constructor(config: LocalizationConfig) {
    this.config = config;
    // path.resolve keeps absolute paths intact, whereas path.join would
    // append them to the working directory.
    this.sourceRoot = path.resolve(process.cwd(), config.sourceDir);
    this.assetScanner = new AssetScanner(config.aws?.legacyCdnPattern);
    if (config.incrementalCache) {
      this.cache = new IncrementalScanCache(
        path.resolve(process.cwd(), config.cacheDir || '.ai-localize-cache')
      );
    }
  }

  /**
   * Scans `options.files` when given and non-empty, otherwise every source
   * file under the configured source directory (minus ignored directories
   * and patterns). Files are processed in chunks of at most 50.
   *
   * NOTE(review): `options.incremental` is not consulted here; whether the
   * cache is used depends solely on `config.incrementalCache`.
   *
   * @returns Aggregated findings plus the file count, duration in
   *          milliseconds and an ISO timestamp.
   */
  async scan(options: ScanOptions = {}): Promise<ScanResult> {
    const startTime = Date.now();
    const filesToScan = options.files?.length
      ? options.files
      : collectFiles(this.sourceRoot, SOURCE_EXTENSIONS, [
          ...DEFAULT_IGNORE_DIRS,
          ...(this.config.ignorePatterns || []),
        ]);

    const allTexts: DetectedText[] = [];
    const allAssets: AssetReference[] = [];
    const allLegacyUrls: LegacyCdnUrl[] = [];

    const chunkSize = Math.max(
      1,
      Math.min(50, Math.ceil(filesToScan.length / (os.cpus().length || 4)))
    );
    const chunks = this.chunkArray(filesToScan, chunkSize);

    for (const chunk of chunks) {
      const results = await Promise.all(chunk.map((f) => this.scanFile(f)));
      for (const r of results) {
        // Element-wise pushes: spreading a very large array into push() can
        // exceed the engine's argument limit.
        for (const text of r.texts) allTexts.push(text);
        for (const asset of r.assets) allAssets.push(asset);
        for (const url of r.legacyUrls) allLegacyUrls.push(url);
      }
    }

    this.cache?.persist();

    return {
      framework: this.config.framework,
      scannedFiles: filesToScan.length,
      detectedTexts: allTexts,
      assets: allAssets,
      legacyCdnUrls: allLegacyUrls,
      duration: Date.now() - startTime,
      timestamp: new Date().toISOString(),
    };
  }

  /**
   * Scans one file.
   *
   * Text findings come from the cache when it holds an entry matching the
   * file's current contents; otherwise the file is read and parsed, and the
   * result is cached. The asset scan always runs, because the cache stores
   * text findings only. Returning early on a cache hit would drop asset and
   * CDN findings on every warm run.
   *
   * @returns Empty lists when the file cannot be read.
   */
  private async scanFile(filePath: string): Promise<{
    texts: DetectedText[];
    assets: AssetReference[];
    legacyUrls: LegacyCdnUrl[];
  }> {
    // getCachedResult validates the entry against the current content hash,
    // so a separate isFileChanged() call would only hash the file twice.
    let texts: DetectedText[] | null = this.cache?.getCachedResult(filePath) ?? null;

    if (!texts) {
      let content: string;
      try {
        const { promises: fsPromises } = await import('fs');
        content = await fsPromises.readFile(filePath, 'utf-8');
      } catch {
        return { texts: [], assets: [], legacyUrls: [] };
      }

      const scanner = new AstScanner({ filePath, content, sourceRoot: this.config.sourceDir });
      texts = scanner.scan();
      this.cache?.setCachedResult(filePath, texts);
    }

    const { assets, legacyCdnUrls } = this.assetScanner.scanFile(filePath);

    return { texts, assets, legacyUrls: legacyCdnUrls };
  }

  /** Splits `array` into consecutive slices of at most `size` elements. */
  private chunkArray<T>(array: T[], size: number): T[][] {
    const chunks: T[][] = [];
    for (let i = 0; i < array.length; i += size) {
      chunks.push(array.slice(i, i + size));
    }
    return chunks;
  }
}
package/tsconfig.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "extends": "../../tsconfig.base.json",
3
+ "compilerOptions": {
4
+ "outDir": "./dist",
5
+ "rootDir": "./src"
6
+ },
7
+ "include": ["src/**/*"],
8
+ "exclude": ["node_modules", "dist"]
9
+ }