ai-localize-scanner 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +73 -0
- package/dist/index.d.ts +73 -0
- package/dist/index.js +504 -0
- package/dist/index.mjs +468 -0
- package/package.json +40 -0
- package/src/__tests__/ast-scanner.test.ts +65 -0
- package/src/asset-scanner.ts +118 -0
- package/src/ast-scanner.ts +225 -0
- package/src/git-scanner.ts +52 -0
- package/src/incremental-scanner.ts +58 -0
- package/src/index.ts +5 -0
- package/src/project-scanner.ts +114 -0
- package/tsconfig.json +9 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
// src/ast-scanner.ts
|
|
2
|
+
import * as parser from "@babel/parser";
|
|
3
|
+
import traverse from "@babel/traverse";
|
|
4
|
+
import * as t from "@babel/types";
|
|
5
|
+
import {
|
|
6
|
+
isHumanReadableText,
|
|
7
|
+
normalizeText,
|
|
8
|
+
TEXT_ATTRIBUTE_NAMES,
|
|
9
|
+
generateLocaleKey
|
|
10
|
+
} from "@ai-localize/shared";
|
|
11
|
+
/** Packages whose named imports are registered as translation helpers. */
var TRANSLATION_IMPORT_SOURCES = /* @__PURE__ */ new Set([
  "react-i18next",
  "i18next",
  "vue-i18n",
  "@ngx-translate/core"
]);
// `@babel/traverse` is a CommonJS package. When this ES module is loaded
// natively by Node, the default import is the whole `module.exports` object and
// the callable lives on `.default`; bundlers hand back the function directly.
// Accept both shapes so `scan()` does not throw "traverse is not a function".
var walkAst = typeof traverse === "function" ? traverse : traverse.default;
/**
 * Finds hardcoded, human-readable strings in a single JS/TS/JSX source text.
 *
 * Construct with `{ filePath, content, sourceRoot }` and call `scan()`.
 */
var AstScanner = class {
  options;
  detectedTexts = [];
  // Callee names (bare or as a member property) that mark a translation call.
  translationFunctionNames = /* @__PURE__ */ new Set(["t", "$t", "i18n", "translate"]);
  constructor(options) {
    this.options = options;
  }
  /**
   * Parses `options.content` and collects candidate strings from JSX text,
   * text-bearing JSX attributes, string literals and expression-free template
   * literals. Falls back to a line-based regex scan when parsing throws.
   *
   * @returns the detected texts for this file
   */
  scan() {
    const { content } = this.options;
    // Start from a clean slate so calling scan() twice does not duplicate results.
    this.detectedTexts = [];
    let ast;
    try {
      ast = parser.parse(content, {
        sourceType: "module",
        plugins: [
          "jsx",
          "typescript",
          "decorators-legacy",
          "classProperties",
          "optionalChaining",
          "nullishCoalescingOperator",
          "dynamicImport",
          "exportDefaultFrom"
        ],
        errorRecovery: true
      });
    } catch {
      return this.regexFallbackScan();
    }
    this.collectTranslationImports(ast);
    walkAst(ast, {
      JSXText: (nodePath) => {
        const text = normalizeText(nodePath.node.value);
        if (!isHumanReadableText(text)) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        this.addDetected(
          text,
          nodePath.node.loc?.start.line ?? 0,
          nodePath.node.loc?.start.column ?? 0,
          "jsx-text",
          "JSXText"
        );
      },
      JSXAttribute: (nodePath) => {
        const attrName = t.isJSXIdentifier(nodePath.node.name) ? nodePath.node.name.name : "";
        if (!TEXT_ATTRIBUTE_NAMES.has(attrName.toLowerCase())) return;
        const valueNode = nodePath.node.value;
        // Only plain string values are reported here.
        if (!t.isStringLiteral(valueNode)) return;
        const text = normalizeText(valueNode.value);
        if (!isHumanReadableText(text)) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        this.addDetected(
          text,
          valueNode.loc?.start.line ?? 0,
          valueNode.loc?.start.column ?? 0,
          this.mapAttrToContext(attrName),
          "JSXAttribute"
        );
      },
      StringLiteral: (nodePath) => {
        const parent = nodePath.parent;
        // Module specifiers are never user-facing text; this covers
        // `import … from "x"` as well as `export … from "x"`.
        if (t.isImportDeclaration(parent) || t.isExportAllDeclaration(parent) || t.isExportNamedDeclaration(parent)) return;
        // String-literal *types* (`type S = "Not Found"`) are not runtime text.
        if (t.isTSLiteralType(parent)) return;
        if (t.isObjectProperty(parent) && parent.key === nodePath.node) return;
        // Attribute values are handled by the JSXAttribute visitor.
        if (t.isJSXAttribute(parent)) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        // Lower-case dotted/underscored tokens look like keys or identifiers.
        if (/^[a-z][a-z0-9_.]+$/.test(nodePath.node.value)) return;
        const text = normalizeText(nodePath.node.value);
        if (!isHumanReadableText(text)) return;
        this.addDetected(
          text,
          nodePath.node.loc?.start.line ?? 0,
          nodePath.node.loc?.start.column ?? 0,
          "string-literal",
          "StringLiteral"
        );
      },
      TemplateLiteral: (nodePath) => {
        // Templates with interpolations are skipped entirely.
        if (nodePath.node.expressions.length > 0) return;
        if (this.isInsideTranslationCall(nodePath)) return;
        const text = normalizeText(nodePath.node.quasis[0]?.value.cooked ?? "");
        if (!isHumanReadableText(text)) return;
        this.addDetected(
          text,
          nodePath.node.loc?.start.line ?? 0,
          nodePath.node.loc?.start.column ?? 0,
          "template-literal",
          "TemplateLiteral"
        );
      }
    });
    return this.detectedTexts;
  }
  /**
   * Registers the local names of named imports from known i18n packages as
   * translation function names.
   */
  collectTranslationImports(ast) {
    for (const node of ast.program.body) {
      if (!t.isImportDeclaration(node)) continue;
      if (!TRANSLATION_IMPORT_SOURCES.has(node.source.value)) continue;
      for (const specifier of node.specifiers) {
        if (t.isImportSpecifier(specifier) && t.isIdentifier(specifier.local)) {
          this.translationFunctionNames.add(specifier.local.name);
        }
      }
    }
  }
  /**
   * Walks up the ancestor chain and reports whether any enclosing call
   * expression has a callee (identifier, or member property) whose name is a
   * known translation function.
   */
  isInsideTranslationCall(nodePath) {
    let current = nodePath.parentPath;
    while (current) {
      const node = current.node;
      if (t.isCallExpression(node)) {
        const callee = node.callee;
        if (t.isIdentifier(callee) && this.translationFunctionNames.has(callee.name)) {
          return true;
        }
        if (t.isMemberExpression(callee) && t.isIdentifier(callee.property) && this.translationFunctionNames.has(callee.property.name)) {
          return true;
        }
      }
      current = current.parentPath;
    }
    return false;
  }
  /** Appends one detection, deriving its suggested key from file path and text. */
  addDetected(text, line, column, context, nodeType) {
    const key = generateLocaleKey(
      this.options.filePath,
      text,
      this.options.sourceRoot || "src"
    );
    this.detectedTexts.push({
      filePath: this.options.filePath,
      line,
      column,
      text,
      suggestedKey: key,
      context,
      nodeType,
      alreadyTranslated: false
    });
  }
  /** Maps a JSX attribute name (case-insensitive) to a detection context label. */
  mapAttrToContext(attrName) {
    const lower = attrName.toLowerCase();
    if (lower === "placeholder") return "placeholder";
    if (lower === "aria-label" || lower === "aria-placeholder") return "aria-label";
    if (lower === "title") return "title";
    if (lower === "alt") return "alt";
    return "jsx-attribute";
  }
  /**
   * Line-by-line fallback used when the parser throws: reports text found
   * between `>` and `<` on a single line.
   *
   * @returns the detections (not stored on `this.detectedTexts`)
   */
  regexFallbackScan() {
    const results = [];
    const jsxTextRegex = />([^<>{}\n]+)</g;
    const lines = this.options.content.split("\n");
    lines.forEach((line, idx) => {
      for (const m of line.matchAll(jsxTextRegex)) {
        const text = normalizeText(m[1]);
        if (!isHumanReadableText(text)) continue;
        const key = generateLocaleKey(this.options.filePath, text, this.options.sourceRoot || "src");
        results.push({
          filePath: this.options.filePath,
          line: idx + 1,
          // m.index is the position of ">"; the text itself starts one column later.
          column: (m.index ?? 0) + 1,
          text,
          suggestedKey: key,
          context: "jsx-text",
          nodeType: "regex-fallback",
          alreadyTranslated: false
        });
      }
    });
    return results;
  }
};
|
|
188
|
+
|
|
189
|
+
// src/asset-scanner.ts
|
|
190
|
+
import * as fs from "fs";
|
|
191
|
+
import * as path from "path";
|
|
192
|
+
import { ASSET_EXTENSIONS } from "@ai-localize/shared";
|
|
193
|
+
// Absolute http(s) URLs with at least one path segment.
var CDN_URL_PATTERN = /https?:\/\/[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}\/[^\s"'`\)\]>]+/g;
// CSS `url(...)` references; group 1 is the referenced path.
var CSS_URL_PATTERN = /url\(['"\s]?([^'")]+)['"\s]?\)/g;
// `import name from "<path>.<asset-ext>"`; group 1 is the imported path.
var IMPORT_ASSET_PATTERN = /import\s+\w+\s+from\s+['"]([^'"]+\.(png|jpg|jpeg|svg|webp|gif|ico|woff|woff2|ttf|eot|mp4))['"];?/gi;
// `src="…"` / `href="…"` attributes pointing at an asset file; group 1 is the path.
var SRC_ATTR_PATTERN = /(?:src|href)=["']([^"']+\.(png|jpg|jpeg|svg|webp|gif|ico|mp4))["']/gi;
/**
 * Regex-based scanner that lists asset references and CDN URLs found in a file.
 */
var AssetScanner = class {
  legacyCdnPattern = null;
  /**
   * @param legacyCdnPattern optional regular-expression source; every match in
   *   a scanned file is reported as a legacy CDN URL. An invalid pattern is
   *   ignored and only the built-in CDN detection runs.
   */
  constructor(legacyCdnPattern) {
    if (legacyCdnPattern) {
      try {
        this.legacyCdnPattern = new RegExp(legacyCdnPattern, "g");
      } catch {
        // Deliberate best-effort: an invalid user pattern disables legacy matching.
      }
    }
  }
  /**
   * Reads `filePath` and collects asset references (imports, CSS urls, src/href
   * attributes, in that order) and CDN URLs.
   *
   * @returns `{ assets, legacyCdnUrls }`; both empty when the file cannot be read
   */
  scanFile(filePath) {
    const assets = [];
    const legacyCdnUrls = [];
    let content;
    try {
      content = fs.readFileSync(filePath, "utf-8");
    } catch {
      return { assets, legacyCdnUrls };
    }
    // matchAll iterates over a private copy of the regex, so the shared
    // module-level patterns never carry lastIndex state between calls.
    const collectAssets = (pattern, referenceType) => {
      for (const m of content.matchAll(pattern)) {
        const assetPath = m[1];
        // Inline data URIs are not file references.
        if (referenceType === "css-url" && assetPath.startsWith("data:")) continue;
        assets.push({
          filePath,
          line: this.getLineNumber(content, m.index ?? 0),
          assetPath,
          assetType: this.getAssetType(assetPath),
          referenceType
        });
      }
    };
    collectAssets(IMPORT_ASSET_PATTERN, "import");
    collectAssets(CSS_URL_PATTERN, "css-url");
    collectAssets(SRC_ATTR_PATTERN, "src-attr");
    if (this.legacyCdnPattern) {
      for (const m of content.matchAll(this.legacyCdnPattern)) {
        const url = m[0];
        // A user pattern such as "x*" can match the empty string; an exec()
        // loop would never advance on it. matchAll advances, and empty matches
        // carry no URL, so they are skipped.
        if (url === "") continue;
        legacyCdnUrls.push({
          filePath,
          line: this.getLineNumber(content, m.index ?? 0),
          url,
          assetPath: this.extractPathFromUrl(url)
        });
      }
    }
    for (const m of content.matchAll(CDN_URL_PATTERN)) {
      const url = m[0];
      if (!ASSET_EXTENSIONS.some((ext) => url.includes(`.${ext}`))) continue;
      const line = this.getLineNumber(content, m.index ?? 0);
      // Skip URLs already reported on the same line by the legacy pattern.
      if (!legacyCdnUrls.some((u) => u.url === url && u.line === line)) {
        legacyCdnUrls.push({ filePath, line, url, assetPath: this.extractPathFromUrl(url) });
      }
    }
    return { assets, legacyCdnUrls };
  }
  /** Returns the 1-based line number containing character offset `index`. */
  getLineNumber(content, index) {
    return content.slice(0, index).split("\n").length;
  }
  /**
   * Returns the lower-cased extension of `assetPath` when it is listed in
   * ASSET_EXTENSIONS, otherwise "other". A trailing `?query` or `#fragment`
   * is ignored so "bg.png?v=2" is still classified as "png".
   */
  getAssetType(assetPath) {
    const withoutSuffix = assetPath.split(/[?#]/, 1)[0];
    const ext = path.extname(withoutSuffix).toLowerCase().replace(".", "");
    return ASSET_EXTENSIONS.includes(ext) ? ext : "other";
  }
  /** Returns the pathname of `url`, or `url` unchanged when it cannot be parsed. */
  extractPathFromUrl(url) {
    try {
      return new URL(url).pathname;
    } catch {
      return url;
    }
  }
};
|
|
288
|
+
|
|
289
|
+
// src/incremental-scanner.ts
|
|
290
|
+
import * as fs2 from "fs";
|
|
291
|
+
import * as path2 from "path";
|
|
292
|
+
import * as crypto from "crypto";
|
|
293
|
+
import { readJsonSafe, writeJson, ensureDir } from "@ai-localize/shared";
|
|
294
|
+
/**
 * File-hash keyed cache of scan results, stored as `scan-cache.json` inside
 * the given cache directory.
 */
var IncrementalScanCache = class {
  cachePath;
  cache;
  /** @param cacheDir directory that holds (or will hold) `scan-cache.json` */
  constructor(cacheDir) {
    ensureDir(cacheDir);
    this.cachePath = path2.join(cacheDir, "scan-cache.json");
    this.cache = this.load();
  }
  /**
   * Loads the persisted cache. Anything that is not a version "1" document
   * with object-valued `fileHashes` and `processedFiles` is discarded, so a
   * truncated or hand-edited file cannot crash later lookups.
   */
  load() {
    const existing = readJsonSafe(this.cachePath);
    const isRecord = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
    if (existing?.version === "1" && isRecord(existing.fileHashes) && isRecord(existing.processedFiles)) {
      return existing;
    }
    return this.createEmptyCache();
  }
  /** Returns a fresh, empty version "1" cache document. */
  createEmptyCache() {
    return { version: "1", lastRun: (/* @__PURE__ */ new Date()).toISOString(), fileHashes: {}, processedFiles: {} };
  }
  /**
   * Returns true when the file's current hash differs from the recorded one.
   * A file that cannot be hashed is always reported as changed.
   */
  isFileChanged(filePath) {
    const hash = this.hashFile(filePath);
    return hash === "" || hash !== this.cache.fileHashes[filePath];
  }
  /**
   * Returns the cached detections for `filePath`, or null when there is no
   * entry, the file cannot be hashed, or its content hash no longer matches.
   */
  getCachedResult(filePath) {
    const entry = this.cache.processedFiles[filePath];
    if (!entry) return null;
    const hash = this.hashFile(filePath);
    if (hash === "" || entry.hash !== hash) return null;
    return Array.isArray(entry.detectedTexts) ? entry.detectedTexts : null;
  }
  /**
   * Records `texts` for the file's current content. When the file cannot be
   * hashed nothing is stored and any previous entry is dropped; otherwise the
   * empty failure hash would make an unreadable file look unchanged.
   */
  setCachedResult(filePath, texts) {
    const hash = this.hashFile(filePath);
    if (hash === "") {
      delete this.cache.fileHashes[filePath];
      delete this.cache.processedFiles[filePath];
      return;
    }
    this.cache.fileHashes[filePath] = hash;
    this.cache.processedFiles[filePath] = { hash, detectedTexts: texts, lastModified: Date.now() };
  }
  /** Stamps `lastRun` and writes the cache to disk. */
  persist() {
    this.cache.lastRun = (/* @__PURE__ */ new Date()).toISOString();
    writeJson(this.cachePath, this.cache);
  }
  /** Returns the SHA-256 hex digest of the file, or "" when it cannot be read. */
  hashFile(filePath) {
    try {
      return crypto.createHash("sha256").update(fs2.readFileSync(filePath)).digest("hex");
    } catch {
      return "";
    }
  }
  /** Resets the cache to empty and persists it immediately. */
  clear() {
    this.cache = this.createEmptyCache();
    this.persist();
  }
};
|
|
337
|
+
|
|
338
|
+
// src/project-scanner.ts
|
|
339
|
+
import * as path3 from "path";
|
|
340
|
+
import * as os from "os";
|
|
341
|
+
import { collectFiles, DEFAULT_IGNORE_DIRS, SOURCE_EXTENSIONS } from "@ai-localize/shared";
|
|
342
|
+
/**
 * Scans a set of source files for hardcoded text, asset references and CDN
 * URLs, optionally reusing cached text results for unchanged files.
 */
var ProjectScanner = class {
  config;
  sourceRoot;
  cache;
  assetScanner;
  /**
   * @param config scanner configuration; reads `sourceDir`, `aws.legacyCdnPattern`,
   *   `incrementalCache`, `cacheDir`, `ignorePatterns` and `framework`
   */
  constructor(config) {
    this.config = config;
    // resolve() (unlike join()) keeps an absolute sourceDir/cacheDir intact
    // instead of gluing it onto the working directory.
    this.sourceRoot = path3.resolve(process.cwd(), config.sourceDir);
    this.assetScanner = new AssetScanner(config.aws?.legacyCdnPattern);
    if (config.incrementalCache) {
      this.cache = new IncrementalScanCache(
        path3.resolve(process.cwd(), config.cacheDir || ".ai-localize-cache")
      );
    }
  }
  /**
   * Scans `options.files` when given, otherwise every source file collected
   * under the source root, and aggregates the per-file results.
   *
   * @returns summary with detected texts, assets, legacy CDN URLs and timing
   */
  async scan(options = {}) {
    const startTime = Date.now();
    const filesToScan = options.files?.length ? options.files : collectFiles(this.sourceRoot, SOURCE_EXTENSIONS, [
      ...DEFAULT_IGNORE_DIRS,
      ...this.config.ignorePatterns || []
    ]);
    const allTexts = [];
    const allAssets = [];
    const allLegacyUrls = [];
    // Between 1 and 50 files per batch, scaled by the number of CPUs.
    const chunkSize = Math.max(
      1,
      Math.min(50, Math.ceil(filesToScan.length / (os.cpus().length || 4)))
    );
    for (const chunk of this.chunkArray(filesToScan, chunkSize)) {
      const results = await Promise.all(chunk.map((f) => this.scanFile(f)));
      for (const r of results) {
        allTexts.push(...r.texts);
        allAssets.push(...r.assets);
        allLegacyUrls.push(...r.legacyUrls);
      }
    }
    this.cache?.persist();
    return {
      framework: this.config.framework,
      scannedFiles: filesToScan.length,
      detectedTexts: allTexts,
      assets: allAssets,
      legacyCdnUrls: allLegacyUrls,
      duration: Date.now() - startTime,
      timestamp: (/* @__PURE__ */ new Date()).toISOString()
    };
  }
  /**
   * Scans one file. Text detections come from the cache when the file is
   * unchanged; asset and CDN results are not cached, so they are always
   * recomputed rather than being reported as empty on a cache hit.
   *
   * @returns `{ texts, assets, legacyUrls }`, all empty when the file is unreadable
   */
  async scanFile(filePath) {
    if (this.cache && !this.cache.isFileChanged(filePath)) {
      const cached = this.cache.getCachedResult(filePath);
      if (cached) {
        const fresh = this.assetScanner.scanFile(filePath);
        return { texts: cached, assets: fresh.assets, legacyUrls: fresh.legacyCdnUrls };
      }
    }
    let content;
    try {
      const { readFileSync: readFileSync3 } = await import("fs");
      content = readFileSync3(filePath, "utf-8");
    } catch {
      return { texts: [], assets: [], legacyUrls: [] };
    }
    const scanner = new AstScanner({ filePath, content, sourceRoot: this.config.sourceDir });
    const texts = scanner.scan();
    const { assets, legacyCdnUrls } = this.assetScanner.scanFile(filePath);
    this.cache?.setCachedResult(filePath, texts);
    return { texts, assets, legacyUrls: legacyCdnUrls };
  }
  /** Splits `array` into consecutive slices of at most `size` elements. */
  chunkArray(array, size) {
    const chunks = [];
    for (let i = 0; i < array.length; i += size) {
      chunks.push(array.slice(i, i + size));
    }
    return chunks;
  }
};
|
|
416
|
+
|
|
417
|
+
// src/git-scanner.ts
|
|
418
|
+
import { execFileSync, execSync } from "child_process";
|
|
419
|
+
import * as path4 from "path";
|
|
420
|
+
/**
 * Lists added/copied/modified source files reported by `git diff`, as paths
 * joined onto the scanner's working directory.
 */
var GitScanner = class {
  cwd;
  /** @param cwd directory in which git is run; defaults to the process cwd */
  constructor(cwd = process.cwd()) {
    this.cwd = cwd;
  }
  /** Returns staged files whose extension is in `extensions`; [] on any git failure. */
  getStagedFiles(extensions = ["ts", "tsx", "js", "jsx", "vue"]) {
    return this.listDiffPaths(["--cached"], extensions);
  }
  /**
   * Returns files changed between the merge base of `base` and HEAD.
   * An empty or option-like (`-…`) base is rejected and yields [].
   */
  getChangedFiles(base = "main", extensions = ["ts", "tsx", "js", "jsx", "vue"]) {
    if (typeof base !== "string" || base === "" || base.startsWith("-")) return [];
    return this.listDiffPaths([`${base}...HEAD`, "--"], extensions);
  }
  /**
   * Returns files changed in the last `commits` commits. `commits` must be a
   * non-negative integer; anything else yields [].
   */
  getRecentlyChangedFiles(commits = 1, extensions = ["ts", "tsx", "js", "jsx", "vue"]) {
    const count = Number(commits);
    if (!Number.isInteger(count) || count < 0) return [];
    return this.listDiffPaths([`HEAD~${count}...HEAD`, "--"], extensions);
  }
  /**
   * Runs `git diff --name-only` with the given extra arguments and filters the
   * output. Arguments go to git as an array with no shell involved, so a
   * revision string cannot inject commands. `-z` makes git emit raw
   * NUL-separated paths instead of quoting unusual file names.
   */
  listDiffPaths(extraArgs, extensions) {
    try {
      const out = execFileSync(
        "git",
        ["diff", "--name-only", "-z", "--diff-filter=ACM", ...extraArgs],
        { cwd: this.cwd, encoding: "utf-8" }
      );
      return this.filter(out.split("\0"), extensions);
    } catch {
      // Not a repository, unknown revision, git missing, …: report no files.
      return [];
    }
  }
  /** Keeps non-empty paths ending in `.<ext>` and joins them onto `cwd`. */
  filter(files, extensions) {
    return files.filter((f) => f && extensions.some((e) => f.endsWith(`.${e}`))).map((f) => path4.join(this.cwd, f));
  }
};
|
|
462
|
+
export {
|
|
463
|
+
AssetScanner,
|
|
464
|
+
AstScanner,
|
|
465
|
+
GitScanner,
|
|
466
|
+
IncrementalScanCache,
|
|
467
|
+
ProjectScanner
|
|
468
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ai-localize-scanner",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "AST-based hardcoded text scanner for frontend applications",
|
|
5
|
+
"main": "./dist/index.js",
|
|
6
|
+
"module": "./dist/index.mjs",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.mjs",
|
|
12
|
+
"require": "./dist/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"dependencies": {
|
|
16
|
+
"@babel/parser": "^7.23.9",
|
|
17
|
+
"@babel/traverse": "^7.23.9",
|
|
18
|
+
"@babel/types": "^7.23.9",
|
|
19
|
+
"glob": "^10.3.10",
|
|
20
|
+
"ai-localize-shared": "1.0.0",
|
|
21
|
+
"ai-localize-config": "1.0.0"
|
|
22
|
+
},
|
|
23
|
+
"devDependencies": {
|
|
24
|
+
"@types/babel__traverse": "^7.20.5",
|
|
25
|
+
"tsup": "^8.0.1",
|
|
26
|
+
"typescript": "^5.3.3",
|
|
27
|
+
"vitest": "^1.2.1"
|
|
28
|
+
},
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
32
|
+
},
|
|
33
|
+
"scripts": {
|
|
34
|
+
"build": "tsup src/index.ts --format cjs,esm --dts",
|
|
35
|
+
"dev": "tsup src/index.ts --format cjs,esm --dts --watch",
|
|
36
|
+
"typecheck": "tsc --noEmit",
|
|
37
|
+
"test": "vitest run",
|
|
38
|
+
"lint": "eslint src --ext .ts"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { AstScanner } from '../ast-scanner.js';
|
|
3
|
+
|
|
4
|
+
// Behavioural tests for AstScanner: what it reports and what it must skip.
describe('AstScanner', () => {
  it('detects JSX text', () => {
    const content = `
      export default function Button() {
        return <button>Save Campaign</button>;
      }
    `;
    const scanner = new AstScanner({ filePath: 'src/Button.tsx', content, sourceRoot: 'src' });
    const results = scanner.scan();
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].text).toBe('Save Campaign');
    expect(results[0].context).toBe('jsx-text');
  });

  it('detects JSX attribute placeholder', () => {
    const content = `
      export default function Input() {
        return <input placeholder="Enter your name" />;
      }
    `;
    const scanner = new AstScanner({ filePath: 'src/Input.tsx', content, sourceRoot: 'src' });
    const results = scanner.scan();
    const placeholders = results.filter((r) => r.context === 'placeholder');
    expect(placeholders.length).toBeGreaterThan(0);
    expect(placeholders[0].text).toBe('Enter your name');
  });

  it('skips already-translated text', () => {
    const content = `
      import { useTranslation } from 'react-i18next';
      export default function Button() {
        const { t } = useTranslation();
        return <button>{t('button.save')}</button>;
      }
    `;
    const scanner = new AstScanner({ filePath: 'src/Button.tsx', content, sourceRoot: 'src' });
    const results = scanner.scan();
    expect(results.filter((r) => r.text === 'button.save').length).toBe(0);
    // The i18n package specifier itself must not be reported either.
    expect(results.filter((r) => r.text === 'react-i18next').length).toBe(0);
  });

  it('skips import declarations', () => {
    const content = `
      import { something } from 'some-package';
      export default function App() {
        return <div>Hello World</div>;
      }
    `;
    const scanner = new AstScanner({ filePath: 'src/App.tsx', content, sourceRoot: 'src' });
    const results = scanner.scan();
    const importResults = results.filter((r) => r.text === 'some-package');
    expect(importResults.length).toBe(0);
  });

  it('generates deterministic locale keys', () => {
    const content = `export default function Btn() { return <button>Save</button>; }`;
    const scanner1 = new AstScanner({ filePath: 'src/Btn.tsx', content, sourceRoot: 'src' });
    const scanner2 = new AstScanner({ filePath: 'src/Btn.tsx', content, sourceRoot: 'src' });
    const results1 = scanner1.scan();
    const results2 = scanner2.scan();
    // Without these guards the comparison below passes vacuously
    // (undefined === undefined) whenever nothing is detected.
    expect(results1.length).toBeGreaterThan(0);
    expect(results2.length).toBe(results1.length);
    expect(results1[0]?.suggestedKey).toBeDefined();
    expect(results1[0]?.suggestedKey).toBe(results2[0]?.suggestedKey);
  });
});
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
|
|
4
|
+
import type { AssetReference, AssetType, LegacyCdnUrl } from '@ai-localize/shared';
|
|
5
|
+
import { ASSET_EXTENSIONS } from '@ai-localize/shared';
|
|
6
|
+
|
|
7
|
+
/** Absolute http(s) URLs with at least one path segment. */
const CDN_URL_PATTERN = /https?:\/\/[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}\/[^\s"'`\)\]>]+/g;
/** CSS `url(...)` references; group 1 is the referenced path. */
const CSS_URL_PATTERN = /url\(['"\s]?([^'")]+)['"\s]?\)/g;
/** `import name from "<path>.<asset-ext>"`; group 1 is the imported path. */
const IMPORT_ASSET_PATTERN =
  /import\s+\w+\s+from\s+['"]([^'"]+\.(png|jpg|jpeg|svg|webp|gif|ico|woff|woff2|ttf|eot|mp4))['"];?/gi;
/** `src="…"` / `href="…"` attributes pointing at an asset file; group 1 is the path. */
const SRC_ATTR_PATTERN =
  /(?:src|href)=["']([^"']+\.(png|jpg|jpeg|svg|webp|gif|ico|mp4))["']/gi;

/**
 * Regex-based scanner that lists asset references and CDN URLs found in a file.
 */
export class AssetScanner {
  private legacyCdnPattern: RegExp | null = null;

  /**
   * @param legacyCdnPattern optional regular-expression source; every match in
   *   a scanned file is reported as a legacy CDN URL. An invalid pattern is
   *   ignored and only the built-in CDN detection runs.
   */
  constructor(legacyCdnPattern?: string) {
    if (legacyCdnPattern) {
      try {
        this.legacyCdnPattern = new RegExp(legacyCdnPattern, 'g');
      } catch {
        // Deliberate best-effort: an invalid user pattern disables legacy matching.
      }
    }
  }

  /**
   * Reads `filePath` and collects asset references (imports, CSS urls, src/href
   * attributes, in that order) and CDN URLs.
   *
   * @returns both lists; empty when the file cannot be read
   */
  scanFile(filePath: string): { assets: AssetReference[]; legacyCdnUrls: LegacyCdnUrl[] } {
    const assets: AssetReference[] = [];
    const legacyCdnUrls: LegacyCdnUrl[] = [];

    let content: string;
    try {
      content = fs.readFileSync(filePath, 'utf-8');
    } catch {
      return { assets, legacyCdnUrls };
    }

    // matchAll iterates over a private copy of the regex, so the shared
    // module-level patterns never carry lastIndex state between calls.
    const collectAssets = (pattern: RegExp, referenceType: AssetReference['referenceType']): void => {
      for (const m of content.matchAll(pattern)) {
        const assetPath = m[1];
        // Inline data URIs are not file references.
        if (referenceType === 'css-url' && assetPath.startsWith('data:')) continue;
        assets.push({
          filePath,
          line: this.getLineNumber(content, m.index ?? 0),
          assetPath,
          assetType: this.getAssetType(assetPath),
          referenceType,
        });
      }
    };

    collectAssets(IMPORT_ASSET_PATTERN, 'import');
    collectAssets(CSS_URL_PATTERN, 'css-url');
    collectAssets(SRC_ATTR_PATTERN, 'src-attr');

    if (this.legacyCdnPattern) {
      for (const m of content.matchAll(this.legacyCdnPattern)) {
        const url = m[0];
        // A user pattern such as "x*" can match the empty string; an exec()
        // loop would never advance on it. matchAll advances, and empty matches
        // carry no URL, so they are skipped.
        if (url === '') continue;
        legacyCdnUrls.push({
          filePath,
          line: this.getLineNumber(content, m.index ?? 0),
          url,
          assetPath: this.extractPathFromUrl(url),
        });
      }
    }

    for (const m of content.matchAll(CDN_URL_PATTERN)) {
      const url = m[0];
      if (!ASSET_EXTENSIONS.some((ext) => url.includes(`.${ext}`))) continue;
      const line = this.getLineNumber(content, m.index ?? 0);
      // Skip URLs already reported on the same line by the legacy pattern.
      if (!legacyCdnUrls.some((u) => u.url === url && u.line === line)) {
        legacyCdnUrls.push({ filePath, line, url, assetPath: this.extractPathFromUrl(url) });
      }
    }

    return { assets, legacyCdnUrls };
  }

  /** Returns the 1-based line number containing character offset `index`. */
  private getLineNumber(content: string, index: number): number {
    return content.slice(0, index).split('\n').length;
  }

  /**
   * Returns the lower-cased extension of `assetPath` when it is listed in
   * ASSET_EXTENSIONS, otherwise 'other'. A trailing `?query` or `#fragment`
   * is ignored so "bg.png?v=2" is still classified as "png".
   */
  private getAssetType(assetPath: string): AssetType {
    const withoutSuffix = assetPath.split(/[?#]/, 1)[0];
    const ext = path.extname(withoutSuffix).toLowerCase().replace('.', '') as AssetType;
    return (ASSET_EXTENSIONS.includes(ext) ? ext : 'other') as AssetType;
  }

  /** Returns the pathname of `url`, or `url` unchanged when it cannot be parsed. */
  private extractPathFromUrl(url: string): string {
    try {
      return new URL(url).pathname;
    } catch {
      return url;
    }
  }
}
|