ai-localize-scanner 2.0.5 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # ai-localize-scanner
2
2
 
3
+ ## 2.0.7
4
+
5
+ ### Minor Changes
6
+
7
+ - **CSS/utility class filtering in `AstScanner`**:
8
+ - New `NON_TRANSLATABLE_ATTR_NAMES` set: JSX attributes like `type`, `role`, `method`,
9
+ `href`, `src`, `rel`, `target`, `id`, `name`, and 20+ others are now skipped.
10
+ - New `NON_TRANSLATABLE_PROP_KEYS` set: object property values are skipped when the key
11
+ is a CSS-in-JS property (`fontFamily`, `color`, `display`, etc.) or structural prop.
12
+ - New `CSS_UTILITY_FN_NAMES` set: string literals inside calls to `clsx`, `cx`, `cn`,
13
+ `twMerge`, `twJoin`, `classnames`, `styled`, `css`, etc. are now skipped.
14
+ - `isCssClassString()` is now applied to every candidate string.
15
+ - Centralised `isTranslatableText()` private method applies all checks + `ignoreTextPatterns`.
16
+ - **`ignoreTextPatterns` config support** — user-defined regex patterns applied per scan.
17
+ - `regexFallbackScan()` updated to use the same `isTranslatableText()` pipeline.
18
+ - `ProjectScanner` passes `config.ignoreTextPatterns` to `AstScanner`.
19
+ - Added `repository`, `homepage`, `bugs`, `author` fields to `package.json` for npm registry display.
20
+ - Added `sideEffects: false` to enable tree-shaking in bundlers.
21
+ - Added `prepublishOnly` script to ensure the package is built before publishing.
22
+
23
+ ## 2.0.6
24
+
25
+ ### Patch Changes
26
+
27
+ - Add per-package README.md files so each package displays documentation on npmjs.com
28
+ - Update README version badge to 2.0.6
29
+
3
30
  ## 2.0.3
4
31
 
5
32
  ### Patch Changes
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2026 ai-localize-core contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,75 @@
1
+ # ai-localize-scanner
2
+
3
+ > Babel AST-based hardcoded text detection + asset scanner for the [ai-localize-core](https://github.com/ai-localize/ai-localize-core) platform.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/ai-localize-scanner.svg)](https://www.npmjs.com/package/ai-localize-scanner)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
7
+
8
+ ---
9
+
10
+ ## What it does
11
+
12
+ - Parses JS/TS/JSX/TSX/Vue/Angular source files with `@babel/parser`
13
+ - Detects hardcoded strings in JSX text, attributes, string literals, template literals
14
+ - Skips strings already inside translation calls (`t()`, `useTranslation`, custom hooks)
15
+ - Detects static asset references (images, fonts, CSS, JS)
16
+ - Finds legacy CDN URLs for migration
17
+ - Supports **incremental scanning** (git-aware file hashing cache)
18
+ - Generates deterministic `suggestedKey` for every detected string
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ npm install ai-localize-scanner
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```ts
29
+ import { ProjectScanner } from 'ai-localize-scanner';
30
+
31
+ const scanner = new ProjectScanner({
32
+ framework: 'react-vite',
33
+ sourceDir: 'src',
34
+ localesDir: 'locales',
35
+ defaultLanguage: 'en',
36
+ targetLanguages: ['fr', 'de'],
37
+ });
38
+
39
+ const result = await scanner.scan();
40
+ // result.detectedTexts — array of DetectedText
41
+ // result.assets — array of AssetReference
42
+ // result.legacyCdnUrls — array of LegacyCdnUrl
43
+ // result.scannedFiles — number of files processed
44
+ // result.duration — scan time in ms
45
+ ```
46
+
47
+ ## Detected text contexts
48
+
49
+ `jsx-text` · `string-literal` · `jsx-attribute` · `placeholder` · `aria-label` · `title` · `alt` · `button` · `heading` · `label` · `validation` · `tooltip` · `table-header` · `modal` · `toast` · `alert` · `template-literal` · `object-value` · `array-item`
50
+
51
+ ## Key generation
52
+
53
+ Keys are generated deterministically from file path + text:
54
+
55
+ ```
56
+ src/pages/dashboard/Banner.tsx + "Welcome to the Dashboard"
57
+ → pages.dashboard.banner.welcome_to_the_dashboard
58
+ ```
59
+
60
+ Set `keyStyle: "screaming_snake"` for UPPER_SNAKE_CASE keys:
61
+ ```
62
+ "Save Changes" → SAVE_CHANGES
63
+ ```
64
+
65
+ ## Incremental scanning
66
+
67
+ Incremental scanning is enabled by default (`incrementalCache: true`). File hashes are stored in `.ai-localize-cache/scan-cache.json`, and only changed files are re-scanned on subsequent runs.
68
+
69
+ ---
70
+
71
+ ## Part of ai-localize-core
72
+
73
+ Install the CLI for the complete toolset: `npm install -g ai-localize-cli`
74
+
75
+ MIT © ai-localize-core contributors
package/dist/index.d.mts CHANGED
@@ -6,38 +6,27 @@ interface AstScanOptions {
6
6
  sourceRoot?: string;
7
7
  /**
8
8
  * Controls the format of the generated locale key for each detected text.
9
- *
9
+ *
10
10
  * - `"path"` (default) — hierarchical dot-notation key derived from file path + text:
11
11
  * `settings.settings_page.save_changes`
12
12
  *
13
13
  * - `"screaming_snake"` — UPPER_SNAKE_CASE key derived solely from the text value:
14
14
  * "Save Changes" → `SAVE_CHANGES`
15
- * "Max Count" → `MAX_COUNT`
15
+ * "Max Count" → `MAX_COUNT`
16
16
  */
17
17
  keyStyle?: KeyStyle;
18
18
  /**
19
19
  * Optional codemod config from ai-localize.config.json.
20
20
  *
21
21
  * The scanner uses this to recognise already-translated strings even when
22
- * the project uses a custom i18n library or a locally-defined hook:
23
- *
24
- * importPackage — matched against import source strings. Supports:
25
- * - npm package names: "react-i18next", "my-i18n-lib"
26
- * - path aliases: "@/hooks/useTranslation", "@/i18n"
27
- * - relative paths: "../../hooks/useTranslation"
28
- * Matching is done by checking whether the import source equals the value
29
- * OR ends with the last path segment(s) of the value (normalised).
30
- *
31
- * hookName — the hook identifier (e.g. "useTranslation", "useI18n").
32
- * Added directly to the translation-function names set regardless of
33
- * how the hook is imported. This means even default imports, re-exports
34
- * or barrel aliases are handled correctly:
35
- * import useT from '../../hooks/useT' (default import, hookName="useT")
36
- *
37
- * translationFunction — the accessor returned by the hook (e.g. "t").
38
- * Added directly to the translation-function names set.
22
+ * the project uses a custom i18n library or a locally-defined hook.
39
23
  */
40
24
  codemodConfig?: CodemodConfig;
25
+ /**
26
+ * Additional regex patterns (as strings) from config.ignoreTextPatterns.
27
+ * Any scanned string matching at least one pattern is excluded.
28
+ */
29
+ ignoreTextPatterns?: string[];
41
30
  }
42
31
  /**
43
32
  * Scans a JS/TS/JSX/TSX file using Babel AST to find hardcoded text.
@@ -45,6 +34,7 @@ interface AstScanOptions {
45
34
  declare class AstScanner {
46
35
  private options;
47
36
  private detectedTexts;
37
+ private compiledIgnorePatterns;
48
38
  /** Identifiers whose call/bracket expressions contain already-translated strings. */
49
39
  private translationFunctionNames;
50
40
  /**
@@ -55,6 +45,16 @@ declare class AstScanner {
55
45
  private importSourceMatchers;
56
46
  constructor(options: AstScanOptions);
57
47
  scan(): DetectedText[];
48
+ /**
49
+ * Central check: is this text worth extracting as a locale key?
50
+ * Applies isHumanReadableText(), isCssClassString(), and user ignoreTextPatterns.
51
+ */
52
+ private isTranslatableText;
53
+ /**
54
+ * Returns true when the node path is inside a CSS utility function call:
55
+ * clsx("a", "b"), cn("x"), twMerge("foo", "bar"), styled("div"), etc.
56
+ */
57
+ private isInsideCssUtilityCall;
58
58
  /**
59
59
  * Walk import declarations; when the source matches a known translation
60
60
  * import, collect all named/default imports as translation function names.
@@ -83,7 +83,14 @@ declare class AssetScanner {
83
83
  declare class IncrementalScanCache {
84
84
  private cachePath;
85
85
  private cache;
86
- constructor(cacheDir: string);
86
+ /**
87
+ * @param cacheDir Directory where `scan-cache.json` is stored.
88
+ * @param configHash SHA-256 hash of the config fields that affect scan output.
89
+ * When this differs from the persisted value the entire
90
+ * cache is invalidated so that config changes (keyStyle,
91
+ * ignoreTextPatterns, codemods, etc.) are always reflected.
92
+ */
93
+ constructor(cacheDir: string, configHash?: string);
87
94
  private load;
88
95
  isFileChanged(filePath: string): boolean;
89
96
  getCachedResult(filePath: string): DetectedText[] | null;
@@ -103,6 +110,16 @@ declare class ProjectScanner {
103
110
  private cache?;
104
111
  private assetScanner;
105
112
  constructor(config: LocalizationConfig);
113
+ /**
114
+ * Produces a stable SHA-256 fingerprint of the config fields that affect
115
+ * scan output. When any of these change the incremental cache is fully
116
+ * invalidated so the next run re-scans every file with the new settings.
117
+ *
118
+ * Fields intentionally excluded: `incrementalCache`, `cacheDir`, `aws`,
119
+ * `plugins` — none of those influence what text the AST scanner detects
120
+ * or how keys are generated.
121
+ */
122
+ private hashConfig;
106
123
  scan(options?: ScanOptions): Promise<ScanResult>;
107
124
  private scanFile;
108
125
  private chunkArray;
package/dist/index.d.ts CHANGED
@@ -6,38 +6,27 @@ interface AstScanOptions {
6
6
  sourceRoot?: string;
7
7
  /**
8
8
  * Controls the format of the generated locale key for each detected text.
9
- *
9
+ *
10
10
  * - `"path"` (default) — hierarchical dot-notation key derived from file path + text:
11
11
  * `settings.settings_page.save_changes`
12
12
  *
13
13
  * - `"screaming_snake"` — UPPER_SNAKE_CASE key derived solely from the text value:
14
14
  * "Save Changes" → `SAVE_CHANGES`
15
- * "Max Count" → `MAX_COUNT`
15
+ * "Max Count" → `MAX_COUNT`
16
16
  */
17
17
  keyStyle?: KeyStyle;
18
18
  /**
19
19
  * Optional codemod config from ai-localize.config.json.
20
20
  *
21
21
  * The scanner uses this to recognise already-translated strings even when
22
- * the project uses a custom i18n library or a locally-defined hook:
23
- *
24
- * importPackage — matched against import source strings. Supports:
25
- * - npm package names: "react-i18next", "my-i18n-lib"
26
- * - path aliases: "@/hooks/useTranslation", "@/i18n"
27
- * - relative paths: "../../hooks/useTranslation"
28
- * Matching is done by checking whether the import source equals the value
29
- * OR ends with the last path segment(s) of the value (normalised).
30
- *
31
- * hookName — the hook identifier (e.g. "useTranslation", "useI18n").
32
- * Added directly to the translation-function names set regardless of
33
- * how the hook is imported. This means even default imports, re-exports
34
- * or barrel aliases are handled correctly:
35
- * import useT from '../../hooks/useT' (default import, hookName="useT")
36
- *
37
- * translationFunction — the accessor returned by the hook (e.g. "t").
38
- * Added directly to the translation-function names set.
22
+ * the project uses a custom i18n library or a locally-defined hook.
39
23
  */
40
24
  codemodConfig?: CodemodConfig;
25
+ /**
26
+ * Additional regex patterns (as strings) from config.ignoreTextPatterns.
27
+ * Any scanned string matching at least one pattern is excluded.
28
+ */
29
+ ignoreTextPatterns?: string[];
41
30
  }
42
31
  /**
43
32
  * Scans a JS/TS/JSX/TSX file using Babel AST to find hardcoded text.
@@ -45,6 +34,7 @@ interface AstScanOptions {
45
34
  declare class AstScanner {
46
35
  private options;
47
36
  private detectedTexts;
37
+ private compiledIgnorePatterns;
48
38
  /** Identifiers whose call/bracket expressions contain already-translated strings. */
49
39
  private translationFunctionNames;
50
40
  /**
@@ -55,6 +45,16 @@ declare class AstScanner {
55
45
  private importSourceMatchers;
56
46
  constructor(options: AstScanOptions);
57
47
  scan(): DetectedText[];
48
+ /**
49
+ * Central check: is this text worth extracting as a locale key?
50
+ * Applies isHumanReadableText(), isCssClassString(), and user ignoreTextPatterns.
51
+ */
52
+ private isTranslatableText;
53
+ /**
54
+ * Returns true when the node path is inside a CSS utility function call:
55
+ * clsx("a", "b"), cn("x"), twMerge("foo", "bar"), styled("div"), etc.
56
+ */
57
+ private isInsideCssUtilityCall;
58
58
  /**
59
59
  * Walk import declarations; when the source matches a known translation
60
60
  * import, collect all named/default imports as translation function names.
@@ -83,7 +83,14 @@ declare class AssetScanner {
83
83
  declare class IncrementalScanCache {
84
84
  private cachePath;
85
85
  private cache;
86
- constructor(cacheDir: string);
86
+ /**
87
+ * @param cacheDir Directory where `scan-cache.json` is stored.
88
+ * @param configHash SHA-256 hash of the config fields that affect scan output.
89
+ * When this differs from the persisted value the entire
90
+ * cache is invalidated so that config changes (keyStyle,
91
+ * ignoreTextPatterns, codemods, etc.) are always reflected.
92
+ */
93
+ constructor(cacheDir: string, configHash?: string);
87
94
  private load;
88
95
  isFileChanged(filePath: string): boolean;
89
96
  getCachedResult(filePath: string): DetectedText[] | null;
@@ -103,6 +110,16 @@ declare class ProjectScanner {
103
110
  private cache?;
104
111
  private assetScanner;
105
112
  constructor(config: LocalizationConfig);
113
+ /**
114
+ * Produces a stable SHA-256 fingerprint of the config fields that affect
115
+ * scan output. When any of these change the incremental cache is fully
116
+ * invalidated so the next run re-scans every file with the new settings.
117
+ *
118
+ * Fields intentionally excluded: `incrementalCache`, `cacheDir`, `aws`,
119
+ * `plugins` — none of those influence what text the AST scanner detects
120
+ * or how keys are generated.
121
+ */
122
+ private hashConfig;
106
123
  scan(options?: ScanOptions): Promise<ScanResult>;
107
124
  private scanFile;
108
125
  private chunkArray;