localediff 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- localediff-0.1.0/LICENSE +21 -0
- localediff-0.1.0/PKG-INFO +151 -0
- localediff-0.1.0/README.md +127 -0
- localediff-0.1.0/pyproject.toml +39 -0
- localediff-0.1.0/setup.cfg +4 -0
- localediff-0.1.0/src/localediff/__init__.py +6 -0
- localediff-0.1.0/src/localediff/__main__.py +6 -0
- localediff-0.1.0/src/localediff/cli.py +200 -0
- localediff-0.1.0/src/localediff/core.py +212 -0
- localediff-0.1.0/src/localediff/report.py +90 -0
- localediff-0.1.0/src/localediff.egg-info/PKG-INFO +151 -0
- localediff-0.1.0/src/localediff.egg-info/SOURCES.txt +15 -0
- localediff-0.1.0/src/localediff.egg-info/dependency_links.txt +1 -0
- localediff-0.1.0/src/localediff.egg-info/entry_points.txt +2 -0
- localediff-0.1.0/src/localediff.egg-info/top_level.txt +1 -0
- localediff-0.1.0/tests/test_core.py +113 -0
- localediff-0.1.0/tests/test_report.py +46 -0
localediff-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 localediff contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: localediff
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Find drift between i18n locale files — missing keys, CLDR-aware plural gaps, empty values. Framework-agnostic, zero dependencies.
|
|
5
|
+
Author: yyfjj
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jjdoor/localediff-py
|
|
8
|
+
Project-URL: Repository, https://github.com/jjdoor/localediff-py
|
|
9
|
+
Project-URL: Issues, https://github.com/jjdoor/localediff-py/issues
|
|
10
|
+
Keywords: i18n,l10n,locale,translation,missing-translations,drift,diff,i18next,cli,ci,pluralization
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Software Development :: Internationalization
|
|
18
|
+
Classifier: Topic :: Software Development :: Localization
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Requires-Python: >=3.8
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# localediff
|
|
26
|
+
|
|
27
|
+
**Find drift between your i18n locale files — before your users do.** You add a
|
|
28
|
+
string to `en.json`, ship it, and three weeks later notice `fr.json` and
|
|
29
|
+
`zh.json` were never updated. `localediff` catches that in CI: missing keys,
|
|
30
|
+
plural forms a language actually needs, and keys that exist but were left blank.
|
|
31
|
+
Framework-agnostic, **zero dependencies**.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install localediff
|
|
35
|
+
localediff ./locales
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
✗ fr.json
|
|
40
|
+
missing (1): auth.errors.locked
|
|
41
|
+
plural cart.items — has {other}, missing {one}
|
|
42
|
+
empty (1): footer.copyright
|
|
43
|
+
extra (1): legacy.banner
|
|
44
|
+
|
|
45
|
+
✓ zh.json — in sync
|
|
46
|
+
|
|
47
|
+
✗ 1 of 2 file(s) drifted — 1 missing, 1 plural gap(s), 1 empty, 1 extra
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Why another i18n tool
|
|
51
|
+
|
|
52
|
+
Most existing tools are tied to one framework, need an account, or are heavy
|
|
53
|
+
AST-based linters. `localediff` just reads JSON. It compares **structure**, so it
|
|
54
|
+
works for next-intl, react-intl, i18next, vue-i18n, Django/Flask JSON catalogs,
|
|
55
|
+
or any plain message file — and it ships for **both PyPI and npm** with identical
|
|
56
|
+
behavior.
|
|
57
|
+
|
|
58
|
+
### It understands plurals per language
|
|
59
|
+
|
|
60
|
+
This is the part naive "diff two JSON files" scripts get wrong. English has two
|
|
61
|
+
plural forms (`one`, `other`); Chinese has one (`other`); Russian has four
|
|
62
|
+
(`one`, `few`, `many`, `other`). A file with only `items_other` is **correct for
|
|
63
|
+
Chinese** but **broken for French**. `localediff` resolves the required CLDR
|
|
64
|
+
categories from each target's language, so it flags the real bug without crying
|
|
65
|
+
wolf on `zh.json`.
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# en base: items_one + items_other
|
|
69
|
+
zh.json → items_other only → ✓ in sync (Chinese needs only `other`)
|
|
70
|
+
fr.json → items_other only → ✗ missing {one}
|
|
71
|
+
ru.json → items_one + _other → ✗ missing {few, many}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Usage
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Scan a folder. The base defaults to en.json; everything else is checked.
|
|
78
|
+
localediff ./locales
|
|
79
|
+
|
|
80
|
+
# Pick a different base language in the folder.
|
|
81
|
+
localediff ./locales --base de
|
|
82
|
+
|
|
83
|
+
# Compare specific files explicitly.
|
|
84
|
+
localediff --base en.json --check fr.json zh.json
|
|
85
|
+
|
|
86
|
+
# Shorthand: first file is the base.
|
|
87
|
+
localediff en.json fr.json zh.json
|
|
88
|
+
|
|
89
|
+
# Machine-readable output for CI gates.
|
|
90
|
+
localediff ./locales --format json
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
You can also run it as a module: `python -m localediff ./locales`.
|
|
94
|
+
|
|
95
|
+
## What it checks
|
|
96
|
+
|
|
97
|
+
| Check | Meaning |
|
|
98
|
+
|-------|---------|
|
|
99
|
+
| **missing** | a key in the base that the target never translated |
|
|
100
|
+
| **plural** | a pluralized key (`key_one`, `key_other`, …) missing a CLDR form the **target language** requires |
|
|
101
|
+
| **empty** | a key present in the target whose value is a blank string, while the base has a non-blank string for it |
|
|
102
|
+
| **extra** | a key in the target the base no longer has |
|
|
103
|
+
|
|
104
|
+
Nested objects are flattened to dot-paths (`auth.errors.locked`); arrays are
|
|
105
|
+
indexed (`steps.0`). Plural keys use the i18next suffix convention
|
|
106
|
+
(`_zero`, `_one`, `_two`, `_few`, `_many`, `_other`). Unknown languages fall back
|
|
107
|
+
to parity with the base, so you never get a confidently-wrong result.
|
|
108
|
+
|
|
109
|
+
## Options
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
--base <file|lang> base/source locale (a file, or a lang stem in dir mode)
|
|
113
|
+
--check <files...> one or more target locales to compare against the base
|
|
114
|
+
--dir <dir> scan a directory of *.json locales
|
|
115
|
+
--lang <code> force the target language for plural rules
|
|
116
|
+
--format text|json output format (default: text)
|
|
117
|
+
--ignore-missing don't report missing keys
|
|
118
|
+
--ignore-extra don't report extra keys
|
|
119
|
+
--ignore-plural don't report plural gaps
|
|
120
|
+
--ignore-empty don't report empty values
|
|
121
|
+
-v, --version
|
|
122
|
+
-h, --help
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## In CI
|
|
126
|
+
|
|
127
|
+
`localediff` exits non-zero when anything has drifted:
|
|
128
|
+
|
|
129
|
+
| Exit code | Meaning |
|
|
130
|
+
|-----------|---------|
|
|
131
|
+
| `0` | every checked file is in sync |
|
|
132
|
+
| `1` | one or more files have drift |
|
|
133
|
+
| `2` | error (file not found, invalid JSON, bad arguments) |
|
|
134
|
+
|
|
135
|
+
## Also available for Node
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
npx localediff ./locales
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Same checks, same flags, same exit codes — [localediff on npm](https://github.com/jjdoor/localediff).
|
|
142
|
+
|
|
143
|
+
## Scope
|
|
144
|
+
|
|
145
|
+
JSON locale files only (the common case). YAML/`.properties`/gettext are not
|
|
146
|
+
supported — parsing them would mean pulling in a dependency, and zero-dep is the
|
|
147
|
+
point. Convert to JSON, or open an issue to discuss.
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
MIT
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# localediff
|
|
2
|
+
|
|
3
|
+
**Find drift between your i18n locale files — before your users do.** You add a
|
|
4
|
+
string to `en.json`, ship it, and three weeks later notice `fr.json` and
|
|
5
|
+
`zh.json` were never updated. `localediff` catches that in CI: missing keys,
|
|
6
|
+
plural forms a language actually needs, and keys that exist but were left blank.
|
|
7
|
+
Framework-agnostic, **zero dependencies**.
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install localediff
|
|
11
|
+
localediff ./locales
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
✗ fr.json
|
|
16
|
+
missing (1): auth.errors.locked
|
|
17
|
+
plural cart.items — has {other}, missing {one}
|
|
18
|
+
empty (1): footer.copyright
|
|
19
|
+
extra (1): legacy.banner
|
|
20
|
+
|
|
21
|
+
✓ zh.json — in sync
|
|
22
|
+
|
|
23
|
+
✗ 1 of 2 file(s) drifted — 1 missing, 1 plural gap(s), 1 empty, 1 extra
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Why another i18n tool
|
|
27
|
+
|
|
28
|
+
Most existing tools are tied to one framework, need an account, or are heavy
|
|
29
|
+
AST-based linters. `localediff` just reads JSON. It compares **structure**, so it
|
|
30
|
+
works for next-intl, react-intl, i18next, vue-i18n, Django/Flask JSON catalogs,
|
|
31
|
+
or any plain message file — and it ships for **both PyPI and npm** with identical
|
|
32
|
+
behavior.
|
|
33
|
+
|
|
34
|
+
### It understands plurals per language
|
|
35
|
+
|
|
36
|
+
This is the part naive "diff two JSON files" scripts get wrong. English has two
|
|
37
|
+
plural forms (`one`, `other`); Chinese has one (`other`); Russian has four
|
|
38
|
+
(`one`, `few`, `many`, `other`). A file with only `items_other` is **correct for
|
|
39
|
+
Chinese** but **broken for French**. `localediff` resolves the required CLDR
|
|
40
|
+
categories from each target's language, so it flags the real bug without crying
|
|
41
|
+
wolf on `zh.json`.
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# en base: items_one + items_other
|
|
45
|
+
zh.json → items_other only → ✓ in sync (Chinese needs only `other`)
|
|
46
|
+
fr.json → items_other only → ✗ missing {one}
|
|
47
|
+
ru.json → items_one + _other → ✗ missing {few, many}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Scan a folder. The base defaults to en.json; everything else is checked.
|
|
54
|
+
localediff ./locales
|
|
55
|
+
|
|
56
|
+
# Pick a different base language in the folder.
|
|
57
|
+
localediff ./locales --base de
|
|
58
|
+
|
|
59
|
+
# Compare specific files explicitly.
|
|
60
|
+
localediff --base en.json --check fr.json zh.json
|
|
61
|
+
|
|
62
|
+
# Shorthand: first file is the base.
|
|
63
|
+
localediff en.json fr.json zh.json
|
|
64
|
+
|
|
65
|
+
# Machine-readable output for CI gates.
|
|
66
|
+
localediff ./locales --format json
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
You can also run it as a module: `python -m localediff ./locales`.
|
|
70
|
+
|
|
71
|
+
## What it checks
|
|
72
|
+
|
|
73
|
+
| Check | Meaning |
|
|
74
|
+
|-------|---------|
|
|
75
|
+
| **missing** | a key in the base that the target never translated |
|
|
76
|
+
| **plural** | a pluralized key (`key_one`, `key_other`, …) missing a CLDR form the **target language** requires |
|
|
77
|
+
| **empty** | a key present in the target whose value is a blank string, while the base has a non-blank string for it |
|
|
78
|
+
| **extra** | a key in the target the base no longer has |
|
|
79
|
+
|
|
80
|
+
Nested objects are flattened to dot-paths (`auth.errors.locked`); arrays are
|
|
81
|
+
indexed (`steps.0`). Plural keys use the i18next suffix convention
|
|
82
|
+
(`_zero`, `_one`, `_two`, `_few`, `_many`, `_other`). Unknown languages fall back
|
|
83
|
+
to parity with the base, so you never get a confidently-wrong result.
|
|
84
|
+
|
|
85
|
+
## Options
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
--base <file|lang> base/source locale (a file, or a lang stem in dir mode)
|
|
89
|
+
--check <files...> one or more target locales to compare against the base
|
|
90
|
+
--dir <dir> scan a directory of *.json locales
|
|
91
|
+
--lang <code> force the target language for plural rules
|
|
92
|
+
--format text|json output format (default: text)
|
|
93
|
+
--ignore-missing don't report missing keys
|
|
94
|
+
--ignore-extra don't report extra keys
|
|
95
|
+
--ignore-plural don't report plural gaps
|
|
96
|
+
--ignore-empty don't report empty values
|
|
97
|
+
-v, --version
|
|
98
|
+
-h, --help
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## In CI
|
|
102
|
+
|
|
103
|
+
`localediff` exits non-zero when anything has drifted:
|
|
104
|
+
|
|
105
|
+
| Exit code | Meaning |
|
|
106
|
+
|-----------|---------|
|
|
107
|
+
| `0` | every checked file is in sync |
|
|
108
|
+
| `1` | one or more files have drift |
|
|
109
|
+
| `2` | error (file not found, invalid JSON, bad arguments) |
|
|
110
|
+
|
|
111
|
+
## Also available for Node
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
npx localediff ./locales
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Same checks, same flags, same exit codes — [localediff on npm](https://github.com/jjdoor/localediff).
|
|
118
|
+
|
|
119
|
+
## Scope
|
|
120
|
+
|
|
121
|
+
JSON locale files only (the common case). YAML/`.properties`/gettext are not
|
|
122
|
+
supported — parsing them would mean pulling in a dependency, and zero-dep is the
|
|
123
|
+
point. Convert to JSON, or open an issue to discuss.
|
|
124
|
+
|
|
125
|
+
## License
|
|
126
|
+
|
|
127
|
+
MIT
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "localediff"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Find drift between i18n locale files — missing keys, CLDR-aware plural gaps, empty values. Framework-agnostic, zero dependencies."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "yyfjj" }]
|
|
13
|
+
keywords = ["i18n", "l10n", "locale", "translation", "missing-translations", "drift", "diff", "i18next", "cli", "ci", "pluralization"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Environment :: Console",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Topic :: Software Development :: Internationalization",
|
|
22
|
+
"Topic :: Software Development :: Localization",
|
|
23
|
+
"Topic :: Utilities",
|
|
24
|
+
]
|
|
25
|
+
dependencies = []
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/jjdoor/localediff-py"
|
|
29
|
+
Repository = "https://github.com/jjdoor/localediff-py"
|
|
30
|
+
Issues = "https://github.com/jjdoor/localediff-py/issues"
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
localediff = "localediff.cli:main"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools]
|
|
36
|
+
package-dir = { "" = "src" }
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.packages.find]
|
|
39
|
+
where = ["src"]
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""localediff command-line interface."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from . import core, report
|
|
9
|
+
|
|
10
|
+
VERSION = "0.1.0"
|
|
11
|
+
|
|
12
|
+
# ---- tiny color helpers (no dep) ----
|
|
13
|
+
_COLOR = sys.stdout.isatty() and not os.environ.get("NO_COLOR")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _c(code, s):
    """Wrap ``s`` in the ANSI SGR sequence ``code`` when color is enabled.

    When the module-level ``_COLOR`` switch is falsy, ``s`` is returned
    unchanged so piped or NO_COLOR output stays free of escape codes.
    """
    if not _COLOR:
        return s
    return f"\x1b[{code}m{s}\x1b[0m"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Color name -> one-argument painter. Every painter goes through _c(), so
# they all honor the module-wide color switch.
COLORS = dict(
    red=lambda s: _c("31", s),
    green=lambda s: _c("32", s),
    yellow=lambda s: _c("33", s),
    cyan=lambda s: _c("36", s),
    dim=lambda s: _c("2", s),
    bold=lambda s: _c("1", s),
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def red(s):
    """Paint ``s`` with the ``"red"`` painter registered in ``COLORS``."""
    paint = COLORS["red"]
    return paint(s)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
HELP = f"""{COLORS['bold']('localediff')} — find drift between i18n locale files. Framework-agnostic, zero deps.
|
|
35
|
+
|
|
36
|
+
{COLORS['bold']('Usage')}
|
|
37
|
+
localediff --base en.json --check fr.json zh.json compare files explicitly
|
|
38
|
+
localediff ./locales scan a dir (base: en.json)
|
|
39
|
+
localediff ./locales --base de scan a dir, base de.json
|
|
40
|
+
localediff en.json fr.json zh.json first file is the base
|
|
41
|
+
|
|
42
|
+
{COLORS['bold']('What it finds')}
|
|
43
|
+
missing key in the base, never translated in the target
|
|
44
|
+
plural pluralized key missing a CLDR form the target language needs
|
|
45
|
+
empty key present in the target but its value is blank
|
|
46
|
+
extra key in the target the base no longer has
|
|
47
|
+
|
|
48
|
+
{COLORS['bold']('Options')}
|
|
49
|
+
--base <file|lang> base/source locale (a file, or a lang stem in dir mode)
|
|
50
|
+
--check <files...> one or more target locales to compare against the base
|
|
51
|
+
--dir <dir> scan a directory of *.json locales
|
|
52
|
+
--lang <code> force the target language for plural rules (else inferred
|
|
53
|
+
from each file name, e.g. fr.json -> fr)
|
|
54
|
+
--format text|json output format (default: text)
|
|
55
|
+
--ignore-missing don't report missing keys
|
|
56
|
+
--ignore-extra don't report extra keys
|
|
57
|
+
--ignore-plural don't report plural gaps
|
|
58
|
+
--ignore-empty don't report empty values
|
|
59
|
+
-v, --version
|
|
60
|
+
-h, --help
|
|
61
|
+
|
|
62
|
+
{COLORS['bold']('Exit')} 0 in sync · 1 drift found · 2 error
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def fail(msg):
    """Print ``localediff: <msg>`` to stderr and exit with status 2.

    The message is colored red only when stderr itself is a terminal and
    NO_COLOR is not set. The decision is made against stderr (not stdout)
    so that redirecting stderr to a file or a CI log never captures raw
    ANSI escape codes, and piping stdout does not strip color from errors
    shown on a terminal.

    Raises:
        SystemExit: always, with exit code 2.
    """
    text = f"localediff: {msg}\n"
    if sys.stderr.isatty() and not os.environ.get("NO_COLOR"):
        text = f"\x1b[31m{text}\x1b[0m"
    sys.stderr.write(text)
    sys.exit(2)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def read_json(file):
    """Read ``file`` as UTF-8 text and return the parsed JSON value.

    Every failure is routed through ``fail``, which prints the message to
    stderr and exits with status 2, so callers only ever see a parsed value.

    Args:
        file: path of the locale file to load.

    Returns:
        Whatever ``json.loads`` produces for the file's content.
    """
    try:
        with open(file, "r", encoding="utf-8") as fh:
            text = fh.read()
    except FileNotFoundError:
        return fail(f"cannot read {file}: no such file")
    except OSError as e:
        return fail(f"cannot read {file}: {e}")
    except UnicodeDecodeError as e:
        # UnicodeDecodeError is a ValueError, not an OSError: without this
        # branch a non-UTF-8 file would escape as a traceback instead of the
        # regular "error" exit.
        return fail(f"cannot read {file}: not valid UTF-8 ({e})")
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        return fail(f"invalid JSON in {file}: {e}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def flag(args, name):
    """Return the argument that follows the first ``name`` in ``args``.

    Returns None when ``name`` is absent or is the last argument.
    """
    try:
        pos = args.index(name)
    except ValueError:
        return None
    following = args[pos + 1:pos + 2]
    return following[0] if following else None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def has(args, name):
    """Report whether ``name`` occurs anywhere in ``args``."""
    return any(arg == name for arg in args)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def collect_check(args):
    """Return the values listed after the first ``--check``.

    Collection stops at the next argument that starts with ``--`` or at the
    end of ``args``. An empty list is returned when ``--check`` is absent.
    """
    try:
        start = args.index("--check") + 1
    except ValueError:
        return []
    values = []
    for arg in args[start:]:
        if arg.startswith("--"):
            break
        values.append(arg)
    return values
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def lang_stem(file):
    """Return the file name of ``file`` without a trailing ``.json``.

    The directory part is dropped and the extension is matched
    case-insensitively, e.g. ``locales/fr.JSON`` gives ``fr``.
    """
    name = os.path.basename(file)
    return re.sub(r"\.json$", "", name, flags=re.IGNORECASE)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def resolve_inputs(args):
    """Turn the raw argument list into the files and options to compare.

    Three invocation shapes are recognized, in this order of precedence:

    1. directory mode: ``--dir <dir>``, or a single positional that is a
       directory. The base is ``<--base stem or "en">.json`` inside it and
       every other ``*.json`` file there is checked;
    2. explicit mode: ``--base <file> --check <files...>``;
    3. shorthand: two or more positionals, the first being the base.

    Anything else, or any invalid input, ends the process through ``fail``.

    Returns:
        ``(base_file, check_files, opts, fmt, forced_lang)``.
    """
    output_format = flag(args, "--format") or "text"
    if output_format not in ("text", "json"):
        fail("--format must be text or json")

    options = {
        key: has(args, switch)
        for key, switch in (
            ("ignoreMissing", "--ignore-missing"),
            ("ignoreExtra", "--ignore-extra"),
            ("ignorePlural", "--ignore-plural"),
            ("ignoreEmpty", "--ignore-empty"),
        )
    }
    language_override = flag(args, "--lang")
    base_arg = flag(args, "--base")
    check_args = collect_check(args)

    # Indices claimed by a value-taking option (the option and its value) are
    # not positionals.
    taken = set()
    for option in ("--base", "--format", "--dir", "--lang"):
        if option in args:
            at = args.index(option)
            taken.update((at, at + 1))
    if "--check" in args:
        first = args.index("--check")
        # --check itself plus the contiguous run of values collected above.
        taken.update(range(first, first + len(check_args) + 1))
    positionals = [
        arg
        for pos, arg in enumerate(args)
        if pos not in taken and not arg.startswith("--")
    ]

    directory = flag(args, "--dir")
    if not directory and len(positionals) == 1 and os.path.isdir(positionals[0]):
        directory = positionals[0]

    if directory:
        if not os.path.isdir(directory):
            fail(f"not a directory: {directory}")
        locale_names = sorted(
            entry
            for entry in os.listdir(directory)
            if re.search(r"\.json$", entry, re.IGNORECASE)
        )
        if not locale_names:
            fail(f"no .json locale files in {directory}")
        base_lang = lang_stem(base_arg) if base_arg else "en"
        base_name = next(
            (entry for entry in locale_names if lang_stem(entry) == base_lang),
            None,
        )
        if not base_name:
            fail(f'base locale "{base_lang}.json" not found in {directory} (have: {", ".join(locale_names)})')
        base_file = os.path.join(directory, base_name)
        check_files = [
            os.path.join(directory, entry)
            for entry in locale_names
            if entry != base_name
        ]
        if not check_files:
            fail(f"only the base locale is present in {directory}; nothing to compare")
        return base_file, check_files, options, output_format, language_override

    if base_arg and check_args:
        return base_arg, check_args, options, output_format, language_override

    if len(positionals) >= 2:
        return positionals[0], positionals[1:], options, output_format, language_override

    fail("nothing to compare. Try: localediff --base en.json --check fr.json or localediff ./locales")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def main(argv=None):
    """Run the command-line interface and return the process exit status.

    Args:
        argv: argument list without the program name; ``sys.argv[1:]`` is
            used when it is None.

    Returns:
        0 when help or the version was printed, or when every checked file
        is in sync; 1 when at least one checked file has drifted. Input
        errors do not return: they exit with status 2 through ``fail``.
    """
    args = sys.argv[1:] if argv is None else argv
    # Help/version are honored wherever they appear, not only as the first
    # argument; otherwise `localediff ./locales -h` would treat "-h" as a
    # locale file and fail trying to read it.
    if not args or "-h" in args or "--help" in args:
        sys.stdout.write(HELP)
        return 0
    if "-v" in args or "--version" in args:
        sys.stdout.write(VERSION + "\n")
        return 0

    base_file, check_files, opts, fmt, forced_lang = resolve_inputs(args)
    base = read_json(base_file)

    results = []
    for file in check_files:
        # An explicit --lang wins; otherwise the language comes from the
        # target's file name (fr.json -> fr).
        lang = forced_lang or core.normalize_lang(lang_stem(file))
        rep = core.compare(base, read_json(file), {**opts, "lang": lang})
        results.append({"file": file, "base": base_file, "lang": lang, "report": rep})

    if fmt == "json":
        sys.stdout.write(report.format_json(results) + "\n")
    else:
        sys.stdout.write(report.format_text(results, COLORS if _COLOR else None) + "\n")

    return 1 if any(not r["report"]["inSync"] for r in results) else 0
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
if __name__ == "__main__":
|
|
200
|
+
sys.exit(main())
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""localediff core — pure locale-comparison logic. No fs, no clock, no network.
|
|
2
|
+
|
|
3
|
+
Given a base locale object (the source of truth, e.g. en.json) and a target
|
|
4
|
+
locale object (e.g. fr.json), find where they have drifted apart:
|
|
5
|
+
|
|
6
|
+
- missing keys the base has a key the target never translated
|
|
7
|
+
- extra keys the target still has a key the base has dropped
|
|
8
|
+
- plural gaps the target is missing a CLDR plural form its own language
|
|
9
|
+
requires (i18next ``_one`` / ``_other`` / ``_few`` ... suffixes)
|
|
10
|
+
- empty values the key exists in the target but its string is blank
|
|
11
|
+
|
|
12
|
+
Comparison is structural and framework-agnostic: nested objects/arrays are
|
|
13
|
+
flattened to dot-paths, so it works for next-intl, react-intl, i18next,
|
|
14
|
+
vue-i18n, or any plain JSON message catalog. Behavior mirrors the Node package
|
|
15
|
+
of the same name exactly, so both produce identical reports.
|
|
16
|
+
|
|
17
|
+
The plural check is CLDR-aware on purpose. A naive "the target must have every
|
|
18
|
+
form the base has" rule false-positives on languages with fewer plural
|
|
19
|
+
categories than English — Chinese/Japanese/Korean only ever need ``other``, so
|
|
20
|
+
an ``items_other``-only zh.json is *correct*, while the same shape in fr.json is
|
|
21
|
+
a real bug. We resolve the required categories from the target's language.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import re
|
|
25
|
+
|
|
26
|
+
PLURAL_CATEGORIES = ["zero", "one", "two", "few", "many", "other"]
|
|
27
|
+
_PLURAL_RE = re.compile(r"^(.+)_(zero|one|two|few|many|other)$")
|
|
28
|
+
|
|
29
|
+
# CLDR cardinal plural categories needed for everyday integer counts, keyed by
|
|
30
|
+
# language. Conservative on purpose — only languages we're confident about are
|
|
31
|
+
# listed; anything else falls back to parity with the base (see
|
|
32
|
+
# required_categories). Decimal-only categories (e.g. Romance "many" for compact
|
|
33
|
+
# millions) are deliberately omitted to avoid noisy false positives.
|
|
34
|
+
# Language code -> plural categories a translation must define for integer
# counts. Categories that CLDR only selects for fractional or compact
# numbers are left out, which is why Lithuanian is listed without "many"
# (CLDR assigns "many" to Lithuanian only when the number has a fraction).
LANG_PLURALS = {
    # no count distinction — `other` is the only form
    "zh": ["other"], "ja": ["other"], "ko": ["other"], "vi": ["other"],
    "th": ["other"], "id": ["other"], "ms": ["other"], "lo": ["other"],
    "km": ["other"], "my": ["other"], "yo": ["other"],
    # one / other (the large majority)
    "en": ["one", "other"], "de": ["one", "other"], "nl": ["one", "other"],
    "sv": ["one", "other"], "da": ["one", "other"], "nb": ["one", "other"],
    "nn": ["one", "other"], "no": ["one", "other"], "fi": ["one", "other"],
    "es": ["one", "other"], "it": ["one", "other"], "pt": ["one", "other"],
    "fr": ["one", "other"], "ca": ["one", "other"], "gl": ["one", "other"],
    "el": ["one", "other"], "hu": ["one", "other"], "tr": ["one", "other"],
    "fa": ["one", "other"], "hi": ["one", "other"], "bg": ["one", "other"],
    "et": ["one", "other"], "eu": ["one", "other"], "af": ["one", "other"],
    # East Slavic & Polish: one / few / many / other
    "ru": ["one", "few", "many", "other"], "uk": ["one", "few", "many", "other"],
    "pl": ["one", "few", "many", "other"], "be": ["one", "few", "many", "other"],
    # one / few / other
    "cs": ["one", "few", "other"], "sk": ["one", "few", "other"],
    "hr": ["one", "few", "other"], "sr": ["one", "few", "other"],
    "ro": ["one", "few", "other"], "lt": ["one", "few", "other"],
    # other combinations
    "lv": ["zero", "one", "other"], "sl": ["one", "two", "few", "other"],
    # all six
    "ar": ["zero", "one", "two", "few", "many", "other"],
    "cy": ["zero", "one", "two", "few", "many", "other"],
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def normalize_lang(tag) -> str:
    """Reduce a locale tag to its lowercase primary language subtag.

    Everything from the first ``-`` or ``_`` onward is dropped:
    "zh-Hans-CN" -> "zh", "pt_BR" -> "pt", "EN" -> "en". A falsy ``tag``
    (None, "") yields "".
    """
    text = str(tag) if tag else ""
    lowered = text.lower()
    return re.split(r"[-_]", lowered, maxsplit=1)[0]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def flatten(obj):
    """Return the leaves of a nested dict/list as ``(path, value)`` tuples.

    Paths join keys and list indices with dots, so
    ``{"a": {"b": 1}, "c": [2, 3]}`` gives
    ``[("a.b", 1), ("c.0", 2), ("c.1", 3)]``. Leaves appear in depth-first
    document order, and an empty dict or list is itself reported as a leaf
    so that its key is not lost.
    """
    leaves = []
    _walk(obj, "", leaves)
    return leaves
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _walk(value, prefix, out):
    """Append the ``(path, leaf)`` tuples found under ``value`` to ``out``.

    ``prefix`` is the dot-path of ``value`` itself ("" at the root). Dicts
    contribute their keys and lists their indices as path segments; scalars
    and empty containers are recorded as leaves at ``prefix``.
    """
    if isinstance(value, dict):
        children = list(value.items())
    elif isinstance(value, list):
        children = [(str(index), item) for index, item in enumerate(value)]
    else:
        children = None

    # Scalars (None) and empty containers ([]) both end the descent here.
    if not children:
        out.append((prefix, value))
        return

    for segment, child in children:
        path = segment if prefix == "" else f"{prefix}.{segment}"
        _walk(child, path, out)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def split_plural(path):
    """Split a path that ends in a plural suffix into ``(stem, category)``.

    "items_one" -> ("items", "one"). Returns None when ``path`` does not
    end in ``_zero``, ``_one``, ``_two``, ``_few``, ``_many`` or ``_other``.
    """
    found = _PLURAL_RE.match(path)
    if found is None:
        return None
    stem, category = found.groups()
    return stem, category
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def plural_groups(entries):
    """Collect the plural categories present for each plural stem.

    ``entries`` is an iterable of ``(path, value)`` leaves. Paths with a
    plural suffix are grouped by stem; a stem is kept only when ``other`` is
    among its categories, so a lone key such as ``step_one`` is not treated
    as a plural. Returns ``dict[stem, set]`` in first-seen stem order.
    """
    by_stem = {}
    for path, _ in entries:
        parts = split_plural(path)
        if parts is None:
            continue
        stem, category = parts
        if stem not in by_stem:
            by_stem[stem] = set()
        by_stem[stem].add(category)
    return {stem: cats for stem, cats in by_stem.items() if "other" in cats}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _order_cats(cat_set):
    """List the members of ``cat_set`` in ``PLURAL_CATEGORIES`` order.

    Values that are not in ``PLURAL_CATEGORIES`` are dropped.
    """
    ordered = []
    for category in PLURAL_CATEGORIES:
        if category in cat_set:
            ordered.append(category)
    return ordered
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def required_categories(lang, base_cats):
    """Return the plural categories a target in ``lang`` should define.

    Languages present in ``LANG_PLURALS`` get a copy of their table entry.
    Any other language falls back to the base's own categories
    (``base_cats``) in canonical order, i.e. parity with the base.
    """
    known = LANG_PLURALS.get(lang)
    if known:
        return list(known)
    return _order_cats(base_cats)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _is_blank(value):
    """Return True only for a string that is empty or all whitespace."""
    if not isinstance(value, str):
        return False
    return not value.strip()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def compare(base, target, opts=None):
    """Diff a target locale against the base and return a drift report.

    ``opts`` is an optional dict that may hold ``lang`` and the booleans
    ``ignoreMissing`` / ``ignoreExtra`` / ``ignorePlural`` / ``ignoreEmpty``.

    The returned dict has the lists ``missing``, ``extra``, ``plural`` and
    ``empty`` (an ignored check yields an empty list), a ``counts`` dict with
    the length of each list, and ``inSync``, which is True when every count
    is zero.
    """
    options = opts if opts else {}
    base_entries = flatten(base)
    target_entries = flatten(target)

    base_plurals = plural_groups(base_entries)
    target_plurals = plural_groups(target_entries)
    plural_stems = set(base_plurals)

    def is_plain(path):
        # A path is "plain" unless it is a member of one of the base's plural
        # groups; those members are judged by the plural check instead.
        parts = split_plural(path)
        return parts is None or parts[0] not in plural_stems

    base_plain_paths = [path for path, _ in base_entries if is_plain(path)]
    target_plain_paths = [path for path, _ in target_entries if is_plain(path)]
    target_plain_set = set(target_plain_paths)
    base_plain_set = set(base_plain_paths)
    base_value = dict(base_entries)

    missing = [path for path in base_plain_paths if path not in target_plain_set]
    extra = [path for path in target_plain_paths if path not in base_plain_set]

    lang = normalize_lang(options.get("lang"))
    plural = []
    for stem, base_cats in base_plurals.items():
        have = target_plurals.get(stem, set())
        required = required_categories(lang, base_cats)
        gaps = [cat for cat in required if cat not in have]
        if not gaps:
            continue
        plural.append({
            "stem": stem,
            "required": required,
            "have": _order_cats(have),
            "missing": gaps,
        })

    empty = []
    for path, value in target_entries:
        base_text = base_value.get(path)
        if not _is_blank(value):
            continue
        # Only flag a blank target value when the base actually has text.
        if isinstance(base_text, str) and not _is_blank(base_text):
            empty.append(path)

    checks = (
        ("missing", "ignoreMissing", missing),
        ("extra", "ignoreExtra", extra),
        ("plural", "ignorePlural", plural),
        ("empty", "ignoreEmpty", empty),
    )
    report = {}
    for name, ignore_flag, found in checks:
        report[name] = [] if options.get(ignore_flag) else found
    report["counts"] = {name: len(report[name]) for name, _flag, _found in checks}
    report["inSync"] = not any(report["counts"].values())
    return report
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Formatting for localediff reports. Pure: data + a color palette -> string."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _identity(s):
    """Return ``s`` untouched; used as the no-op color function."""
    return s
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Colorless palette: every color name maps to the identity function.
_PLAIN = dict.fromkeys(
    ("red", "green", "yellow", "cyan", "dim", "bold"),
    _identity,
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def format_text(results, c=None):
    """Build the human-readable report for a list of per-file results.

    ``results`` is a list of dicts carrying at least ``"file"`` and
    ``"report"``. ``c`` is an optional palette mapping a color name to a
    callable applied to the text; a missing or empty palette means no color.
    Returns the report as one newline-joined string ending in a summary line.
    """
    palette = c if c else _PLAIN

    def paint(color, text):
        # Look the color up lazily so unused palette entries are never needed.
        return palette[color](text)

    def key_list_line(color, label, paths):
        return f" {paint(color, label)} ({len(paths)}): {', '.join(paths)}"

    out = []
    drifted = 0
    totals = dict.fromkeys(("missing", "extra", "plural", "empty"), 0)

    for entry in results:
        report = entry["report"]
        name = entry["file"]
        if report["inSync"]:
            out.append(
                f"{paint('green', '✓')} {paint('bold', name)} {paint('dim', '— in sync')}"
            )
            continue

        drifted += 1
        out.append(f"{paint('red', '✗')} {paint('bold', name)}")
        if report["missing"]:
            out.append(key_list_line("yellow", "missing", report["missing"]))
        for gap in report["plural"]:
            have = gap["have"]
            if have:
                detail = "has {%s}, missing {%s}" % (
                    ",".join(have), ",".join(gap["missing"]),
                )
            else:
                # The target defines no form of this plural key at all.
                detail = "untranslated, needs {%s}" % ",".join(gap["missing"])
            out.append(f" {paint('cyan', 'plural')} {gap['stem']} — {detail}")
        if report["empty"]:
            out.append(key_list_line("dim", "empty", report["empty"]))
        if report["extra"]:
            out.append(key_list_line("dim", "extra", report["extra"]))
        out.append("")
        for kind in totals:
            totals[kind] += report["counts"][kind]

    if drifted:
        headline = paint("red", f"✗ {drifted} of {len(results)} file(s) drifted")
        breakdown = paint(
            "dim",
            f" — {totals['missing']} missing, {totals['plural']} plural gap(s), "
            f"{totals['empty']} empty, {totals['extra']} extra",
        )
        summary = headline + breakdown
    else:
        summary = paint("green", f"✓ all {len(results)} file(s) in sync")
    out.append(summary)
    return "\n".join(out)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def format_json(results):
    """Serialize the results as indented JSON for CI tooling.

    The top-level object reports the base of the first result (``None`` when
    there are no results), how many files were checked, how many drifted,
    an overall ``inSync`` flag, and one detail object per file.
    """
    def describe(entry):
        report = entry["report"]
        detail = {
            "file": entry["file"],
            "base": entry["base"],
            "lang": entry.get("lang"),
            "inSync": report["inSync"],
        }
        for field in ("missing", "plural", "empty", "extra", "counts"):
            detail[field] = report[field]
        return detail

    files = [describe(entry) for entry in results]
    drifted = len([item for item in files if not item["inSync"]])
    payload = {
        "base": results[0]["base"] if results else None,
        "checked": len(files),
        "drifted": drifted,
        "inSync": drifted == 0,
        "files": files,
    }
    return json.dumps(payload, indent=2, ensure_ascii=False)
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: localediff
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Find drift between i18n locale files — missing keys, CLDR-aware plural gaps, empty values. Framework-agnostic, zero dependencies.
|
|
5
|
+
Author: yyfjj
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jjdoor/localediff-py
|
|
8
|
+
Project-URL: Repository, https://github.com/jjdoor/localediff-py
|
|
9
|
+
Project-URL: Issues, https://github.com/jjdoor/localediff-py/issues
|
|
10
|
+
Keywords: i18n,l10n,locale,translation,missing-translations,drift,diff,i18next,cli,ci,pluralization
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Software Development :: Internationalization
|
|
18
|
+
Classifier: Topic :: Software Development :: Localization
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Requires-Python: >=3.8
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# localediff
|
|
26
|
+
|
|
27
|
+
**Find drift between your i18n locale files — before your users do.** You add a
|
|
28
|
+
string to `en.json`, ship it, and three weeks later notice `fr.json` and
|
|
29
|
+
`zh.json` were never updated. `localediff` catches that in CI: missing keys,
|
|
30
|
+
plural forms a language actually needs, and keys that exist but were left blank.
|
|
31
|
+
Framework-agnostic, **zero dependencies**.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install localediff
|
|
35
|
+
localediff ./locales
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
✗ fr.json
|
|
40
|
+
missing (1): auth.errors.locked
|
|
41
|
+
plural cart.items — has {other}, missing {one}
|
|
42
|
+
empty (1): footer.copyright
|
|
43
|
+
extra (1): legacy.banner
|
|
44
|
+
|
|
45
|
+
✓ zh.json — in sync
|
|
46
|
+
|
|
47
|
+
✗ 1 of 2 file(s) drifted — 1 missing, 1 plural gap(s), 1 empty, 1 extra
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Why another i18n tool
|
|
51
|
+
|
|
52
|
+
Most existing tools are tied to one framework, need an account, or are heavy
|
|
53
|
+
AST-based linters. `localediff` just reads JSON. It compares **structure**, so it
|
|
54
|
+
works for next-intl, react-intl, i18next, vue-i18n, Django/Flask JSON catalogs,
|
|
55
|
+
or any plain message file — and it ships for **both PyPI and npm** with identical
|
|
56
|
+
behavior.
|
|
57
|
+
|
|
58
|
+
### It understands plurals per language
|
|
59
|
+
|
|
60
|
+
This is the part naive "diff two JSON files" scripts get wrong. English has two
|
|
61
|
+
plural forms (`one`, `other`); Chinese has one (`other`); Russian has four
|
|
62
|
+
(`one`, `few`, `many`, `other`). A file with only `items_other` is **correct for
|
|
63
|
+
Chinese** but **broken for French**. `localediff` resolves the required CLDR
|
|
64
|
+
categories from each target's language, so it flags the real bug without crying
|
|
65
|
+
wolf on `zh.json`.
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# en base: items_one + items_other
|
|
69
|
+
zh.json → items_other only → ✓ in sync (Chinese needs only `other`)
|
|
70
|
+
fr.json → items_other only → ✗ missing {one}
|
|
71
|
+
ru.json → items_one + _other → ✗ missing {few, many}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Usage
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Scan a folder. The base defaults to en.json; everything else is checked.
|
|
78
|
+
localediff ./locales
|
|
79
|
+
|
|
80
|
+
# Pick a different base language in the folder.
|
|
81
|
+
localediff ./locales --base de
|
|
82
|
+
|
|
83
|
+
# Compare specific files explicitly.
|
|
84
|
+
localediff --base en.json --check fr.json zh.json
|
|
85
|
+
|
|
86
|
+
# Shorthand: first file is the base.
|
|
87
|
+
localediff en.json fr.json zh.json
|
|
88
|
+
|
|
89
|
+
# Machine-readable output for CI gates.
|
|
90
|
+
localediff ./locales --format json
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
You can also run it as a module: `python -m localediff ./locales`.
|
|
94
|
+
|
|
95
|
+
## What it checks
|
|
96
|
+
|
|
97
|
+
| Check | Meaning |
|
|
98
|
+
|-------|---------|
|
|
99
|
+
| **missing** | a key in the base that the target never translated |
|
|
100
|
+
| **plural** | a pluralized key (`key_one`, `key_other`, …) missing a CLDR form the **target language** requires |
|
|
101
|
+
| **empty** | a key present in the target whose value is a blank string while the base value has text |
|
|
102
|
+
| **extra** | a key in the target the base no longer has |
|
|
103
|
+
|
|
104
|
+
Nested objects are flattened to dot-paths (`auth.errors.locked`); arrays are
|
|
105
|
+
indexed (`steps.0`). Plural keys use the i18next suffix convention
|
|
106
|
+
(`_zero`, `_one`, `_two`, `_few`, `_many`, `_other`). Unknown languages fall back
|
|
107
|
+
to parity with the base, so you never get a confidently-wrong result.
|
|
108
|
+
|
|
109
|
+
## Options
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
--base <file|lang> base/source locale (a file, or a lang stem in dir mode)
|
|
113
|
+
--check <files...> one or more target locales to compare against the base
|
|
114
|
+
--dir <dir> scan a directory of *.json locales
|
|
115
|
+
--lang <code> force the target language for plural rules
|
|
116
|
+
--format text|json output format (default: text)
|
|
117
|
+
--ignore-missing don't report missing keys
|
|
118
|
+
--ignore-extra don't report extra keys
|
|
119
|
+
--ignore-plural don't report plural gaps
|
|
120
|
+
--ignore-empty don't report empty values
|
|
121
|
+
-v, --version
|
|
122
|
+
-h, --help
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## In CI
|
|
126
|
+
|
|
127
|
+
`localediff` exits non-zero when anything has drifted:
|
|
128
|
+
|
|
129
|
+
| Exit code | Meaning |
|
|
130
|
+
|-----------|---------|
|
|
131
|
+
| `0` | every checked file is in sync |
|
|
132
|
+
| `1` | one or more files have drift |
|
|
133
|
+
| `2` | error (file not found, invalid JSON, bad arguments) |
|
|
134
|
+
|
|
135
|
+
## Also available for Node
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
npx localediff ./locales
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Same checks, same flags, same exit codes — [localediff on npm](https://github.com/jjdoor/localediff).
|
|
142
|
+
|
|
143
|
+
## Scope
|
|
144
|
+
|
|
145
|
+
JSON locale files only (the common case). YAML/`.properties`/gettext are not
|
|
146
|
+
supported — parsing them would mean pulling in a dependency, and zero-dep is the
|
|
147
|
+
point. Convert to JSON, or open an issue to discuss.
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
MIT
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/localediff/__init__.py
|
|
5
|
+
src/localediff/__main__.py
|
|
6
|
+
src/localediff/cli.py
|
|
7
|
+
src/localediff/core.py
|
|
8
|
+
src/localediff/report.py
|
|
9
|
+
src/localediff.egg-info/PKG-INFO
|
|
10
|
+
src/localediff.egg-info/SOURCES.txt
|
|
11
|
+
src/localediff.egg-info/dependency_links.txt
|
|
12
|
+
src/localediff.egg-info/entry_points.txt
|
|
13
|
+
src/localediff.egg-info/top_level.txt
|
|
14
|
+
tests/test_core.py
|
|
15
|
+
tests/test_report.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
localediff
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from localediff.core import (
|
|
2
|
+
compare, flatten, normalize_lang, plural_groups, required_categories, split_plural,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_flatten_dot_paths_in_document_order():
    """Nested dicts flatten to dot-joined paths, listed in document order."""
    nested = {"a": {"b": 1, "c": 2}, "d": 3}
    expected = [("a.b", 1), ("a.c", 2), ("d", 3)]
    assert flatten(nested) == expected
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_flatten_indexes_arrays_and_keeps_empty_containers():
    """List items get index paths; empty dicts and lists still yield an entry."""
    document = {"list": ["x", "y"], "blank": {}, "none": []}
    paths = [path for path, _value in flatten(document)]
    assert paths == ["list.0", "list.1", "blank", "none"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_split_plural_recognizes_cldr_suffixes_only():
    """Only a trailing plural-category suffix splits a path into stem and category."""
    plural_paths = {
        "items_one": ("items", "one"),
        "a.b.count_many": ("a.b.count", "many"),
    }
    for path, expected in plural_paths.items():
        assert split_plural(path) == expected
    for path in ("items", "step_two_factor"):
        assert split_plural(path) is None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_plural_groups_keeps_only_groups_with_other():
    """A stem counts as a plural group only when it has an ``other`` form."""
    document = {
        # real plural
        "items_one": "a",
        "items_other": "b",
        # NOT a plural (no `other`)
        "step_one": "x",
        "step_two": "y",
    }
    groups = plural_groups(flatten(document))
    assert list(groups) == ["items"]
    assert groups["items"] == {"one", "other"}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_normalize_lang_strips_region_script_subtags():
    """Language tags reduce to a lowercase primary subtag; empty or None gives ''."""
    expectations = [
        ("zh-Hans-CN", "zh"),
        ("pt_BR", "pt"),
        ("FR", "fr"),
        ("", ""),
        (None, ""),
    ]
    for raw, normalized in expectations:
        assert normalize_lang(raw) == normalized
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_required_categories_table_then_parity_fallback():
    """Known languages use their own table; unknown ones mirror the base."""
    expectations = [
        ("zh", ["other"]),
        ("ru", ["one", "few", "many", "other"]),
        ("xx", ["one", "other"]),
    ]
    for lang, expected in expectations:
        assert required_categories(lang, {"one", "other"}) == expected
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_compare_missing_and_extra_plain_keys():
    """Keys only in the base are missing; keys only in the target are extra."""
    report = compare(
        {"a": "1", "b": "2", "nested": {"c": "3"}},
        {"a": "uno", "nested": {"c": "tres", "d": "cuatro"}},
        {"lang": "es"},
    )
    assert report["missing"] == ["b"]
    assert report["extra"] == ["nested.d"]
    assert report["inSync"] is False
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_compare_empty_values_only_when_base_has_text():
    """A blank target value is reported only where the base value has text."""
    source = {"a": "hello", "b": "world", "c": ""}
    translated = {"a": "", "b": " ", "c": ""}
    report = compare(source, translated, {"lang": "de"})
    assert report["empty"] == ["a", "b"]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_compare_plural_gap_respects_target_language():
    """The plural forms demanded of a target depend on the target's language."""
    base = {"items_one": "{{count}} item", "items_other": "{{count}} items"}

    # Chinese only needs `other` — an `_other`-only file is CORRECT.
    chinese = compare(base, {"items_other": "件"}, {"lang": "zh"})
    assert chinese["plural"] == []
    assert chinese["inSync"] is True

    # French needs one+other — the same shape IS a bug.
    french = compare(base, {"items_other": "articles"}, {"lang": "fr"})
    expected_gap = {
        "stem": "items",
        "required": ["one", "other"],
        "have": ["other"],
        "missing": ["one"],
    }
    assert french["plural"] == [expected_gap]

    # Russian needs four forms — flags few+many even though the base lacks them.
    russian_target = {"items_one": "товар", "items_other": "товара"}
    russian = compare(base, russian_target, {"lang": "ru"})
    assert russian["plural"][0]["missing"] == ["few", "many"]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_compare_plural_members_not_double_reported_as_missing():
    """Absent plural members show up as one plural gap, not as missing keys."""
    report = compare({"items_one": "a", "items_other": "b"}, {}, {"lang": "fr"})
    assert report["missing"] == []
    plural_gaps = report["plural"]
    assert len(plural_gaps) == 1
    assert plural_gaps[0]["have"] == []
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_compare_ordinary_one_two_keys_are_plain_keys():
    """``step_one``/``step_two`` without ``step_other`` are ordinary keys."""
    report = compare(
        {"step_one": "First", "step_two": "Second"},
        {"step_one": "Premier"},
        {"lang": "fr"},
    )
    assert report["plural"] == []
    assert report["missing"] == ["step_two"]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_compare_ignore_flags_reach_in_sync():
    """Ignoring the failing checks empties their lists and flips ``inSync``."""
    base = {"a": "1", "b": "2"}
    target = {"a": "uno", "c": "tres"}

    strict = compare(base, target, {"lang": "es"})
    assert strict["inSync"] is False

    lenient_opts = {"lang": "es", "ignoreMissing": True, "ignoreExtra": True}
    lenient = compare(base, target, lenient_opts)
    assert lenient["missing"] == []
    assert lenient["extra"] == []
    assert lenient["inSync"] is True
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_compare_identical_locales_in_sync():
    """A deep copy of the base compares as fully in sync with zero counts."""
    import copy

    base = {"a": "1", "nested": {"b": "2"}, "items_one": "x", "items_other": "y"}
    clone = copy.deepcopy(base)
    report = compare(base, clone, {"lang": "en"})
    assert report["inSync"] is True
    assert report["counts"] == {"missing": 0, "extra": 0, "plural": 0, "empty": 0}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from localediff.core import compare
|
|
4
|
+
from localediff.report import format_json, format_text
|
|
5
|
+
|
|
6
|
+
BASE = {"a": "1", "b": "2", "items_one": "x", "items_other": "y", "footer": "hi"}
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def build_results():
    """Build two per-file results against ``BASE``: a drifted fr and an in-sync zh."""
    targets = {
        "fr": {"a": "uno", "items_other": "y", "footer": "", "legacy": "old"},
        "zh": {"a": "1", "b": "2", "items_other": "y", "footer": "hi"},
    }
    results = []
    for lang, target in targets.items():
        report = compare(BASE, target, {"lang": lang})
        results.append(
            {"file": f"{lang}.json", "base": "en.json", "lang": lang, "report": report}
        )
    return results
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_format_text_labels_each_category_and_in_sync_file():
    """The text report has a line per drift category and marks in-sync files."""
    rendered = format_text(build_results())
    expected_fragments = (
        "✗ fr.json",
        "missing (1): b",
        "plural items — has {other}, missing {one}",
        "empty (1): footer",
        "extra (1): legacy",
        "✓ zh.json",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_format_text_summary_counts_drifted_files():
    """The summary line states how many of the checked files drifted."""
    rendered = format_text(build_results())
    assert "✗ 1 of 2 file(s) drifted" in rendered
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_format_text_all_in_sync():
    """With no drift anywhere, the summary says all files are in sync."""
    report = compare(BASE, BASE, {"lang": "en"})
    results = [{"file": "en2.json", "base": "en.json", "report": report}]
    assert "✓ all 1 file(s) in sync" in format_text(results)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_format_json_valid_with_detail_and_totals():
    """The JSON output parses and carries both the totals and per-file detail."""
    parsed = json.loads(format_json(build_results()))
    assert parsed["checked"] == 2
    assert parsed["drifted"] == 1
    assert parsed["inSync"] is False

    first, second = parsed["files"][0], parsed["files"][1]
    assert first["file"] == "fr.json"
    assert first["counts"]["missing"] == 1
    assert second["inSync"] is True
|