python-po-lint 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
po_lint/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """python-po-lint: Lint .po translation files for contamination, wrong languages, shifts, and garbled text."""
2
+
3
+ __version__ = "0.1.0"
po_lint/checks.py ADDED
@@ -0,0 +1,291 @@
1
+ """Individual lint checks for .po file entries."""
2
+
3
+ import re
4
+ import unicodedata
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+
8
+
9
class Severity(Enum):
    """Severity of a lint finding: ERROR fails the run; WARNING fails only with --warnings-as-errors."""

    ERROR = "error"
    WARNING = "warning"
12
+
13
+
14
class IssueType(Enum):
    """Kind of problem detected in a .po entry.

    WRONG_LANGUAGE: text detected as a different language than the file's locale.
    WRONG_SCRIPT: text written in an unexpected writing script (or mixed distinctive chars).
    SHIFTED_ENTRY: msgstr appears to belong to a different msgid (length heuristic).
    GARBLED_TEXT: control/replacement characters suggest corrupted text.
    UNTRANSLATED: entry left untranslated.
    """

    WRONG_LANGUAGE = "wrong_language"
    WRONG_SCRIPT = "wrong_script"
    SHIFTED_ENTRY = "shifted_entry"
    GARBLED_TEXT = "garbled_text"
    UNTRANSLATED = "untranslated"
20
+
21
+
22
@dataclass
class Issue:
    """One lint finding, tied to a specific entry of a .po file.

    ``file``/``line`` locate the entry; ``msgid``/``msgstr`` carry its text.
    ``detected_lang`` and ``confidence`` are filled in only by the
    language-detection check.
    """

    file: str
    line: int
    msgid: str
    msgstr: str
    issue_type: IssueType
    severity: Severity
    message: str
    detected_lang: str = ""
    confidence: float = 0.0

    def __str__(self) -> str:
        # Truncate long source strings so terminal output stays readable.
        if len(self.msgid) > 60:
            msgid_short = self.msgid[:60] + "..."
        else:
            msgid_short = self.msgid
        return f" {self.severity.value.upper()}: [{self.issue_type.value}] {self.message}\n msgid: {msgid_short!r}"
39
+
40
+
41
# Script detection patterns — one regex of Unicode code-point ranges per
# writing system. Fix: added "greek" (Greek and Coptic + Greek Extended),
# which was missing, and dropped the redundant Latin sub-ranges
# \u0100-\u017F and \u0180-\u024F (both subsets of \u00C0-\u024F).
SCRIPT_PATTERNS = {
    "latin": re.compile(r"[a-zA-Z\u00C0-\u024F\u1E00-\u1EFF]"),
    "greek": re.compile(r"[\u0370-\u03FF\u1F00-\u1FFF]"),
    "arabic": re.compile(r"[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]"),
    "cyrillic": re.compile(r"[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]"),
    "devanagari": re.compile(r"[\u0900-\u097F\uA8E0-\uA8FF]"),
    "bengali": re.compile(r"[\u0980-\u09FF]"),
    "gurmukhi": re.compile(r"[\u0A00-\u0A7F]"),
    "gujarati": re.compile(r"[\u0A80-\u0AFF]"),
    "oriya": re.compile(r"[\u0B00-\u0B7F]"),
    "tamil": re.compile(r"[\u0B80-\u0BFF]"),
    "telugu": re.compile(r"[\u0C00-\u0C7F]"),
    "kannada": re.compile(r"[\u0C80-\u0CFF]"),
    "malayalam": re.compile(r"[\u0D00-\u0D7F]"),
    "sinhala": re.compile(r"[\u0D80-\u0DFF]"),
    "thai": re.compile(r"[\u0E00-\u0E7F]"),
    "lao": re.compile(r"[\u0E80-\u0EFF]"),
    "tibetan": re.compile(r"[\u0F00-\u0FFF]"),
    "myanmar": re.compile(r"[\u1000-\u109F\uAA60-\uAA7F]"),
    "georgian": re.compile(r"[\u10A0-\u10FF\u2D00-\u2D2F]"),
    "hangul": re.compile(r"[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]"),
    "cjk": re.compile(r"[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF]"),
    "kana": re.compile(r"[\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF]"),
    "hebrew": re.compile(r"[\u0590-\u05FF\uFB1D-\uFB4F]"),
    "armenian": re.compile(r"[\u0530-\u058F\uFB00-\uFB17]"),
    "ethiopic": re.compile(r"[\u1200-\u137F\u1380-\u139F\u2D80-\u2DDF]"),
    "khmer": re.compile(r"[\u1780-\u17FF\u19E0-\u19FF]"),
}

# Expected scripts per locale — comprehensive mapping covering all fastText languages.
# Languages may accept multiple scripts (e.g. Serbian uses both Cyrillic and Latin).
# fmt: off
LOCALE_SCRIPTS = {
    # Latin script
    "af": {"latin"}, "az": {"latin"}, "br": {"latin"}, "bs": {"latin"}, "ca": {"latin"},
    "ceb": {"latin"}, "co": {"latin"}, "cs": {"latin"}, "cy": {"latin"}, "da": {"latin"},
    "de": {"latin"}, "en": {"latin"}, "eo": {"latin"}, "es": {"latin"}, "et": {"latin"},
    "eu": {"latin"}, "fi": {"latin"}, "fil": {"latin"}, "fr": {"latin"}, "fy": {"latin"},
    "ga": {"latin"}, "gd": {"latin"}, "gl": {"latin"}, "ha": {"latin"}, "haw": {"latin"},
    "hr": {"latin"}, "ht": {"latin"}, "hu": {"latin"}, "id": {"latin"}, "ig": {"latin"},
    "is": {"latin"}, "it": {"latin"}, "jv": {"latin"}, "la": {"latin"}, "lb": {"latin"},
    "lt": {"latin"}, "lv": {"latin"}, "mg": {"latin"}, "mi": {"latin"}, "ms": {"latin"},
    "mt": {"latin"}, "nl": {"latin"}, "no": {"latin"}, "nb": {"latin"}, "nn": {"latin"},
    "ny": {"latin"}, "oc": {"latin"}, "pl": {"latin"}, "pt": {"latin"}, "ro": {"latin"},
    "rw": {"latin"}, "sk": {"latin"}, "sl": {"latin"}, "sm": {"latin"}, "sn": {"latin"},
    "so": {"latin"}, "st": {"latin"}, "su": {"latin"}, "sv": {"latin"},
    "sw": {"latin"}, "tl": {"latin"}, "tr": {"latin"}, "uz": {"latin"}, "vi": {"latin"},
    "war": {"latin"}, "xh": {"latin"}, "yo": {"latin"}, "zu": {"latin"},
    # Cyrillic script
    "be": {"cyrillic"}, "bg": {"cyrillic"}, "ky": {"cyrillic"}, "kk": {"cyrillic"},
    "mk": {"cyrillic"}, "mn": {"cyrillic"}, "ru": {"cyrillic"}, "tg": {"cyrillic"},
    "tt": {"cyrillic"}, "uk": {"cyrillic"},
    # Multi-script languages
    "sr": {"cyrillic", "latin"}, "sh": {"cyrillic", "latin"},
    # Arabic script
    "ar": {"arabic"}, "fa": {"arabic"}, "ku": {"arabic"}, "ps": {"arabic"},
    "sd": {"arabic"}, "ug": {"arabic"}, "ur": {"arabic"},
    # Devanagari
    "hi": {"devanagari"}, "mr": {"devanagari"}, "ne": {"devanagari"}, "sa": {"devanagari"},
    # Other Indic scripts
    "as": {"bengali"}, "bn": {"bengali"},
    "gu": {"gujarati"},
    "kn": {"kannada"},
    "ml": {"malayalam"},
    "or": {"oriya"},
    "pa": {"gurmukhi"},
    "si": {"sinhala"},
    "ta": {"tamil"},
    "te": {"telugu"},
    # East Asian
    "zh": {"cjk"}, "zh_Hans": {"cjk"}, "zh_Hant": {"cjk"},
    "ja": {"cjk", "kana"},
    "ko": {"hangul"},
    # Other scripts
    "am": {"ethiopic"}, "ti": {"ethiopic"},
    "el": {"greek"},  # fixed: was {"latin"}, which let Latin contamination pass in Greek files
    "he": {"hebrew"}, "yi": {"hebrew"},
    "hy": {"armenian"},
    "ka": {"georgian"},
    "km": {"khmer"},
    "lo": {"lao"},
    "my": {"myanmar"},
    "th": {"thai"},
}
# fmt: on
126
+
127
+
128
def detect_scripts(text: str) -> dict[str, int]:
    """Return a mapping of script name -> number of matching characters in *text*.

    Scripts with zero matches are omitted entirely.
    """
    hits = ((name, len(pattern.findall(text))) for name, pattern in SCRIPT_PATTERNS.items())
    return {name: n for name, n in hits if n}
136
+
137
+
138
def check_wrong_script(msgstr: str, locale: str) -> Issue | None:
    """Flag translations whose writing script does not match the locale.

    Two detection layers:
    1. A dominant foreign script with no expected-script characters at all
       (e.g. Latin text in an Arabic file) -> ERROR.
    2. Same-script contamination via distinctive characters
       (e.g. Russian letters in a Ukrainian file) -> delegated to helper.
    """
    allowed = LOCALE_SCRIPTS.get(locale)
    if not allowed:
        # Unknown locale: nothing to validate against.
        return None

    found = detect_scripts(msgstr)
    if not found:
        # No script characters at all (numbers, punctuation, ...).
        return None

    top_script = max(found, key=found.get)
    top_count = found[top_script]
    char_total = sum(found.values())

    # Expected script dominates: only same-script contamination remains possible.
    if top_script in allowed:
        return _check_distinctive_chars(msgstr, locale)

    # A foreign script leads only weakly — inconclusive, let it pass.
    if top_count / char_total < 0.5:
        return None

    # Some expected-script characters exist; likely embedded technical terms.
    if any(script in found for script in allowed):
        return None

    # Expected script is completely absent — clear contamination.
    return Issue(
        file="",
        line=0,
        msgid="",
        msgstr=msgstr,
        issue_type=IssueType.WRONG_SCRIPT,
        severity=Severity.ERROR,
        message=f"Expected {'/'.join(allowed)} script, found entirely {top_script} ({top_count}/{char_total} chars)",
    )
182
+
183
+
184
def _check_distinctive_chars(msgstr: str, locale: str) -> Issue | None:
    """Detect same-script contamination using locale-unique alphabet letters.

    Foreign-only distinctive characters mean contamination whether or not the
    locale's own distinctive characters also appear. With no distinctive
    characters from either side the text is inconclusive and passes.
    """
    profile = DISTINCTIVE_CHARS.get(locale)
    if not profile:
        return None

    present = set(msgstr)
    own_found = bool(present & profile["own"])

    for other_lang, marker_chars in profile.items():
        if other_lang == "own":
            continue
        if not (present & marker_chars):
            continue

        # First foreign hit wins; report and stop.
        if own_found:
            message = f"Mixed {locale}/{other_lang} characters — possible contamination"
        else:
            message = f"Found {other_lang}-only characters, no {locale}-specific characters"

        return Issue(
            file="",
            line=0,
            msgid="",
            msgstr=msgstr,
            issue_type=IssueType.WRONG_SCRIPT,
            severity=Severity.ERROR,
            message=message,
        )

    return None
221
+
222
+
223
+
224
# Distinctive characters per locale — used to distinguish languages that share
# a script but have unique alphabet characters. Each entry maps a locale to its
# own unique characters and the foreign characters that indicate contamination.
# Add new entries as needed for other language pairs (e.g. Serbian/Bulgarian).
# Key "own" holds letters unique to the locale itself; every other key is a
# contaminating language code mapped to letters unique to that language.
DISTINCTIVE_CHARS: dict[str, dict[str, set[str]]] = {
    # Ukrainian uses ґєії, which Russian lacks; Russian uses ёыэъ, absent in Ukrainian.
    "uk": {"own": set("ґєіїҐЄІЇ"), "ru": set("ёыэъЁЫЭЪ")},
    "ru": {"own": set("ёыэъЁЫЭЪ"), "uk": set("ґєіїҐЄІЇ")},
}
232
+
233
+
234
+ def check_shifted_entry(msgid: str, msgstr: str) -> Issue | None:
235
+ """Detect entries where msgstr appears to be shifted (belongs to a different msgid).
236
+
237
+ Heuristic: if msgid is long (>100 chars) but msgstr is very short (<15% of msgid length),
238
+ the translation is likely shifted from a different entry.
239
+ """
240
+ if not msgstr or not msgid:
241
+ return None
242
+
243
+ msgid_len = len(msgid)
244
+ msgstr_len = len(msgstr)
245
+
246
+ if msgid_len < 100:
247
+ return None
248
+
249
+ ratio = msgstr_len / msgid_len
250
+ if ratio >= 0.15:
251
+ return None
252
+
253
+ return Issue(
254
+ file="",
255
+ line=0,
256
+ msgid=msgid,
257
+ msgstr=msgstr,
258
+ issue_type=IssueType.SHIFTED_ENTRY,
259
+ severity=Severity.WARNING,
260
+ message=f"Possible shifted entry: msgstr is {ratio:.0%} the length of msgid ({msgstr_len} vs {msgid_len} chars)",
261
+ )
262
+
263
+
264
+ def check_garbled_text(msgstr: str) -> Issue | None:
265
+ """Detect garbled/corrupted text patterns."""
266
+ if len(msgstr) < 5:
267
+ return None
268
+
269
+ # Check for high ratio of replacement characters or unusual unicode categories
270
+ suspicious = 0
271
+ total = 0
272
+ for char in msgstr:
273
+ cat = unicodedata.category(char)
274
+ total += 1
275
+ if cat.startswith("C") and cat != "Cf": # Control chars (except format)
276
+ suspicious += 1
277
+ elif char == "\ufffd": # Replacement character
278
+ suspicious += 1
279
+
280
+ if total > 0 and suspicious / total > 0.1:
281
+ return Issue(
282
+ file="",
283
+ line=0,
284
+ msgid="",
285
+ msgstr=msgstr,
286
+ issue_type=IssueType.GARBLED_TEXT,
287
+ severity=Severity.ERROR,
288
+ message=f"Garbled text detected: {suspicious}/{total} suspicious characters",
289
+ )
290
+
291
+ return None
po_lint/cli.py ADDED
@@ -0,0 +1,208 @@
1
+ """CLI entry point for po-lint."""
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from po_lint.checks import Severity
9
+ from po_lint.config import load_config
10
+ from po_lint.detector import init_model
11
+ from po_lint.linter import lint_locale_dir
12
+
13
+
14
def main(argv: list[str] | None = None) -> int:
    """Run the po-lint CLI.

    Configuration precedence: explicit CLI flags override values from
    ``[tool.po-lint]`` in pyproject.toml, which override built-in defaults.

    Args:
        argv: Argument list for testing; None means sys.argv[1:].

    Returns:
        0 when no issues (or only tolerated warnings) were found,
        1 when errors were found (or warnings with --warnings-as-errors),
        2 on usage/configuration problems.
    """
    parser = argparse.ArgumentParser(
        prog="po-lint",
        description="Lint .po translation files for contamination, wrong languages, shifts, and garbled text.",
    )
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        help="Locale directories to lint. If omitted, reads from pyproject.toml [tool.po-lint] config.",
    )
    parser.add_argument(
        "--config-dir",
        type=Path,
        default=None,
        help="Directory containing pyproject.toml (default: current directory).",
    )
    parser.add_argument(
        "--confidence",
        type=float,
        default=None,
        help="Minimum confidence threshold for wrong language detection (default: 0.5).",
    )
    parser.add_argument(
        "--languages",
        nargs="*",
        default=None,
        help="Only check these language codes (e.g. --languages fr de nl).",
    )
    parser.add_argument(
        "--source-language",
        default=None,
        help="Source language of the .po files (default: 'en'). Detections matching this language are allowed.",
    )
    parser.add_argument(
        "--min-detection-length",
        type=int,
        default=None,
        help="Minimum cleaned text length for language detection (default: 30).",
    )
    parser.add_argument(
        "--compact-model",
        action="store_true",
        help="Use the compact fastText model (917KB, less accurate) instead of the full model (126MB).",
    )
    parser.add_argument(
        "--format",
        choices=["text", "json"],
        default="text",
        help="Output format (default: text).",
    )
    parser.add_argument(
        "--warnings-as-errors",
        action="store_true",
        help="Treat warnings as errors (exit code 1).",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        help="Disable colored output.",
    )
    args = parser.parse_args(argv)

    # Load config from pyproject.toml; CLI arguments take precedence below.
    config_dir = args.config_dir or Path.cwd()
    config = load_config(config_dir)

    # CLI args override config
    confidence = args.confidence if args.confidence is not None else config.confidence_threshold
    languages = args.languages if args.languages is not None else (config.languages or None)
    source_language = args.source_language if args.source_language is not None else config.source_language
    min_detection_length = (
        args.min_detection_length if args.min_detection_length is not None
        else config.min_detection_length
    )

    # Resolve locale directories
    if args.paths:
        locale_dirs = []
        for path in args.paths:
            if path.is_dir():
                locale_dirs.append(path)
            else:
                # Fix: non-directory arguments used to be dropped silently
                # whenever at least one valid path remained, hiding typos.
                print(f"Warning: skipping {path}: not a directory", file=sys.stderr)
        if not locale_dirs:
            print(f"Error: No valid directories found in {args.paths}", file=sys.stderr)
            return 2
    else:
        locale_dirs = config.resolve_locale_dirs(config_dir)
        if not locale_dirs:
            print("Error: No locale directories found. Specify paths or configure [tool.po-lint] in pyproject.toml.",
                  file=sys.stderr)
            return 2

    # Initialize the language-identification model once, up front.
    compact = args.compact_model or config.compact_model
    init_model(compact=compact)

    # Run linting over every resolved directory, accumulating issues.
    all_issues = []
    for locale_dir in locale_dirs:
        if args.format == "text":
            print(f"Linting {locale_dir}...")
        issues = lint_locale_dir(
            locale_dir,
            languages=languages,
            source_language=source_language,
            confidence_threshold=confidence,
            min_text_length=config.min_text_length,
            min_detection_length=min_detection_length,
            ignore_patterns=config.ignore_patterns,
        )
        all_issues.extend(issues)

    if args.format == "json":
        return _output_json(all_issues, args.warnings_as_errors)

    return _output_text(all_issues, args)
127
+
128
+
129
+ def _output_json(issues, warnings_as_errors: bool) -> int:
130
+ errors = [i for i in issues if i.severity == Severity.ERROR]
131
+ warnings = [i for i in issues if i.severity == Severity.WARNING]
132
+
133
+ output = {
134
+ "summary": {
135
+ "errors": len(errors),
136
+ "warnings": len(warnings),
137
+ "files": len({i.file for i in issues}),
138
+ },
139
+ "issues": [
140
+ {
141
+ "file": i.file,
142
+ "line": i.line,
143
+ "severity": i.severity.value,
144
+ "type": i.issue_type.value,
145
+ "message": i.message,
146
+ "msgid": i.msgid,
147
+ "msgstr": i.msgstr,
148
+ "detected_lang": i.detected_lang or None,
149
+ "confidence": round(i.confidence, 4) if i.confidence else None,
150
+ }
151
+ for i in sorted(issues, key=lambda x: (x.file, x.line))
152
+ ],
153
+ }
154
+ print(json.dumps(output, ensure_ascii=False, indent=2))
155
+
156
+ if errors:
157
+ return 1
158
+ if warnings and warnings_as_errors:
159
+ return 1
160
+ return 0
161
+
162
+
163
+ def _output_text(issues, args) -> int:
164
+ if not issues:
165
+ return 0
166
+
167
+ errors = [i for i in issues if i.severity == Severity.ERROR]
168
+ warnings = [i for i in issues if i.severity == Severity.WARNING]
169
+
170
+ by_file: dict[str, list] = {}
171
+ for issue in issues:
172
+ by_file.setdefault(issue.file, []).append(issue)
173
+
174
+ use_color = not args.no_color and sys.stdout.isatty()
175
+
176
+ for file_path, file_issues in sorted(by_file.items()):
177
+ if use_color:
178
+ print(f"\n\033[1m{file_path}\033[0m")
179
+ else:
180
+ print(f"\n{file_path}")
181
+
182
+ for issue in sorted(file_issues, key=lambda i: i.line):
183
+ prefix = _severity_prefix(issue.severity, use_color)
184
+ msgid_short = issue.msgid[:60] + "..." if len(issue.msgid) > 60 else issue.msgid
185
+ msgstr_short = issue.msgstr[:60] + "..." if len(issue.msgstr) > 60 else issue.msgstr
186
+ print(f" line {issue.line}: {prefix} [{issue.issue_type.value}] {issue.message}")
187
+ if msgid_short:
188
+ print(f" msgid: {msgid_short!r}")
189
+ if msgstr_short:
190
+ print(f" msgstr: {msgstr_short!r}")
191
+
192
+ print(f"\nFound {len(errors)} error(s) and {len(warnings)} warning(s) in {len(by_file)} file(s).")
193
+
194
+ if errors:
195
+ return 1
196
+ if warnings and args.warnings_as_errors:
197
+ return 1
198
+ return 0
199
+
200
+
201
def _severity_prefix(severity: Severity, color: bool) -> str:
    """Return the severity label, ANSI-colored (red/yellow) when *color* is set."""
    if severity == Severity.ERROR:
        label, ansi = "ERROR", "\033[31mERROR\033[0m"
    else:
        label, ansi = "WARNING", "\033[33mWARNING\033[0m"
    return ansi if color else label
205
+
206
+
207
# Script entry point: delegate to main() and propagate its exit code.
if __name__ == "__main__":
    sys.exit(main())
po_lint/config.py ADDED
@@ -0,0 +1,94 @@
1
+ """Configuration loading from pyproject.toml."""
2
+
3
+ import importlib.util
4
+ import sys
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ if sys.version_info >= (3, 11):
9
+ import tomllib
10
+ else:
11
+ try:
12
+ import tomllib
13
+ except ImportError:
14
+ import tomli as tomllib
15
+
16
+
17
@dataclass
class Config:
    """po-lint configuration, normally loaded from ``[tool.po-lint]`` in pyproject.toml."""

    # Candidate locale directories (relative paths resolve against the project root).
    paths: list[Path] = field(default_factory=lambda: [Path("locale")])
    # Installed packages whose <package>/locale directories should be linted.
    packages: list[str] = field(default_factory=list)
    # Restrict linting to these language codes; empty means all.
    languages: list[str] = field(default_factory=list)
    source_language: str = "en"
    confidence_threshold: float = 0.5
    min_text_length: int = 3
    min_detection_length: int = 30
    ignore_patterns: list[str] = field(default_factory=list)
    compact_model: bool = False

    def resolve_locale_dirs(self, base_dir: Path) -> list[Path]:
        """Collect every existing locale directory from ``paths`` and ``packages``.

        Relative entries in ``paths`` are resolved against *base_dir*;
        entries that do not exist as directories are silently skipped.
        """
        found: list[Path] = []

        for raw_path in self.paths:
            candidate = raw_path if raw_path.is_absolute() else base_dir / raw_path
            if candidate.is_dir():
                found.append(candidate)

        # Auto-discover locale dirs shipped inside installed packages.
        for pkg_name in self.packages:
            pkg_locale = find_package_locale(pkg_name)
            if pkg_locale is not None and pkg_locale.is_dir():
                found.append(pkg_locale)

        return found
51
+
52
+
53
+ def find_package_locale(package_name: str) -> Path | None:
54
+ """Find the locale directory for an installed Python package."""
55
+ spec = importlib.util.find_spec(package_name)
56
+ if spec is None or spec.origin is None:
57
+ return None
58
+ package_dir = Path(spec.origin).parent
59
+ locale_dir = package_dir / "locale"
60
+ if locale_dir.is_dir():
61
+ return locale_dir
62
+ return None
63
+
64
+
65
def load_config(project_dir: Path | None = None) -> Config:
    """Load configuration from pyproject.toml in *project_dir* (default: cwd).

    Returns a default Config when the file is missing or has no
    ``[tool.po-lint]`` section.
    """
    base = Path.cwd() if project_dir is None else project_dir

    pyproject_path = base / "pyproject.toml"
    if not pyproject_path.exists():
        return Config()

    with pyproject_path.open("rb") as fh:
        raw = tomllib.load(fh)

    section = raw.get("tool", {}).get("po-lint", {})
    if not section:
        return Config()

    return Config(
        paths=[Path(p) for p in section.get("paths", ["locale"])],
        packages=section.get("packages", []),
        languages=section.get("languages", []),
        source_language=section.get("source_language", "en"),
        confidence_threshold=section.get("confidence_threshold", 0.5),
        min_text_length=section.get("min_text_length", 3),
        min_detection_length=section.get("min_detection_length", 30),
        ignore_patterns=section.get("ignore_patterns", []),
        compact_model=section.get("compact_model", False),
    )