app-localizer 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ """Translate Apple String Catalog files."""
2
+
3
+ __all__ = [
4
+ "StringsCatalogTranslator",
5
+ "TranslationConfig",
6
+ ]
7
+
8
+
9
def __getattr__(name):
    """Resolve the package's public names lazily on attribute access.

    ``app_localizer.xcstrings`` is only imported once one of the names listed
    in ``__all__`` is requested. Any other name raises ``AttributeError``.
    """
    if name not in __all__:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred import: only paid for when a public name is actually requested.
    from app_localizer.xcstrings import StringsCatalogTranslator, TranslationConfig

    lazily_exported = {
        "StringsCatalogTranslator": StringsCatalogTranslator,
        "TranslationConfig": TranslationConfig,
    }
    return lazily_exported[name]
@@ -0,0 +1,597 @@
1
+ """Utilities for reading and mutating Apple String Catalog files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections import Counter
7
+ from copy import deepcopy
8
+ from dataclasses import dataclass
9
+ from hashlib import sha256
10
+ from pathlib import Path
11
+ import re
12
+ from typing import Any, Dict, Iterator, Mapping, Optional
13
+
14
+ from app_localizer.cldr import ordered_categories, required_cardinal_categories
15
+ from app_localizer.fileio import atomic_write_json
16
+
17
# Alias for a decoded JSON object: a dict with string keys and arbitrary values.
JsonObject = Dict[str, Any]

# Name of the variation axis whose children are plural categories.
PLURAL_VARIATION_KEY = "plural"
# Category names accepted as the final segment of a plural key path.
CARDINAL_CATEGORIES = frozenset({"zero", "one", "two", "few", "many", "other"})

# One printf-style token: an optional "N$" position, flags, width, precision and
# length modifier, then a conversion character. "%@" and the escaped "%%" both
# match.
PRINTF_PLACEHOLDER_PATTERN = re.compile(
    r"%(?:\d+\$)?[-+#0 ]*(?:\*|\d+)?(?:\.(?:\*|\d+))?"
    r"(?:hh|h|ll|l|L|z|j|t)?[@diuoxXfFeEgGaAcsp%]"
)
# The "%N$" prefix that marks a printf token as positional.
POSITIONAL_PRINTF_PATTERN = re.compile(r"%\d+\$")
# A Swift "\(...)" interpolation; the match ends at the first ")".
SWIFT_INTERPOLATION_PATTERN = re.compile(r"\\\([^)]+\)")
# Substitution references of the form "%#@name@" (optionally positional), and
# the "%arg" token.
SUBSTITUTION_PLACEHOLDER_PATTERN = re.compile(r"%(?:\d+\$)?#@\w+@|%arg\b")
29
+
30
+
31
class CatalogError(Exception):
    """Signal that a string catalog cannot be read, written, or is malformed.

    The catalog load/write helpers in this module translate lower-level
    failures into this single exception type.
    """
33
+
34
+
35
@dataclass
class StringUnitReference:
    """Pair a string unit with the key path at which it was found.

    The ``string_unit`` dict is the live object from the localization tree,
    not a copy, so edits made through this reference change the catalog.
    """

    # Dotted path identifying where the unit sits in the localization tree.
    key_path: str
    # The string unit object itself, shared with the tree it came from.
    string_unit: JsonObject
41
+
42
+
43
def load_catalog(path: Path) -> JsonObject:
    """Read the string catalog at ``path`` and check its top-level shape.

    The file is decoded as UTF-8 JSON with duplicate object keys rejected. The
    root must be an object holding a string ``sourceLanguage`` and an object
    ``strings``. A ``version``, when present, must be a string whose leading
    dot-separated component is ``"1"``.

    Raises:
        CatalogError: If the path does not exist or is not a file, the file
            cannot be read or parsed, or any structural check above fails.
    """
    if not path.exists():
        raise CatalogError(f"Catalog does not exist: {path}")
    if not path.is_file():
        raise CatalogError(f"Catalog path is not a file: {path}")

    try:
        with path.open("r", encoding="utf-8") as handle:
            parsed = json.load(handle, object_pairs_hook=_object_rejecting_duplicate_keys)
    except json.JSONDecodeError as parse_error:
        raise CatalogError(f"Catalog is not valid JSON: {parse_error}") from parse_error
    except ValueError as value_error:
        # Any other ValueError, such as the one raised by the duplicate-key hook.
        raise CatalogError(f"Catalog is not valid: {value_error}") from value_error
    except OSError as io_error:
        raise CatalogError(f"Catalog could not be read: {io_error}") from io_error

    if not isinstance(parsed, dict):
        raise CatalogError("Catalog root must be a JSON object.")

    required_fields = (
        ("sourceLanguage", str, "Catalog must contain a string sourceLanguage."),
        ("strings", dict, "Catalog must contain a strings object."),
    )
    for field, expected_type, message in required_fields:
        if not isinstance(parsed.get(field), expected_type):
            raise CatalogError(message)

    version = parsed.get("version")
    if version is not None:
        # A non-string version has no major component and is always rejected.
        major = version.split(".", 1)[0] if isinstance(version, str) else None
        if major != "1":
            raise CatalogError(
                f"Catalog version {version!r} is not supported. Supported versions: 1.x."
            )

    return parsed
76
+
77
+
78
def write_catalog(path: Path, catalog: Mapping[str, Any]) -> None:
    """Persist ``catalog`` to ``path`` via ``atomic_write_json``.

    Write a catalog atomically using Xcode-style JSON spacing.

    Raises:
        CatalogError: If the underlying write raises ``OSError``.
    """
    try:
        atomic_write_json(path, catalog)
    except OSError as write_error:
        raise CatalogError(f"Catalog could not be written: {write_error}") from write_error
84
+
85
+
86
def ensure_source_localizations(catalog: JsonObject) -> bool:
    """Give every string entry a source localization that has string units.

    An entry whose ``localizations`` is not an object gets an empty one. An
    entry whose source-language localization is not an object, or contains no
    string units, gets a single translated unit whose value is the entry key.

    Returns:
        ``True`` if the catalog was modified, ``False`` otherwise.

    Raises:
        CatalogError: If a string entry is not an object.
    """
    language = catalog["sourceLanguage"]
    mutated = False

    for string_key, entry in catalog["strings"].items():
        if not isinstance(entry, dict):
            raise CatalogError(f"String entry {string_key!r} must be an object.")

        localizations = entry.get("localizations")
        if not isinstance(localizations, dict):
            localizations = entry["localizations"] = {}
            mutated = True

        source = localizations.get(language)
        if isinstance(source, dict) and string_units_by_key_path(source, string_key):
            continue

        # The key doubles as the source text when no usable unit exists.
        localizations[language] = {
            "stringUnit": {
                "state": "translated",
                "value": string_key,
            }
        }
        mutated = True

    return mutated
114
+
115
+
116
def ensure_target_localization(
    string_data: JsonObject,
    source_language: str,
    target_language: str,
    fallback_value: str,
) -> JsonObject:
    """Return the target-language localization, creating it when absent.

    An existing target localization is reconciled against the source in place
    and returned. Otherwise a clone of the source, with its units marked new,
    is stored under ``target_language`` and returned. If the source
    localization is not an object, a single translated unit holding
    ``fallback_value`` stands in for it (without being stored).
    """
    localizations = string_data["localizations"]

    source = localizations.get(source_language)
    if not isinstance(source, dict):
        source = {
            "stringUnit": {
                "state": "translated",
                "value": fallback_value,
            }
        }

    target = localizations.get(target_language)
    if isinstance(target, dict):
        reconcile_target_localization(target, source)
    else:
        target = clone_localization_for_translation(source)
        localizations[target_language] = target
    return target
141
+
142
+
143
def reconcile_target_localization(
    target_localization: JsonObject,
    source_localization: Mapping[str, Any],
) -> bool:
    """Bring an existing target localization in line with the source's shape.

    Delegates to ``_merge_missing_units``, which mutates ``target_localization``
    in place.

    Returns:
        ``True`` if the target was modified, ``False`` otherwise.
    """
    was_modified = _merge_missing_units(target_localization, source_localization)
    return was_modified
149
+
150
+
151
def clone_localization_for_translation(localization: Mapping[str, Any]) -> JsonObject:
    """Return an independent copy of ``localization`` with its units marked new.

    The input is left untouched; the state changes are applied to the deep
    copy only.
    """
    as_dict = dict(localization)
    fresh_copy = deepcopy(as_dict)
    _mark_translatable_units_new(fresh_copy)
    return fresh_copy
156
+
157
+
158
def string_units_by_key_path(localization: Mapping[str, Any], key_path: str) -> Dict[str, JsonObject]:
    """Map each string unit found under ``localization`` to its key path.

    The values are the live unit objects yielded by ``iter_string_units``. If
    two units share a key path, the one yielded later wins.
    """
    units: Dict[str, JsonObject] = {}
    for found in iter_string_units(localization, key_path):
        units[found.key_path] = found.string_unit
    return units
164
+
165
+
166
def plural_category_for_key_path(key_path: str) -> Optional[str]:
    """Return the plural category that ``key_path`` ends in, or ``None``.

    A match requires the last two dot-separated segments to be the plural
    variation key followed by a known cardinal category, for example
    ``key.plural.few`` or ``key.device.iphone.plural.few``.
    """
    tail = key_path.split(".")[-2:]
    if len(tail) != 2:
        return None

    axis, category = tail
    if axis != PLURAL_VARIATION_KEY:
        return None
    return category if category in CARDINAL_CATEGORIES else None
180
+
181
+
182
def expand_plural_categories(localization: JsonObject, target_language: str) -> bool:
    """Add the plural categories ``target_language`` needs but the tree lacks.

    The required categories come from ``required_cardinal_categories``. When it
    returns ``None`` for the language, nothing is changed. Otherwise the
    localization's variation tree is walked, including plural maps nested under
    other axes and inside substitutions, and each missing category is filled
    with a clone of that plural map's ``other`` entry marked new. Existing
    categories are never removed.

    Returns:
        ``True`` if any category was added, ``False`` otherwise.
    """
    required = required_cardinal_categories(target_language)
    return False if required is None else _expand_plural_in_node(localization, required)
196
+
197
+
198
def required_source_units_for_target(
    source_localization: Mapping[str, Any],
    key_path: str,
    target_language: str,
) -> Dict[str, JsonObject]:
    """Collect, by key path, the source units to translate for the target.

    The result holds the source's own string units. In addition, for every
    plural map, each category the target language requires but the source
    lacks is mapped to the source's ``other`` unit, which serves as the base
    text for that category. When ``required_cardinal_categories`` returns
    ``None`` for the target, only the source's own units are returned.
    """
    collected: Dict[str, JsonObject] = {}
    _collect_required_source_units(
        source_localization,
        key_path,
        required_cardinal_categories(target_language),
        collected,
    )
    return collected
215
+
216
+
217
def iter_string_units(localization: Mapping[str, Any], key_path: str) -> Iterator[StringUnitReference]:
    """Yield a reference to every string unit under ``localization``.

    The node's own ``stringUnit`` comes first, then units found under
    ``variations``, then units found under ``substitutions``. Yielded units are
    the live objects from the tree. A non-dict ``localization`` yields nothing.
    """
    if not isinstance(localization, dict):
        return

    own_unit = localization.get("stringUnit")
    if isinstance(own_unit, dict):
        yield StringUnitReference(key_path=key_path, string_unit=own_unit)

    nested_walkers = (
        ("variations", _iter_variation_units),
        ("substitutions", _iter_substitution_units),
    )
    for field, walk in nested_walkers:
        subtree = localization.get(field)
        if isinstance(subtree, dict):
            yield from walk(subtree, key_path)
233
+
234
+
235
def placeholders_match(source: str, translation: str) -> bool:
    """Tell whether ``translation`` keeps the placeholders found in ``source``.

    Substitution tokens and Swift interpolations are compared as multisets, so
    their order may differ. Printf tokens are compared by
    ``_printf_placeholders_match``.
    """
    expected = _extract_placeholders(source)
    actual = _extract_placeholders(translation)

    if Counter(expected.substitutions) != Counter(actual.substitutions):
        return False
    if Counter(expected.swift_interpolations) != Counter(actual.swift_interpolations):
        return False
    return _printf_placeholders_match(expected.printf, actual.printf)
245
+
246
+
247
@dataclass(frozen=True)
class _PlaceholderParts:
    """Placeholder tokens of one string, split into three groups.

    The groups are kept apart because ``placeholders_match`` compares each of
    them under a different rule.
    """

    # Tokens matched by the substitution placeholder pattern.
    substitutions: list[str]
    # Tokens matched by the printf placeholder pattern.
    printf: list[str]
    # Tokens matched by the Swift interpolation pattern.
    swift_interpolations: list[str]
254
+
255
+
256
def source_fingerprint(catalog: Mapping[str, Any]) -> str:
    """Hash only source text and translation-relevant metadata.

    The digest covers ``sourceLanguage`` and, for every string entry that is
    an object, all of its fields except ``localizations`` plus the
    source-language localization alone. Edits to other languages therefore
    leave the fingerprint unchanged. Entries that are not objects are skipped.

    A ``localizations`` value that is present but not an object (for example
    ``null``) is treated like a missing one instead of raising, matching how
    ``ensure_source_localizations`` tolerates that shape.

    Returns:
        The hex SHA-256 digest of the key-sorted JSON encoding of that payload.
    """
    source_language = catalog["sourceLanguage"]
    fingerprinted_strings: JsonObject = {}

    for key, string_data in catalog["strings"].items():
        if not isinstance(string_data, dict):
            continue

        localizations = string_data.get("localizations")
        source_localization = (
            localizations.get(source_language)
            if isinstance(localizations, dict)
            else None
        )

        # The payload is only serialized, never mutated, so sharing the
        # catalog's objects is safe and no defensive copies are needed.
        relevant_data = {
            name: value
            for name, value in string_data.items()
            if name != "localizations"
        }
        relevant_data["localizations"] = {source_language: source_localization}
        fingerprinted_strings[key] = relevant_data

    payload = {
        "sourceLanguage": source_language,
        "strings": fingerprinted_strings,
    }
    encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True).encode("utf-8")
    return sha256(encoded).hexdigest()
283
+
284
+
285
+ # This walker and its two siblings, _expand_plural_in_variations and
286
+ # _collect_required_variation_units, share the same leaf-vs-axis test and key-path
287
+ # construction. They must stay aligned: if the collector yields a key path the
288
+ # expander does not also create in the target, translation reports a missing
289
+ # target unit. ExpandPluralCategoriesTests and RequiredSourceUnitsForTargetTests
290
+ # guard the alignment.
291
def _iter_variation_units(variations: Mapping[str, Any], key_path: str) -> Iterator[StringUnitReference]:
    """Yield string units beneath a ``variations`` mapping or a nested axis map.

    A child holding ``stringUnit`` or ``variations`` is treated as a
    localization node. Any other dict child is treated as a further axis map
    and walked recursively. Non-dict children are skipped. Each level appends
    the child's name to the key path.
    """
    for child_name, child in variations.items():
        if not isinstance(child, dict):
            continue

        child_path = f"{key_path}.{child_name}" if key_path else child_name
        is_localization_node = "stringUnit" in child or "variations" in child
        walk = iter_string_units if is_localization_node else _iter_variation_units
        yield from walk(child, child_path)
301
+
302
+
303
def _expand_plural_in_node(node: Any, required: frozenset) -> bool:
    """Expand plural maps reachable from one localization node.

    Visits the node's ``variations`` and then each of its ``substitutions``.
    A non-dict ``node`` is ignored.

    Returns:
        ``True`` if any plural category was added, ``False`` otherwise.
    """
    if not isinstance(node, dict):
        return False

    added = False

    variations = node.get("variations")
    if isinstance(variations, dict) and _expand_plural_in_variations(variations, required):
        added = True

    substitutions = node.get("substitutions")
    if isinstance(substitutions, dict):
        for substitution in substitutions.values():
            # Every substitution is visited even after a change was recorded.
            if _expand_plural_in_node(substitution, required):
                added = True

    return added
318
+
319
+
320
+ # Mirrors _iter_variation_units; keep the leaf-vs-axis test and recursion aligned.
321
def _expand_plural_in_variations(variations: Mapping[str, Any], required: frozenset) -> bool:
    """Expand plural maps found inside a ``variations`` mapping.

    A child holding ``stringUnit`` or ``variations`` is expanded as a
    localization node. Any other dict child is treated as an axis map: when its
    name is the plural variation key, its missing categories are added first,
    and then every entry of the map (including newly added ones) is expanded
    as a node.

    Returns:
        ``True`` if any plural category was added, ``False`` otherwise.
    """
    added = False
    for axis_name, child in variations.items():
        if not isinstance(child, dict):
            continue

        if "stringUnit" in child or "variations" in child:
            if _expand_plural_in_node(child, required):
                added = True
        else:
            # child is a nested axis map keyed by category or device name.
            if axis_name == PLURAL_VARIATION_KEY and _expand_plural_map(child, required):
                added = True
            # Snapshot the entries so the walk is independent of later edits.
            for entry in list(child.values()):
                if _expand_plural_in_node(entry, required):
                    added = True
    return added
336
+
337
+
338
def _expand_plural_map(plural_map: JsonObject, required: frozenset) -> bool:
    """Add each required category missing from ``plural_map``.

    New categories are clones of the map's ``other`` entry with their units
    marked new. Nothing is added unless ``other`` is an object that holds a
    ``stringUnit`` object. Existing categories are left alone.

    Returns:
        ``True`` if at least one category was added, ``False`` otherwise.
    """
    template = plural_map.get("other")
    has_usable_template = isinstance(template, dict) and isinstance(
        template.get("stringUnit"), dict
    )
    if not has_usable_template:
        return False

    added = False
    for category in ordered_categories(required):
        if category != "other" and category not in plural_map:
            plural_map[category] = clone_localization_for_translation(template)
            added = True
    return added
350
+
351
+
352
def _collect_required_source_units(
    node: Any,
    key_path: str,
    required: Optional[frozenset],
    out: Dict[str, JsonObject],
) -> None:
    """Record into ``out`` the units required beneath one localization node.

    Stores the node's own ``stringUnit`` under ``key_path``, then descends into
    ``variations`` and into each entry of ``substitutions`` (whose key paths
    gain a ``substitutions.<name>`` suffix). A non-dict ``node`` is ignored.
    """
    if not isinstance(node, dict):
        return

    own_unit = node.get("stringUnit")
    if isinstance(own_unit, dict):
        out[key_path] = own_unit

    variations = node.get("variations")
    if isinstance(variations, dict):
        _collect_required_variation_units(variations, key_path, required, out)

    substitutions = node.get("substitutions")
    if not isinstance(substitutions, dict):
        return

    prefix = f"{key_path}.substitutions." if key_path else "substitutions."
    for name, substitution in substitutions.items():
        _collect_required_source_units(substitution, f"{prefix}{name}", required, out)
376
+
377
+
378
+ # Mirrors _iter_variation_units; keep the leaf-vs-axis test and key paths aligned.
379
def _collect_required_variation_units(
    variations: Mapping[str, Any],
    key_path: str,
    required: Optional[frozenset],
    out: Dict[str, JsonObject],
) -> None:
    """Record into ``out`` the units required beneath a ``variations`` mapping.

    A child holding ``stringUnit`` or ``variations`` is collected as a
    localization node. Any other dict child is walked as an axis map. If that
    map is the plural axis and ``required`` is not ``None``, the categories the
    target needs beyond the source's are synthesized afterwards.
    """
    for child_name, child in variations.items():
        if not isinstance(child, dict):
            continue

        child_path = f"{key_path}.{child_name}" if key_path else child_name
        if "stringUnit" in child or "variations" in child:
            _collect_required_source_units(child, child_path, required, out)
        else:
            # child is a nested axis map keyed by category or device name.
            _collect_required_variation_units(child, child_path, required, out)
            if required is not None and child_name == PLURAL_VARIATION_KEY:
                _synthesize_target_only_plurals(child, child_path, required, out)
398
+
399
+
400
def _synthesize_target_only_plurals(
    plural_map: Mapping[str, Any],
    key_path: str,
    required: frozenset,
    out: Dict[str, JsonObject],
) -> None:
    """Map each required category the source lacks to the source ``other`` unit.

    For every category in ``required`` that is not ``other`` and not already a
    key of ``plural_map``, ``out`` gains ``<key_path>.<category>`` pointing at
    the ``other`` entry's string unit (the same object, not a copy). Nothing
    is recorded unless ``other`` is an object holding a ``stringUnit`` object.

    Args:
        plural_map: The source's plural axis map, keyed by category name.
        key_path: Key path of the plural axis map itself.
        required: Categories the target language requires.
        out: Destination mapping, mutated in place.
    """
    other = plural_map.get("other")
    other_unit = other.get("stringUnit") if isinstance(other, dict) else None
    if not isinstance(other_unit, dict):
        return

    # Walk the categories through ordered_categories(), exactly as
    # _expand_plural_map does. Iterating the frozenset directly makes the
    # insertion order of ``out`` depend on str hashing, which can differ from
    # one interpreter run to the next.
    for category in ordered_categories(required):
        if category == "other" or category in plural_map:
            continue
        child_key_path = f"{key_path}.{category}" if key_path else category
        out[child_key_path] = other_unit
416
+
417
+
418
def _object_rejecting_duplicate_keys(pairs: Any) -> JsonObject:
    """Build a dict from key/value pairs, refusing repeated keys.

    Used as the ``object_pairs_hook`` when decoding a catalog.

    Raises:
        ValueError: If the same key appears more than once in ``pairs``.
    """
    result: JsonObject = {}
    for name, item in pairs:
        if name not in result:
            result[name] = item
            continue
        raise ValueError(f"Duplicate key {name!r} in JSON object.")
    return result
425
+
426
+
427
def _iter_substitution_units(substitutions: Mapping[str, Any], key_path: str) -> Iterator[StringUnitReference]:
    """Yield string units found inside each substitution.

    Each substitution's key path is ``key_path`` followed by
    ``substitutions.<name>``. Non-dict substitutions are skipped.
    """
    prefix = f"{key_path}.substitutions." if key_path else "substitutions."
    for name, substitution in substitutions.items():
        if isinstance(substitution, dict):
            yield from iter_string_units(substitution, f"{prefix}{name}")
434
+
435
+
436
def _extract_placeholders(value: str) -> _PlaceholderParts:
    """Split the placeholders in ``value`` into their three groups.

    Substitution tokens are found first and replaced by a space, so the printf
    and Swift interpolation scans run on text that no longer contains them.
    """
    substitution_tokens = SUBSTITUTION_PLACEHOLDER_PATTERN.findall(value)
    without_substitutions = SUBSTITUTION_PLACEHOLDER_PATTERN.sub(" ", value)

    printf_tokens = PRINTF_PLACEHOLDER_PATTERN.findall(without_substitutions)
    swift_tokens = SWIFT_INTERPOLATION_PATTERN.findall(without_substitutions)
    return _PlaceholderParts(
        substitutions=substitution_tokens,
        printf=printf_tokens,
        swift_interpolations=swift_tokens,
    )
444
+
445
+
446
def _printf_placeholders_match(source: list[str], translation: list[str]) -> bool:
    """Compare the printf tokens of a source string and its translation.

    The number of escaped ``%%`` tokens must be equal. The remaining tokens
    must match as a multiset when every source token is positional, and as an
    identical ordered list otherwise.
    """
    escaped_percent = "%%"
    if source.count(escaped_percent) != translation.count(escaped_percent):
        return False

    source_args = [token for token in source if token != escaped_percent]
    translation_args = [token for token in translation if token != escaped_percent]

    if not _all_printf_arguments_are_positional(source_args):
        # Non-positional arguments bind by order, so order has to be kept.
        return source_args == translation_args
    return Counter(source_args) == Counter(translation_args)
457
+
458
+
459
def _all_printf_arguments_are_positional(placeholders: list[str]) -> bool:
    """Tell whether the list is non-empty and every token starts with ``%N$``.

    An empty list yields ``False``.
    """
    if not placeholders:
        return False
    for token in placeholders:
        if POSITIONAL_PRINTF_PATTERN.match(token) is None:
            return False
    return True
464
+
465
+
466
def _clone_string_unit(source_string_unit: Mapping[str, Any]) -> JsonObject:
    """Return a deep copy of a string unit with its ``state`` set to ``"new"``.

    The source unit is not modified.
    """
    independent_copy = deepcopy(dict(source_string_unit))
    return {**independent_copy, "state": "new"}
470
+
471
+
472
def _mark_translatable_units_new(node: JsonObject) -> None:
    """Set ``state`` to ``"new"`` on every string unit reachable from ``node``.

    Handles the node's own ``stringUnit``, its ``variations`` and each dict
    entry of its ``substitutions``. A node with none of those three as an
    object is treated as an axis map and walked as variations instead.
    """
    unit = node.get("stringUnit")
    variations = node.get("variations")
    substitutions = node.get("substitutions")

    has_unit = isinstance(unit, dict)
    has_variations = isinstance(variations, dict)
    has_substitutions = isinstance(substitutions, dict)

    if has_unit:
        unit["state"] = "new"

    if has_variations:
        _mark_variation_units_new(variations)

    if has_substitutions:
        for substitution in substitutions.values():
            if isinstance(substitution, dict):
                _mark_translatable_units_new(substitution)

    if not (has_unit or has_variations or has_substitutions):
        # No recognised field: the node itself is a map of named children.
        _mark_variation_units_new(node)
493
+
494
+
495
def _mark_variation_units_new(variations: Mapping[str, Any]) -> None:
    """Mark as new every string unit beneath a variations or axis mapping.

    A child holding ``stringUnit`` or ``variations`` is handled as a
    localization node. Any other dict child is walked as a nested axis map.
    Non-dict children are skipped.
    """
    for child in variations.values():
        if not isinstance(child, dict):
            continue
        is_localization_node = "stringUnit" in child or "variations" in child
        marker = _mark_translatable_units_new if is_localization_node else _mark_variation_units_new
        marker(child)
503
+
504
+
505
def _merge_missing_units(target_node: JsonObject, source_node: Mapping[str, Any]) -> bool:
    """Reshape ``target_node`` in place to follow ``source_node``.

    When the source has a ``stringUnit``, the target loses any ``variations``
    and gains a new-state clone of the source unit if it has none of its own.
    Otherwise, when the source has ``variations``, the target loses any
    ``stringUnit`` and its variations are merged with the source's.
    Substitutions are reconciled in every case.

    Returns:
        ``True`` if the target was modified, ``False`` otherwise.
    """
    modified = False
    source_unit = source_node.get("stringUnit")
    source_variations = source_node.get("variations")

    if isinstance(source_unit, dict):
        if "variations" in target_node:
            target_node.pop("variations")
            modified = True
        if not isinstance(target_node.get("stringUnit"), dict):
            target_node["stringUnit"] = _clone_string_unit(source_unit)
            modified = True
    elif isinstance(source_variations, dict):
        if "stringUnit" in target_node:
            target_node.pop("stringUnit")
            modified = True

        target_variations = target_node.get("variations")
        if not isinstance(target_variations, dict):
            target_variations = target_node["variations"] = {}
            modified = True

        if _merge_missing_variations(target_variations, source_variations):
            modified = True

    substitutions_modified = _merge_missing_substitutions(target_node, source_node)
    return substitutions_modified or modified
532
+
533
+
534
def _merge_missing_substitutions(target_node: JsonObject, source_node: Mapping[str, Any]) -> bool:
    """Make the target's ``substitutions`` follow the source's, in place.

    Without source substitutions, any target ``substitutions`` field is
    removed. Otherwise each source substitution that the target lacks is
    cloned in with its units marked new. Each one present on both sides has
    ``argNum`` and ``formatSpecifier`` copied from the source where they
    differ, and its units are merged. Finally, target substitutions with no
    object counterpart in the source are dropped.

    Returns:
        ``True`` if the target was modified, ``False`` otherwise.
    """
    source_substitutions = source_node.get("substitutions")
    if not isinstance(source_substitutions, dict):
        if "substitutions" not in target_node:
            return False
        del target_node["substitutions"]
        return True

    modified = False
    target_substitutions = target_node.get("substitutions")
    if not isinstance(target_substitutions, dict):
        target_substitutions = target_node["substitutions"] = {}
        modified = True

    for name, source_substitution in source_substitutions.items():
        if not isinstance(source_substitution, dict):
            continue

        existing = target_substitutions.get(name)
        if not isinstance(existing, dict):
            target_substitutions[name] = clone_localization_for_translation(source_substitution)
            modified = True
            continue

        # Argument metadata always follows the source.
        for metadata_key in ("argNum", "formatSpecifier"):
            if metadata_key not in source_substitution:
                continue
            source_value = source_substitution[metadata_key]
            if existing.get(metadata_key) != source_value:
                existing[metadata_key] = deepcopy(source_value)
                modified = True

        if _merge_missing_units(existing, source_substitution):
            modified = True

    # Iterate over a snapshot of the names because entries are deleted below.
    for name in list(target_substitutions):
        if not isinstance(source_substitutions.get(name), dict):
            del target_substitutions[name]
            modified = True

    return modified
574
+
575
+
576
def _merge_missing_variations(
    target_variations: JsonObject,
    source_variations: Mapping[str, Any],
) -> bool:
    """Add to ``target_variations`` whatever the source variations have extra.

    A source child the target lacks is cloned in with its units marked new.
    A child present on both sides is merged as a localization node when the
    source child holds ``stringUnit`` or ``variations``, and as a nested axis
    map otherwise. Non-dict source children are skipped.

    Returns:
        ``True`` if the target was modified, ``False`` otherwise.
    """
    modified = False

    for child_name, source_child in source_variations.items():
        if not isinstance(source_child, dict):
            continue

        target_child = target_variations.get(child_name)
        if not isinstance(target_child, dict):
            target_variations[child_name] = clone_localization_for_translation(source_child)
            modified = True
            continue

        is_localization_node = "stringUnit" in source_child or "variations" in source_child
        merge = _merge_missing_units if is_localization_node else _merge_missing_variations
        if merge(target_child, source_child):
            modified = True

    return modified