ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (35) hide show
  1. ol_openedx_course_translations/apps.py +12 -2
  2. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  3. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  10. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  11. ol_openedx_course_translations/management/commands/translate_course.py +419 -470
  12. ol_openedx_course_translations/middleware.py +143 -0
  13. ol_openedx_course_translations/providers/__init__.py +1 -0
  14. ol_openedx_course_translations/providers/base.py +278 -0
  15. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  16. ol_openedx_course_translations/providers/llm_providers.py +565 -0
  17. ol_openedx_course_translations/settings/cms.py +17 -0
  18. ol_openedx_course_translations/settings/common.py +57 -30
  19. ol_openedx_course_translations/settings/lms.py +15 -0
  20. ol_openedx_course_translations/tasks.py +222 -0
  21. ol_openedx_course_translations/urls.py +16 -0
  22. ol_openedx_course_translations/utils/__init__.py +0 -0
  23. ol_openedx_course_translations/utils/command_utils.py +197 -0
  24. ol_openedx_course_translations/utils/constants.py +216 -0
  25. ol_openedx_course_translations/utils/course_translations.py +581 -0
  26. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  27. ol_openedx_course_translations/views.py +73 -0
  28. ol_openedx_course_translations-0.3.0.dist-info/METADATA +407 -0
  29. ol_openedx_course_translations-0.3.0.dist-info/RECORD +35 -0
  30. ol_openedx_course_translations-0.3.0.dist-info/entry_points.txt +5 -0
  31. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  32. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  33. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  34. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/WHEEL +0 -0
  35. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,808 @@
1
+ """Translation synchronization module for syncing and managing translation files."""
2
+
3
+ import json
4
+ from collections import OrderedDict
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import polib # type: ignore[import-untyped]
9
+
10
+ from ol_openedx_course_translations.utils.constants import (
11
+ BACKEND_PO_FILES,
12
+ DEFAULT_JSON_INDENT,
13
+ DEFAULT_PLURAL_FORM,
14
+ EXPECTED_GLOSSARY_PARTS,
15
+ LANGUAGE_MAPPING,
16
+ LEARNER_FACING_APPS,
17
+ PLURAL_FORMS,
18
+ PO_HEADER_BUGS_EMAIL,
19
+ PO_HEADER_CONTENT_TRANSFER_ENCODING,
20
+ PO_HEADER_CONTENT_TYPE,
21
+ PO_HEADER_MIME_VERSION,
22
+ PO_HEADER_POT_CREATION_DATE,
23
+ PO_HEADER_PROJECT_VERSION,
24
+ PO_HEADER_TRANSIFEX_TEAM_BASE_URL,
25
+ TRANSLATION_FILE_NAMES,
26
+ TYPO_PATTERNS,
27
+ )
28
+
29
+
30
def load_json_file(file_path: Path) -> dict:
    """Read a JSON translation file and return its decoded content.

    Args:
        file_path: Location of the JSON file.

    Returns:
        The decoded JSON content, or an empty dict when the file is missing.

    Raises:
        ValueError: If the file exists but is not valid JSON (chained to the
            underlying ``json.JSONDecodeError``).
    """
    if file_path.exists():
        try:
            with file_path.open(encoding="utf-8") as handle:
                return json.load(handle)
        except json.JSONDecodeError as exc:
            message = f"Error parsing JSON file {file_path}: {exc}"
            raise ValueError(message) from exc
    return {}
40
+
41
+
42
def save_json_file(file_path: Path, data: dict, indent: int = DEFAULT_JSON_INDENT):
    """Write ``data`` to ``file_path`` as UTF-8 JSON terminated by a newline.

    Missing parent directories are created first. Non-ASCII characters are
    written verbatim (``ensure_ascii=False``) rather than escaped.

    Args:
        file_path: Destination file.
        data: Mapping to serialize.
        indent: Indentation width passed to ``json.dump``.
    """
    destination_dir = file_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    with file_path.open("w", encoding="utf-8") as handle:
        json.dump(data, handle, ensure_ascii=False, indent=indent)
        # Trailing newline so the file ends cleanly.
        handle.write("\n")
48
+
49
+
50
def find_typo_mappings(data: dict) -> list[tuple[str, str]]:
    """Pair misspelled keys in ``data`` with their correctly spelled twins.

    For each ``(typo, correct)`` pair in ``TYPO_PATTERNS``, every key that
    contains the typo is rewritten with the correction. A pair is reported
    only when the corrected key is also present in ``data``.

    Returns:
        List of ``(typo_key, correct_key)`` tuples.
    """
    return [
        (key, fixed_key)
        for typo, correct in TYPO_PATTERNS
        for key in data
        if typo in key and (fixed_key := key.replace(typo, correct)) in data
    ]
62
+
63
+
64
def sync_or_create_json_file(en_file: Path, target_file: Path) -> dict:
    """Bring a target-language JSON file in line with the English source.

    When the target is missing it is created with every English key mapped to
    an empty string. When it exists, typo keys found by ``find_typo_mappings``
    are folded into their correct counterparts and removed, and English keys
    that are missing are appended with empty values. Keys that exist only in
    the target are kept. The target file is always written.

    Returns:
        Stats dict: ``{'action': 'created'|'synced'|'skipped', 'added': int,
        'fixed': int, 'removed': int}``, plus an ``'error'`` message when the
        action is ``'skipped'``.
    """

    def _skipped(reason: str) -> dict:
        return {
            "action": "skipped",
            "added": 0,
            "fixed": 0,
            "removed": 0,
            "error": reason,
        }

    try:
        en_data = load_json_file(en_file)
    except ValueError:
        return _skipped("English file not readable")
    if not en_data:
        return _skipped("English file is empty")

    if not target_file.exists():
        save_json_file(target_file, dict.fromkeys(en_data, ""))
        return {"action": "created", "added": len(en_data), "fixed": 0, "removed": 0}

    merged = dict(load_json_file(target_file))
    added = fixed = removed = 0

    for typo_key, correct_key in find_typo_mappings(merged):
        typo_value = merged.get(typo_key, "")
        # Rescue the typo key's translation only when the correct key has none.
        if typo_value and not merged.get(correct_key, ""):
            merged[correct_key] = typo_value
            fixed += 1
        if typo_key in merged:
            del merged[typo_key]
            removed += 1

    for key in en_data:
        if key not in merged:
            merged[key] = ""
            added += 1

    save_json_file(target_file, merged)
    return {"action": "synced", "added": added, "fixed": fixed, "removed": removed}
133
+
134
+
135
+ def _get_base_lang(lang_code: str) -> str:
136
+ """Extract base language code from locale code (e.g., 'es_ES' -> 'es')."""
137
+ return lang_code.split("_")[0] if "_" in lang_code else lang_code
138
+
139
+
140
def _get_plural_form(lang_code: str) -> str:
    """Look up the Plural-Forms string for ``lang_code``.

    The lookup uses the base language (text before the first underscore) and
    falls back to ``DEFAULT_PLURAL_FORM`` when it is not in ``PLURAL_FORMS``.
    """
    language = _get_base_lang(lang_code)
    if language in PLURAL_FORMS:
        return PLURAL_FORMS[language]
    return DEFAULT_PLURAL_FORM
144
+
145
+
146
def create_po_file_header(lang_code: str, iso_code: str | None = None) -> str:
    """Render the textual PO header block for a language.

    Args:
        lang_code: Language code used for the team name, team URL segment and
            plural rule lookup.
        iso_code: Value for the ``Language`` field; defaults to ``lang_code``.

    Returns:
        The header text (empty msgid/msgstr followed by the quoted header
        lines), terminated by a blank line.
    """
    language_tag = lang_code if iso_code is None else iso_code
    team_name = LANGUAGE_MAPPING.get(lang_code, lang_code)
    team_slug = _get_base_lang(lang_code)
    plural_rule = _get_plural_form(lang_code)

    # NOTE(review): PO-Revision-Date is a fixed literal, not the current time.
    return f"""msgid ""
msgstr ""
"Project-Id-Version: {PO_HEADER_PROJECT_VERSION}\\n"
"Report-Msgid-Bugs-To: {PO_HEADER_BUGS_EMAIL}\\n"
"POT-Creation-Date: {PO_HEADER_POT_CREATION_DATE}\\n"
"PO-Revision-Date: 2025-01-01 00:00+0000\\n"
"Last-Translator: \\n"
"Language-Team: {team_name} ({PO_HEADER_TRANSIFEX_TEAM_BASE_URL}/{team_slug}/)\\n"
"MIME-Version: {PO_HEADER_MIME_VERSION}\\n"
"Content-Type: {PO_HEADER_CONTENT_TYPE}\\n"
"Content-Transfer-Encoding: {PO_HEADER_CONTENT_TRANSFER_ENCODING}\\n"
"Language: {language_tag}\\n"
"Plural-Forms: {plural_rule}\\n"

"""
170
+
171
+
172
def parse_po_file(po_file: Path) -> dict[str, str]:
    """Parse a PO file with polib into a ``msgid -> msgstr`` mapping.

    The header entry (empty msgid) is skipped. Plural entries are keyed by
    their singular ``msgid`` and only ``entry.msgstr`` is recorded; their
    plural forms are not included here.

    Returns:
        Mapping of msgid to msgstr (``""`` when untranslated), or an empty
        dict when the file does not exist.
    """
    if not po_file.exists():
        return {}

    catalog = polib.pofile(str(po_file))
    return {item.msgid: item.msgstr or "" for item in catalog if item.msgid}
189
+
190
+
191
def parse_po_file_with_metadata(po_file: Path) -> dict[str, dict]:
    """Parse a PO file with polib into a ``msgid -> metadata`` mapping.

    Each value has the keys ``msgstr`` (str), ``locations`` (list of
    ``"path:line"`` or ``"path"`` strings), ``flags`` (list of str) and
    ``is_plural`` (bool). Plural entries additionally carry ``msgid_plural``
    (str) and ``msgstr_plural`` (dict of index -> str).

    Returns:
        The mapping described above, or an empty dict when the file does not
        exist. The header entry (empty msgid) is skipped.
    """
    if not po_file.exists():
        return {}

    entries: dict[str, dict] = {}
    for entry in polib.pofile(str(po_file)):
        if not entry.msgid:  # Skip the header entry
            continue

        locations = []
        for occurrence in entry.occurrences:
            path = occurrence[0]
            line = occurrence[1] if len(occurrence) > 1 else ""
            # Omit the colon when there is no line number, as polib does.
            locations.append(f"{path}:{line}" if line else path)

        # polib leaves msgid_plural as '' (not None) for singular entries, so
        # truthiness - not an identity check against None - marks plurals.
        is_plural = bool(entry.msgid_plural)

        entry_data = {
            "msgstr": entry.msgstr or "",
            "locations": locations,
            # Copy so callers cannot mutate the parsed entry's own list.
            "flags": list(entry.flags),
            "is_plural": is_plural,
        }
        if is_plural:
            entry_data["msgid_plural"] = entry.msgid_plural
            entry_data["msgstr_plural"] = {
                index: entry.msgstr_plural.get(index, "")
                for index in range(len(entry.msgstr_plural))
            }
        entries[entry.msgid] = entry_data
    return entries
226
+
227
+
228
def _create_po_entry_from_en(entry: polib.POEntry) -> polib.POEntry:
    """Build an untranslated copy of an English PO entry.

    The msgid, msgid_plural, occurrences and flags are carried over. Plural
    entries get empty ``msgstr_plural`` slots (as many as the English entry
    has, but at least two); singular entries get an empty ``msgstr``.
    """
    new_entry = polib.POEntry(
        msgid=entry.msgid,
        msgid_plural=entry.msgid_plural,
        # Copy the lists: polib stores them by reference, so sharing them
        # would let edits to one catalog leak into the other.
        occurrences=list(entry.occurrences),
        flags=list(entry.flags),
    )
    if entry.msgid_plural:
        num_forms = max(2, len(entry.msgstr_plural or ()))
        new_entry.msgstr_plural = dict.fromkeys(range(num_forms), "")
    else:
        new_entry.msgstr = ""
    return new_entry
243
+
244
+
245
def _sync_existing_po_file(
    en_po: polib.POFile, target_po: polib.POFile, target_file: Path
) -> int:
    """Append English entries that the target catalog lacks.

    Entries are identified by ``(msgid, msgid_plural or None)``. The target
    file is only written when at least one entry was appended.

    Returns:
        Number of entries appended.
    """

    def _identity(item: polib.POEntry) -> tuple:
        return (item.msgid, item.msgid_plural or None)

    known = {_identity(item) for item in target_po if item.msgid}

    # Header entries (empty msgid) are never copied.
    missing = [
        item for item in en_po if item.msgid and _identity(item) not in known
    ]
    for item in missing:
        target_po.append(_create_po_entry_from_en(item))

    if missing:
        target_file.parent.mkdir(parents=True, exist_ok=True)
        target_po.save(str(target_file))

    return len(missing)
273
+
274
+
275
def _create_new_po_file(
    en_po: polib.POFile, target_file: Path, lang_code: str, iso_code: str | None
) -> int:
    """Create a new PO file holding every English entry, untranslated.

    Metadata is copied from the English catalog, then ``Language`` and
    ``Plural-Forms`` are overridden for the target language.

    Args:
        en_po: Parsed English catalog.
        target_file: Destination path; parent directories are created.
        lang_code: Language code used to look up the plural rule.
        iso_code: Value for the ``Language`` header; falls back to
            ``lang_code`` when empty or None.

    Returns:
        Number of entries written.
    """
    import re  # local import: only this function needs it

    target_po = polib.POFile()

    target_po.metadata = en_po.metadata.copy()
    target_po.metadata["Language"] = iso_code or lang_code

    # Always override: the copied English metadata normally already contains
    # a Plural-Forms header, so a "set only if missing" check would leave the
    # English rule in place for every target language.
    plural_forms = _get_plural_form(lang_code)
    target_po.metadata["Plural-Forms"] = plural_forms

    # Size plural slots to the target's nplurals so entries agree with the
    # header written above.
    nplurals_match = re.search(r"nplurals\s*=\s*(\d+)", plural_forms)
    nplurals = int(nplurals_match.group(1)) if nplurals_match else 0

    added_count = 0
    for entry in en_po:
        if not entry.msgid:  # Skip header
            continue

        new_entry = _create_po_entry_from_en(entry)
        if new_entry.msgid_plural and nplurals > 0:
            new_entry.msgstr_plural = dict.fromkeys(range(nplurals), "")
        target_po.append(new_entry)
        added_count += 1

    target_file.parent.mkdir(parents=True, exist_ok=True)
    target_po.save(str(target_file))
    return added_count
302
+
303
+
304
def sync_or_create_po_file(
    en_file: Path, target_file: Path, lang_code: str, iso_code: str | None = None
) -> dict:
    """Sync an existing target PO file with English, or create it.

    Parsing and writing are delegated to polib. An existing target gets the
    missing English entries appended; a missing target is created from the
    English catalog with empty translations.

    Returns:
        ``{'action': 'created'|'synced'|'skipped', 'added': int}``, plus an
        ``'error'`` message when the action is ``'skipped'``.
    """
    if not en_file.exists():
        return {"action": "skipped", "added": 0, "error": "English file does not exist"}

    target_present = target_file.exists()
    en_po = polib.pofile(str(en_file))

    if not en_po:
        return {"action": "skipped", "added": 0, "error": "English file has no entries"}

    if target_present:
        existing_po = polib.pofile(str(target_file))
        added = _sync_existing_po_file(en_po, existing_po, target_file)
        action = "synced"
    else:
        added = _create_new_po_file(en_po, target_file, lang_code, iso_code)
        action = "created"

    return {"action": action, "added": added}
333
+
334
+
335
def _extract_empty_keys_from_frontend(base_dir: Path, iso_code: str) -> list[dict]:
    """Collect untranslated keys from the frontend apps' JSON files.

    For each app in ``LEARNER_FACING_APPS`` the English source is the
    transifex input file if it exists, otherwise the English messages file.
    Apps lacking either the English or the target file are skipped, as are
    apps whose files cannot be read or parsed.

    Returns:
        One dict per English key whose target value is missing, falsy or
        whitespace-only, with keys ``app``, ``key``, ``english``,
        ``translation``, ``file_type`` and ``file_path``.
    """
    collected: list[dict] = []

    for app in LEARNER_FACING_APPS:
        i18n_root = base_dir / app / "src" / TRANSLATION_FILE_NAMES["i18n_dir"]
        messages_root = i18n_root / TRANSLATION_FILE_NAMES["messages_dir"]

        target_file = messages_root / f"{iso_code}.json"
        en_file = i18n_root / TRANSLATION_FILE_NAMES["transifex_input"]
        if not en_file.exists():
            en_file = messages_root / TRANSLATION_FILE_NAMES["english"]

        if not (target_file.exists() and en_file.exists()):
            continue

        try:
            target_data = load_json_file(target_file)
            en_data = load_json_file(en_file)

            for key, english_text in en_data.items():
                current = target_data.get(key, "")
                is_blank = not current or (
                    isinstance(current, str) and not current.strip()
                )
                if not is_blank:
                    continue
                collected.append(
                    {
                        "app": app,
                        "key": key,
                        "english": english_text,
                        "translation": "",
                        "file_type": "json",
                        "file_path": str(target_file.resolve()),
                    }
                )
        except (OSError, ValueError):
            # json.JSONDecodeError is a ValueError subclass, so it is covered.
            continue

    return collected
391
+
392
+
393
+ def _is_po_entry_empty(
394
+ entry: polib.POEntry, target_entry: polib.POEntry | None
395
+ ) -> bool:
396
+ """Check if a PO entry is empty or missing."""
397
+ if target_entry is None:
398
+ return True
399
+
400
+ if entry.msgid_plural:
401
+ # Plural entry - check if plural forms are empty
402
+ return any(
403
+ not target_entry.msgstr_plural.get(i, "").strip()
404
+ for i in range(len(target_entry.msgstr_plural))
405
+ )
406
+
407
+ # Singular entry - check if empty
408
+ return not target_entry.msgstr or not target_entry.msgstr.strip()
409
+
410
+
411
def _extract_empty_keys_from_backend(base_dir: Path, backend_locale: str) -> list[dict]:
    """Collect untranslated entries from the backend PO files.

    For each file in ``BACKEND_PO_FILES`` the English catalog is compared
    with the ``backend_locale`` catalog (matched by msgid). Files missing on
    either side, or failing to parse, are skipped.

    Returns:
        One dict per untranslated English entry with keys ``app``, ``key``,
        ``english``, ``translation``, ``file_type``, ``file_path``,
        ``po_file``, ``is_plural`` and ``msgid_plural`` (None for singular
        entries).
    """
    empty_keys: list[dict] = []
    locale_root = (
        base_dir
        / TRANSLATION_FILE_NAMES["edx_platform"]
        / TRANSLATION_FILE_NAMES["conf_dir"]
        / TRANSLATION_FILE_NAMES["locale_dir"]
    )
    lc_messages = TRANSLATION_FILE_NAMES["lc_messages"]
    locale_dir = locale_root / backend_locale / lc_messages
    en_dir = locale_root / "en" / lc_messages

    for po_file_name in BACKEND_PO_FILES:
        target_file = locale_dir / po_file_name
        en_file = en_dir / po_file_name

        if not target_file.exists() or not en_file.exists():
            continue

        try:
            target_po = polib.pofile(str(target_file))
            en_po = polib.pofile(str(en_file))

            target_entries_dict = {
                entry.msgid: entry for entry in target_po if entry.msgid
            }

            for entry in en_po:
                if not entry.msgid:  # Skip header
                    continue

                target_entry = target_entries_dict.get(entry.msgid)
                if not _is_po_entry_empty(entry, target_entry):
                    continue

                # polib uses '' (not None) for a missing msgid_plural, so
                # plural-ness must be decided by truthiness.
                is_plural = bool(entry.msgid_plural)
                empty_keys.append(
                    {
                        "app": "edx-platform",
                        "key": entry.msgid,
                        "english": entry.msgid,
                        "translation": "",
                        "file_type": "po",
                        "file_path": str(target_file.resolve()),
                        "po_file": po_file_name,
                        "is_plural": is_plural,
                        "msgid_plural": entry.msgid_plural if is_plural else None,
                    }
                )
        except (OSError, polib.POFileError, ValueError):
            # Best effort: an unreadable catalog is skipped, not fatal.
            continue

    return empty_keys
471
+
472
+
473
def extract_empty_keys(
    base_dir: Path,
    lang_code: str,
    iso_code: str | None = None,
    *,
    skip_backend: bool = False,
) -> list[dict]:
    """Gather every untranslated key for a language.

    Frontend JSON files are always scanned; backend PO files are scanned
    unless ``skip_backend`` is set.

    Args:
        base_dir: Root directory containing the app checkouts.
        lang_code: Language code; also the backend locale when ``iso_code``
            is empty.
        iso_code: Locale used for file names; defaults to ``lang_code``.
        skip_backend: When True, PO files are not inspected.

    Returns:
        List of dicts containing at least
        ``{'app': str, 'key': str, 'english': str, 'file_type': 'json'|'po'}``.
    """
    frontend_locale = lang_code if iso_code is None else iso_code
    results = _extract_empty_keys_from_frontend(base_dir, frontend_locale)

    if skip_backend:
        return results

    # An empty iso_code falls back to lang_code for the backend locale.
    backend_locale = frontend_locale or lang_code
    results.extend(_extract_empty_keys_from_backend(base_dir, backend_locale))
    return results
495
+
496
+
497
def apply_json_translations(file_path: Path, translations: dict[str, str]) -> int:
    """Fill blank values in a JSON translation file.

    A translation is applied only when its key already exists in the file
    and the current value is falsy or a whitespace-only string. The file is
    rewritten only if something changed.

    Returns:
        Number of translations applied.
    """

    def _is_blank(value) -> bool:
        return not value or (isinstance(value, str) and not value.strip())

    data = load_json_file(file_path)
    updates = {
        key: text
        for key, text in translations.items()
        if key in data and _is_blank(data[key])
    }

    if updates:
        data.update(updates)
        save_json_file(file_path, data)

    return len(updates)
519
+
520
+
521
def load_glossary(glossary_path: Path, _lang_code: str = "") -> dict[str, Any]:
    """
    Read a glossary text file into an English -> translation mapping.

    Only lines shaped like ``- 'english term' -> 'translation'`` are used;
    blank lines and lines starting with ``#`` (comments, headers) are
    ignored. Surrounding single/double quotes are stripped from both sides.

    Args:
        glossary_path: Path to the glossary text file.
        _lang_code: Language code (currently unused, kept for API compatibility).

    Returns:
        Dictionary mapping English terms to translation strings. An empty
        dict is returned when the file is missing, unreadable, or a parsing
        error occurs.

    Text file format:
        # Comments and headers
        ## TERM MAPPINGS
        - 'english term' -> 'translation'
        - 'another term' -> 'another translation'

    Example:
        - 'accuracy' -> 'الدقة'
        - 'activation function' -> 'دالّة التفعيل'
    """
    if not glossary_path.exists():
        return {}

    terms: dict[str, Any] = {}
    quote_chars = "'\""

    try:
        with glossary_path.open(encoding="utf-8") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()

                if not stripped or stripped.startswith("#"):
                    continue
                if not stripped.startswith("- ") or "->" not in stripped:
                    continue

                # Drop the leading '- ' and split on the first arrow only.
                pieces = stripped[2:].strip().split("->", 1)
                if len(pieces) != EXPECTED_GLOSSARY_PARTS:
                    continue

                source_term = pieces[0].strip().strip(quote_chars)
                target_term = pieces[1].strip().strip(quote_chars)
                if source_term and target_term:
                    terms[source_term] = target_term
    except (OSError, ValueError, AttributeError, IndexError):
        # UnicodeDecodeError is a ValueError subclass. Library code cannot
        # print, so failures yield an empty glossary and the caller decides
        # how to report them.
        return {}

    return terms
583
+
584
+
585
+ def match_glossary_term(
586
+ text: str, glossary: dict[str, Any] | None, *, exact_match: bool = True
587
+ ) -> Any | None:
588
+ """
589
+ Match text against glossary terms.
590
+ Returns translation (string or dict with 'singular'/'plural') if match found,
591
+ None otherwise.
592
+ Supports both simple format ("term": "translation") and plural format
593
+ ("term": {"singular": "...", "plural": "..."}).
594
+
595
+ Args:
596
+ text: The text to match against glossary terms.
597
+ glossary: Dictionary mapping English terms to translations, or None.
598
+ exact_match: If True, only exact matches are returned.
599
+ If False, case-insensitive and partial matches are allowed.
600
+
601
+ Returns:
602
+ Translation string/dict if match found, None otherwise.
603
+ """
604
+ if not glossary:
605
+ return None
606
+
607
+ if text in glossary:
608
+ # Return as-is: string for singular, dict for plural
609
+ return glossary[text]
610
+
611
+ if not exact_match:
612
+ text_lower = text.lower().strip()
613
+ for term, translation in glossary.items():
614
+ if term.lower().strip() == text_lower:
615
+ return translation
616
+
617
+ for term, translation in glossary.items():
618
+ if term.lower() in text_lower or text_lower in term.lower():
619
+ return translation
620
+
621
+ return None
622
+
623
+
624
+ def _apply_plural_dict_translation(
625
+ entry: polib.POEntry, translation: dict[str, str]
626
+ ) -> bool:
627
+ """Apply plural translation from dict. Returns True if applied."""
628
+ plural_applied = False
629
+ if not entry.msgstr_plural.get(0, "").strip():
630
+ entry.msgstr_plural[0] = translation["singular"]
631
+ plural_applied = True
632
+ if len(entry.msgstr_plural) > 1 and not entry.msgstr_plural.get(1, "").strip():
633
+ entry.msgstr_plural[1] = translation["plural"]
634
+ plural_applied = True
635
+ return plural_applied
636
+
637
+
638
+ def _apply_plural_string_translation(entry: polib.POEntry, translation: str) -> bool:
639
+ """Apply plural translation from string. Returns True if applied."""
640
+ plural_applied = False
641
+ for i in range(len(entry.msgstr_plural)):
642
+ if not entry.msgstr_plural.get(i, "").strip():
643
+ entry.msgstr_plural[i] = translation
644
+ plural_applied = True
645
+ return plural_applied
646
+
647
+
648
def _apply_translation_to_entry(entry: polib.POEntry, translation: Any) -> bool:
    """
    Write a translation into a PO entry without overwriting existing text.

    Plural entries accept either a dict holding both 'singular' and 'plural'
    keys or a non-empty string (used for every blank plural slot). Singular
    entries accept only a non-empty string, applied when ``msgstr`` is blank.

    Args:
        entry: The PO entry to apply translation to.
        translation: Translation value (string or dict with 'singular'/'plural').

    Returns:
        True if translation was applied, False otherwise.
    """
    has_text = isinstance(translation, str) and bool(translation)

    if not entry.msgid_plural:
        # Singular entry: fill only when nothing meaningful is there yet.
        if has_text and not (entry.msgstr and entry.msgstr.strip()):
            entry.msgstr = translation
            return True
        return False

    is_plural_dict = (
        isinstance(translation, dict)
        and "singular" in translation
        and "plural" in translation
    )
    if is_plural_dict:
        return _apply_plural_dict_translation(entry, translation)

    return has_text and _apply_plural_string_translation(entry, translation)
682
+
683
+
684
def apply_po_translations(file_path: Path, translations: dict[str, Any]) -> int:
    """
    Fill untranslated entries of a PO file from ``translations``.

    Entries are looked up by msgid; the header entry is ignored. Values may
    be:
    - Dict with 'singular' and 'plural' keys: {"singular": "...", "plural": "..."}
    - String: for plural entries, applied to every blank plural form

    The file is saved only when at least one entry changed.

    Returns:
        Number of entries that received a translation.
    """
    catalog = polib.pofile(str(file_path))

    applied = sum(
        1
        for entry in catalog
        if entry.msgid
        and entry.msgid in translations
        and _apply_translation_to_entry(entry, translations[entry.msgid])
    )

    if applied:
        catalog.save(str(file_path))

    return applied
708
+
709
+
710
def _sync_frontend_translations(base_dir: Path, iso_code: str) -> dict[str, int]:
    """Sync the ``iso_code`` JSON file of every learner-facing frontend app.

    The English source is the transifex input file if present, otherwise the
    English messages file; apps with neither are skipped. Apps whose sync
    raises ``OSError`` or ``ValueError`` are skipped as well.

    Returns:
        Totals with keys ``added``, ``fixed``, ``removed`` (summed key
        counts) and ``created``, ``synced`` (file counts).
    """
    totals = {"added": 0, "fixed": 0, "removed": 0, "created": 0, "synced": 0}

    for app in LEARNER_FACING_APPS:
        i18n_root = base_dir / app / "src" / TRANSLATION_FILE_NAMES["i18n_dir"]
        messages_root = i18n_root / TRANSLATION_FILE_NAMES["messages_dir"]

        en_file = i18n_root / TRANSLATION_FILE_NAMES["transifex_input"]
        if not en_file.exists():
            en_file = messages_root / TRANSLATION_FILE_NAMES["english"]

        target_file = messages_root / f"{iso_code}.json"

        if not en_file.exists():
            continue

        try:
            result = sync_or_create_json_file(en_file, target_file)
        except (OSError, ValueError):
            # json.JSONDecodeError is a ValueError subclass, so it is covered.
            continue

        action = result["action"]
        if action in ("created", "synced"):
            totals[action] += 1
        for counter in ("added", "fixed", "removed"):
            totals[counter] += result.get(counter, 0)

    return totals
741
+
742
+
743
def _sync_backend_translations(
    base_dir: Path, lang_code: str, iso_code: str
) -> dict[str, int]:
    """Sync every backend PO file for a locale against its English version.

    The locale directory is ``iso_code`` when it is non-empty, otherwise
    ``lang_code``. PO files without an English counterpart, or whose sync
    raises ``OSError``, ``polib.POFileError`` or ``ValueError``, are skipped.

    Returns:
        ``{"added": int}`` - total number of entries added across files.
    """
    backend_locale = iso_code or lang_code
    locale_root = (
        base_dir
        / TRANSLATION_FILE_NAMES["edx_platform"]
        / TRANSLATION_FILE_NAMES["conf_dir"]
        / TRANSLATION_FILE_NAMES["locale_dir"]
    )
    lc_messages = TRANSLATION_FILE_NAMES["lc_messages"]
    target_dir = locale_root / backend_locale / lc_messages
    english_dir = locale_root / "en" / lc_messages

    total_added = 0
    for po_file_name in BACKEND_PO_FILES:
        en_file = english_dir / po_file_name
        if not en_file.exists():
            continue

        try:
            result = sync_or_create_po_file(
                en_file, target_dir / po_file_name, backend_locale, iso_code
            )
        except (OSError, polib.POFileError, ValueError):
            continue
        total_added += result.get("added", 0)

    return {"added": total_added}
782
+
783
+
784
def sync_all_translations(
    base_dir: Path,
    lang_code: str,
    iso_code: str | None = None,
    *,
    skip_backend: bool = False,
) -> dict:
    """
    Sync frontend and (optionally) backend translation files for a language.

    Args:
        base_dir: Root directory containing the app checkouts.
        lang_code: Language code.
        iso_code: Locale used for file names; defaults to ``lang_code``.
        skip_backend: When True, backend PO files are left untouched and
            their stats are reported as ``{"added": 0}``.

    Returns:
        ``{"frontend": <frontend stats>, "backend": <backend stats>}``.
    """
    locale = lang_code if iso_code is None else iso_code

    summary = {"frontend": _sync_frontend_translations(base_dir, locale)}
    if skip_backend:
        summary["backend"] = {"added": 0}
    else:
        summary["backend"] = _sync_backend_translations(base_dir, lang_code, locale)
    return summary