ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff reflects the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release.
This version of ol-openedx-course-translations might be problematic.
- ol_openedx_course_translations/admin.py +29 -0
- ol_openedx_course_translations/apps.py +13 -2
- ol_openedx_course_translations/filters.py +39 -0
- ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
- ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
- ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
- ol_openedx_course_translations/management/commands/translate_course.py +472 -475
- ol_openedx_course_translations/middleware.py +143 -0
- ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
- ol_openedx_course_translations/migrations/__init__.py +0 -0
- ol_openedx_course_translations/models.py +57 -0
- ol_openedx_course_translations/providers/__init__.py +1 -0
- ol_openedx_course_translations/providers/base.py +278 -0
- ol_openedx_course_translations/providers/deepl_provider.py +292 -0
- ol_openedx_course_translations/providers/llm_providers.py +581 -0
- ol_openedx_course_translations/settings/cms.py +17 -0
- ol_openedx_course_translations/settings/common.py +58 -30
- ol_openedx_course_translations/settings/lms.py +38 -0
- ol_openedx_course_translations/tasks.py +222 -0
- ol_openedx_course_translations/urls.py +16 -0
- ol_openedx_course_translations/utils/__init__.py +0 -0
- ol_openedx_course_translations/utils/command_utils.py +197 -0
- ol_openedx_course_translations/utils/constants.py +218 -0
- ol_openedx_course_translations/utils/course_translations.py +608 -0
- ol_openedx_course_translations/utils/translation_sync.py +808 -0
- ol_openedx_course_translations/views.py +73 -0
- ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
- ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
- ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
- ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
- ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
- ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
ol_openedx_course_translations/utils/translation_sync.py
@@ -0,0 +1,808 @@
"""Translation synchronization module for syncing and managing translation files."""

import json
from collections import OrderedDict
from pathlib import Path
from typing import Any

import polib  # type: ignore[import-untyped]

from ol_openedx_course_translations.utils.constants import (
    BACKEND_PO_FILES,
    DEFAULT_JSON_INDENT,
    DEFAULT_PLURAL_FORM,
    EXPECTED_GLOSSARY_PARTS,
    LANGUAGE_MAPPING,
    LEARNER_FACING_APPS,
    PLURAL_FORMS,
    PO_HEADER_BUGS_EMAIL,
    PO_HEADER_CONTENT_TRANSFER_ENCODING,
    PO_HEADER_CONTENT_TYPE,
    PO_HEADER_MIME_VERSION,
    PO_HEADER_POT_CREATION_DATE,
    PO_HEADER_PROJECT_VERSION,
    PO_HEADER_TRANSIFEX_TEAM_BASE_URL,
    TRANSLATION_FILE_NAMES,
    TYPO_PATTERNS,
)


def load_json_file(file_path: Path) -> dict:
    """Load a JSON translation file."""
    if not file_path.exists():
        return {}
    try:
        with file_path.open(encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        msg = f"Error parsing JSON file {file_path}: {e}"
        raise ValueError(msg) from e


def save_json_file(file_path: Path, data: dict, indent: int = DEFAULT_JSON_INDENT):
    """Save a JSON translation file with proper formatting."""
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with file_path.open("w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=indent)
        f.write("\n")


def find_typo_mappings(data: dict) -> list[tuple[str, str]]:
    """Find typo keys and their correct counterparts."""
    mappings = []

    for typo, correct in TYPO_PATTERNS:
        typo_keys = [k for k in data if typo in k]
        for typo_key in typo_keys:
            correct_key = typo_key.replace(typo, correct)
            if correct_key in data:
                mappings.append((typo_key, correct_key))

    return mappings


def sync_or_create_json_file(en_file: Path, target_file: Path) -> dict:
    """
    Sync or create a JSON translation file.
    Returns dict with stats:
    {'action': 'created'|'synced'|'skipped', 'added': int,
    'fixed': int, 'removed': int}
    """
    try:
        en_data = load_json_file(en_file)
    except ValueError:
        return {
            "action": "skipped",
            "added": 0,
            "fixed": 0,
            "removed": 0,
            "error": "English file not readable",
        }

    if not en_data:
        return {
            "action": "skipped",
            "added": 0,
            "fixed": 0,
            "removed": 0,
            "error": "English file is empty",
        }

    target_data = load_json_file(target_file) if target_file.exists() else {}
    file_exists = target_file.exists()

    stats = {
        "action": "created" if not file_exists else "synced",
        "added": 0,
        "fixed": 0,
        "removed": 0,
    }

    if file_exists:
        ordered_data = OrderedDict(target_data)

        typo_mappings = find_typo_mappings(ordered_data)
        for typo_key, correct_key in typo_mappings:
            typo_value = ordered_data.get(typo_key, "")
            correct_value = ordered_data.get(correct_key, "")

            if not correct_value and typo_value:
                ordered_data[correct_key] = typo_value
                # Type assertion: stats["fixed"] is always int
                stats["fixed"] = int(stats["fixed"]) + 1

            if typo_key in ordered_data:
                del ordered_data[typo_key]
                # Type assertion: stats["removed"] is always int
                stats["removed"] = int(stats["removed"]) + 1

        for key in en_data:
            if key not in ordered_data:
                ordered_data[key] = ""
                # Type assertion: stats["added"] is always int
                stats["added"] = int(stats["added"]) + 1

        target_data = dict(ordered_data)
    else:
        target_data = dict.fromkeys(en_data, "")
        stats["added"] = len(en_data)

    save_json_file(target_file, target_data)

    return stats
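
# Illustrative usage sketch (hypothetical paths; the real locations are resolved
# through TRANSLATION_FILE_NAMES and LEARNER_FACING_APPS):
#
#     stats = sync_or_create_json_file(
#         Path("frontend-app-learning/src/i18n/transifex_input.json"),
#         Path("frontend-app-learning/src/i18n/messages/ar.json"),
#     )
#     # e.g. {"action": "created", "added": 120, "fixed": 0, "removed": 0}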


def _get_base_lang(lang_code: str) -> str:
    """Extract base language code from locale code (e.g., 'es_ES' -> 'es')."""
    return lang_code.split("_")[0] if "_" in lang_code else lang_code


def _get_plural_form(lang_code: str) -> str:
    """Get plural form string for a language code."""
    base_lang = _get_base_lang(lang_code)
    return PLURAL_FORMS.get(base_lang, DEFAULT_PLURAL_FORM)


def create_po_file_header(lang_code: str, iso_code: str | None = None) -> str:
    """Create PO file header for a language."""
    if iso_code is None:
        iso_code = lang_code

    base_lang = _get_base_lang(lang_code)
    plural = _get_plural_form(lang_code)
    lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code)

    return f"""msgid ""
msgstr ""
"Project-Id-Version: {PO_HEADER_PROJECT_VERSION}\\n"
"Report-Msgid-Bugs-To: {PO_HEADER_BUGS_EMAIL}\\n"
"POT-Creation-Date: {PO_HEADER_POT_CREATION_DATE}\\n"
"PO-Revision-Date: 2025-01-01 00:00+0000\\n"
"Last-Translator: \\n"
"Language-Team: {lang_name} ({PO_HEADER_TRANSIFEX_TEAM_BASE_URL}/{base_lang}/)\\n"
"MIME-Version: {PO_HEADER_MIME_VERSION}\\n"
"Content-Type: {PO_HEADER_CONTENT_TYPE}\\n"
"Content-Transfer-Encoding: {PO_HEADER_CONTENT_TRANSFER_ENCODING}\\n"
"Language: {iso_code}\\n"
"Plural-Forms: {plural}\\n"

"""


def parse_po_file(po_file: Path) -> dict[str, str]:
    """
    Parse a PO file and extract msgid -> msgstr mappings.
    For plural forms, uses msgid as the key
    (msgid_plural entries are handled separately).
    Uses polib if available, falls back to manual parsing.
    """
    if not po_file.exists():
        return {}

    po = polib.pofile(str(po_file))
    entries = {}
    for entry in po:
        if entry.msgid:  # Skip empty header msgid
            # For plural entries, use msgid as key
            entries[entry.msgid] = entry.msgstr or ""
    return entries


def parse_po_file_with_metadata(po_file: Path) -> dict[str, dict]:
    """
    Parse a PO file and extract msgid -> metadata mappings.
    Returns dict with structure:
    {msgid: {'msgstr': str, 'msgid_plural': str, 'msgstr_plural': dict,
    'locations': List[str], 'flags': List[str], 'is_plural': bool}}
    Uses polib if available, falls back to manual parsing.
    """
    if not po_file.exists():
        return {}

    po = polib.pofile(str(po_file))
    entries = {}
    for entry in po:
        if entry.msgid:  # Skip empty header msgid
            locations = [
                f"{occ[0]}:{occ[1]}" if len(occ) > 1 else occ[0]
                for occ in entry.occurrences
            ]

            entry_data = {
                "msgstr": entry.msgstr or "",
                "locations": locations,
                "flags": entry.flags,  # List of flags like ['python-format']
                "is_plural": entry.msgid_plural is not None,
            }
            if entry.msgid_plural:
                entry_data["msgid_plural"] = entry.msgid_plural
                # Convert msgstr_plural dict to simple dict
                entry_data["msgstr_plural"] = {
                    i: entry.msgstr_plural.get(i, "")
                    for i in range(len(entry.msgstr_plural))
                }
            entries[entry.msgid] = entry_data
    return entries
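
# Illustrative usage sketch (hypothetical path and entries; return shapes follow the
# docstrings above):
#
#     entries = parse_po_file(Path("conf/locale/fr/LC_MESSAGES/django.po"))
#     # {"Course": "Cours", "Dashboard": "", ...}
#
#     meta = parse_po_file_with_metadata(Path("conf/locale/fr/LC_MESSAGES/django.po"))
#     # meta["Course"] -> {"msgstr": "Cours", "locations": [...], "flags": [],
#     #                    "is_plural": False}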


def _create_po_entry_from_en(entry: polib.POEntry) -> polib.POEntry:
    """Create a new PO entry from an English entry with empty translation."""
    new_entry = polib.POEntry(
        msgid=entry.msgid,
        msgid_plural=entry.msgid_plural,
        occurrences=entry.occurrences,
        flags=entry.flags,
    )
    if entry.msgid_plural:
        # Initialize plural forms (at least 2)
        num_forms = max(2, len(entry.msgstr_plural) if entry.msgstr_plural else 2)
        new_entry.msgstr_plural = dict.fromkeys(range(num_forms), "")
    else:
        new_entry.msgstr = ""
    return new_entry


def _sync_existing_po_file(
    en_po: polib.POFile, target_po: polib.POFile, target_file: Path
) -> int:
    """Sync existing PO file by adding missing entries. Returns count added."""
    # Create a set of existing entries (msgid + msgid_plural for plural entries)
    existing_entries = set()
    for entry in target_po:
        if entry.msgid:
            key = (entry.msgid, entry.msgid_plural if entry.msgid_plural else None)
            existing_entries.add(key)

    # Add missing entries from English file
    added_count = 0
    for entry in en_po:
        if not entry.msgid:  # Skip header
            continue

        entry_key = (entry.msgid, entry.msgid_plural if entry.msgid_plural else None)
        if entry_key not in existing_entries:
            new_entry = _create_po_entry_from_en(entry)
            target_po.append(new_entry)
            added_count += 1

    if added_count > 0:
        target_file.parent.mkdir(parents=True, exist_ok=True)
        target_po.save(str(target_file))

    return added_count


def _create_new_po_file(
    en_po: polib.POFile, target_file: Path, lang_code: str, iso_code: str | None
) -> int:
    """Create a new PO file with all entries from English. Returns count added."""
    target_po = polib.POFile()

    # Set metadata - preserve important fields from English file
    target_po.metadata = en_po.metadata.copy()
    target_po.metadata["Language"] = iso_code or lang_code

    # Ensure Plural-Forms is set correctly for the target language
    if "Plural-Forms" not in target_po.metadata:
        target_po.metadata["Plural-Forms"] = _get_plural_form(lang_code)

    # Copy all entries with empty translations
    added_count = 0
    for entry in en_po:
        if not entry.msgid:  # Skip header
            continue

        new_entry = _create_po_entry_from_en(entry)
        target_po.append(new_entry)
        added_count += 1

    target_file.parent.mkdir(parents=True, exist_ok=True)
    target_po.save(str(target_file))
    return added_count


def sync_or_create_po_file(
    en_file: Path, target_file: Path, lang_code: str, iso_code: str | None = None
) -> dict:
    """
    Sync or create a PO file, preserving location comments and format flags.
    Returns dict with stats: {'action': 'created'|'synced'|'skipped', 'added': int}
    Uses polib if available for robust PO file handling.
    """
    if not en_file.exists():
        return {"action": "skipped", "added": 0, "error": "English file does not exist"}

    file_exists = target_file.exists()
    stats = {"action": "created" if not file_exists else "synced", "added": 0}

    # Use polib for robust PO file handling
    en_po = polib.pofile(str(en_file))

    if not en_po:
        return {"action": "skipped", "added": 0, "error": "English file has no entries"}

    if file_exists:
        # File exists: sync entries
        target_po = polib.pofile(str(target_file))
        stats["added"] = _sync_existing_po_file(en_po, target_po, target_file)
    else:
        # File doesn't exist: create new with all entries from English
        stats["added"] = _create_new_po_file(en_po, target_file, lang_code, iso_code)

    return stats
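
# Illustrative usage sketch (hypothetical paths and locale codes):
#
#     stats = sync_or_create_po_file(
#         Path("edx-platform/conf/locale/en/LC_MESSAGES/django.po"),
#         Path("edx-platform/conf/locale/pt_BR/LC_MESSAGES/django.po"),
#         lang_code="pt-br",
#         iso_code="pt_BR",
#     )
#     # e.g. {"action": "synced", "added": 37}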


def _extract_empty_keys_from_frontend(base_dir: Path, iso_code: str) -> list[dict]:
    """Extract empty keys from frontend JSON files."""
    empty_keys = []

    for app in LEARNER_FACING_APPS:
        target_file = (
            base_dir
            / app
            / "src"
            / TRANSLATION_FILE_NAMES["i18n_dir"]
            / TRANSLATION_FILE_NAMES["messages_dir"]
            / f"{iso_code}.json"
        )
        en_file = (
            base_dir
            / app
            / "src"
            / TRANSLATION_FILE_NAMES["i18n_dir"]
            / TRANSLATION_FILE_NAMES["transifex_input"]
        )
        if not en_file.exists():
            en_file = (
                base_dir
                / app
                / "src"
                / TRANSLATION_FILE_NAMES["i18n_dir"]
                / TRANSLATION_FILE_NAMES["messages_dir"]
                / TRANSLATION_FILE_NAMES["english"]
            )

        if not target_file.exists() or not en_file.exists():
            continue

        try:
            target_data = load_json_file(target_file)
            en_data = load_json_file(en_file)

            for key in en_data:
                target_value = target_data.get(key, "")
                if not target_value or (
                    isinstance(target_value, str) and not target_value.strip()
                ):
                    empty_keys.append(
                        {
                            "app": app,
                            "key": key,
                            "english": en_data[key],
                            "translation": "",
                            "file_type": "json",
                            "file_path": str(target_file.resolve()),
                        }
                    )
        except (OSError, ValueError, json.JSONDecodeError):
            continue

    return empty_keys


def _is_po_entry_empty(
    entry: polib.POEntry, target_entry: polib.POEntry | None
) -> bool:
    """Check if a PO entry is empty or missing."""
    if target_entry is None:
        return True

    if entry.msgid_plural:
        # Plural entry - check if plural forms are empty
        return any(
            not target_entry.msgstr_plural.get(i, "").strip()
            for i in range(len(target_entry.msgstr_plural))
        )

    # Singular entry - check if empty
    return not target_entry.msgstr or not target_entry.msgstr.strip()


def _extract_empty_keys_from_backend(base_dir: Path, backend_locale: str) -> list[dict]:
    """Extract empty keys from backend PO files."""
    empty_keys = []
    locale_dir = (
        base_dir
        / TRANSLATION_FILE_NAMES["edx_platform"]
        / TRANSLATION_FILE_NAMES["conf_dir"]
        / TRANSLATION_FILE_NAMES["locale_dir"]
        / backend_locale
        / TRANSLATION_FILE_NAMES["lc_messages"]
    )

    for po_file_name in BACKEND_PO_FILES:
        target_file = locale_dir / po_file_name
        en_file = (
            base_dir
            / TRANSLATION_FILE_NAMES["edx_platform"]
            / TRANSLATION_FILE_NAMES["conf_dir"]
            / TRANSLATION_FILE_NAMES["locale_dir"]
            / "en"
            / TRANSLATION_FILE_NAMES["lc_messages"]
            / po_file_name
        )

        if not target_file.exists() or not en_file.exists():
            continue

        try:
            target_po = polib.pofile(str(target_file))
            en_po = polib.pofile(str(en_file))

            target_entries_dict = {
                entry.msgid: entry for entry in target_po if entry.msgid
            }

            for entry in en_po:
                if not entry.msgid:  # Skip header
                    continue

                target_entry = target_entries_dict.get(entry.msgid)
                if _is_po_entry_empty(entry, target_entry):
                    empty_keys.append(
                        {
                            "app": "edx-platform",
                            "key": entry.msgid,
                            "english": entry.msgid,
                            "translation": "",
                            "file_type": "po",
                            "file_path": str(target_file.resolve()),
                            "po_file": po_file_name,
                            "is_plural": entry.msgid_plural is not None,
                            "msgid_plural": entry.msgid_plural
                            if entry.msgid_plural
                            else None,
                        }
                    )
        except (OSError, polib.POFileError, ValueError):
            continue

    return empty_keys


def extract_empty_keys(
    base_dir: Path,
    lang_code: str,
    iso_code: str | None = None,
    *,
    skip_backend: bool = False,
) -> list[dict]:
    """
    Extract all empty translation keys for a language.
    Returns list of dicts with:
    {'app': str, 'key': str, 'english': str, 'file_type': 'json'|'po'}
    """
    if iso_code is None:
        iso_code = lang_code

    empty_keys = _extract_empty_keys_from_frontend(base_dir, iso_code)

    if not skip_backend:
        backend_locale = iso_code if iso_code and iso_code != lang_code else lang_code
        empty_keys.extend(_extract_empty_keys_from_backend(base_dir, backend_locale))

    return empty_keys
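
# Illustrative usage sketch (hypothetical base directory):
#
#     missing = extract_empty_keys(Path("/openedx"), lang_code="el")
#     frontend_gaps = [k for k in missing if k["file_type"] == "json"]
#     backend_gaps = [k for k in missing if k["file_type"] == "po"]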


def apply_json_translations(file_path: Path, translations: dict[str, str]) -> int:
    """
    Apply translations to a JSON file.
    Returns number of translations applied.
    """
    data = load_json_file(file_path)
    applied = 0

    for key, translation in translations.items():
        if key in data:
            # Check if the value is empty (empty string, whitespace only, or None)
            current_value = data[key]
            if not current_value or (
                isinstance(current_value, str) and not current_value.strip()
            ):
                data[key] = translation
                applied += 1

    if applied > 0:
        save_json_file(file_path, data)

    return applied


def load_glossary(glossary_path: Path, _lang_code: str = "") -> dict[str, Any]:
    """
    Load glossary for a language from a text file.
    Parses text format with term mappings like: - 'english term' -> 'translation'
    Returns dict mapping English -> Translation (string or dict for plural forms).

    Args:
        glossary_path: Path to the glossary text file.
        _lang_code: Language code (currently unused, kept for API compatibility).

    Returns:
        Dictionary mapping English terms to translations. Translations can be:
        - Strings for singular terms
        - Dicts with 'singular' and 'plural' keys for plural forms

    Text file format:
        # Comments and headers
        ## TERM MAPPINGS
        - 'english term' -> 'translation'
        - 'another term' -> 'another translation'

    Example:
        - 'accuracy' -> 'الدقة'
        - 'activation function' -> 'دالّة التفعيل'
    """
    if not glossary_path.exists():
        return {}

    glossary = {}

    try:
        with glossary_path.open(encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()

                # Skip empty lines, comments, and headers
                if not line or line.startswith("#"):
                    continue

                # Parse lines like: - 'english term' -> 'translation'
                if line.startswith("- ") and "->" in line:
                    # Extract the mapping
                    # Format: - 'english term' -> 'translation'
                    mapping_line = line[2:].strip()  # Remove leading '- '
                    parts = mapping_line.split("->", 1)

                    if len(parts) == EXPECTED_GLOSSARY_PARTS:
                        english_term = parts[0].strip().strip("'\"")
                        translation = parts[1].strip().strip("'\"")

                        if english_term and translation:
                            glossary[english_term] = translation
    except (OSError, UnicodeDecodeError):
        # Log specific file-related errors but return empty dict to allow continuation
        # In a library function, we can't use stdout, so we just return empty dict
        # The caller can handle logging if needed
        return {}
    except (ValueError, AttributeError, IndexError):
        # Catch parsing errors and other unexpected errors
        return {}
    else:
        return glossary


def match_glossary_term(
    text: str, glossary: dict[str, Any] | None, *, exact_match: bool = True
) -> Any | None:
    """
    Match text against glossary terms.
    Returns translation (string or dict with 'singular'/'plural') if match found,
    None otherwise.
    Supports both simple format ("term": "translation") and plural format
    ("term": {"singular": "...", "plural": "..."}).

    Args:
        text: The text to match against glossary terms.
        glossary: Dictionary mapping English terms to translations, or None.
        exact_match: If True, only exact matches are returned.
            If False, case-insensitive and partial matches are allowed.

    Returns:
        Translation string/dict if match found, None otherwise.
    """
    if not glossary:
        return None

    if text in glossary:
        # Return as-is: string for singular, dict for plural
        return glossary[text]

    if not exact_match:
        text_lower = text.lower().strip()
        for term, translation in glossary.items():
            if term.lower().strip() == text_lower:
                return translation

        for term, translation in glossary.items():
            if term.lower() in text_lower or text_lower in term.lower():
                return translation

    return None
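
# Illustrative usage sketch (the file name mirrors the bundled
# glossaries/machine_learning/*.txt files; the resolved path is hypothetical):
#
#     glossary = load_glossary(Path("glossaries/machine_learning/ar.txt"))
#     match_glossary_term("accuracy", glossary)                     # exact match or None
#     match_glossary_term("Accuracy", glossary, exact_match=False)  # case-insensitive/partial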


def _apply_plural_dict_translation(
    entry: polib.POEntry, translation: dict[str, str]
) -> bool:
    """Apply plural translation from dict. Returns True if applied."""
    plural_applied = False
    if not entry.msgstr_plural.get(0, "").strip():
        entry.msgstr_plural[0] = translation["singular"]
        plural_applied = True
    if len(entry.msgstr_plural) > 1 and not entry.msgstr_plural.get(1, "").strip():
        entry.msgstr_plural[1] = translation["plural"]
        plural_applied = True
    return plural_applied


def _apply_plural_string_translation(entry: polib.POEntry, translation: str) -> bool:
    """Apply plural translation from string. Returns True if applied."""
    plural_applied = False
    for i in range(len(entry.msgstr_plural)):
        if not entry.msgstr_plural.get(i, "").strip():
            entry.msgstr_plural[i] = translation
            plural_applied = True
    return plural_applied


def _apply_translation_to_entry(entry: polib.POEntry, translation: Any) -> bool:
    """
    Apply translation to a PO entry. Returns True if translation was applied.

    Args:
        entry: The PO entry to apply translation to.
        translation: Translation value (string or dict with 'singular'/'plural').

    Returns:
        True if translation was applied, False otherwise.
    """
    if entry.msgid_plural:
        # Plural entry
        if (
            isinstance(translation, dict)
            and "singular" in translation
            and "plural" in translation
        ):
            return _apply_plural_dict_translation(entry, translation)
        if (
            isinstance(translation, str)
            and translation
            and _apply_plural_string_translation(entry, translation)
        ):
            return True
    # Singular entry - translation should be a string
    elif (
        isinstance(translation, str)
        and translation
        and (not entry.msgstr or not entry.msgstr.strip())
    ):
        entry.msgstr = translation
        return True
    return False


def apply_po_translations(file_path: Path, translations: dict[str, Any]) -> int:
    """
    Apply translations to a PO file. Returns number of translations applied.
    Handles both singular and plural forms.
    For plural forms, translations dict can contain:
    - Dict with 'singular' and 'plural' keys: {"singular": "...", "plural": "..."}
    - String: applies same translation to all plural forms
    """
    po = polib.pofile(str(file_path))
    applied = 0

    for entry in po:
        if not entry.msgid:
            continue

        if entry.msgid in translations:
            translation = translations[entry.msgid]
            if _apply_translation_to_entry(entry, translation):
                applied += 1

    if applied > 0:
        po.save(str(file_path))

    return applied
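
# Illustrative usage sketch (hypothetical paths, keys, and strings; the plural value
# uses the documented {"singular": ..., "plural": ...} form):
#
#     apply_json_translations(
#         Path("frontend-app-learning/src/i18n/messages/de.json"),
#         {"course.home.title": "Kursübersicht"},
#     )
#     apply_po_translations(
#         Path("edx-platform/conf/locale/de/LC_MESSAGES/django.po"),
#         {"{num} day": {"singular": "{num} Tag", "plural": "{num} Tage"}},
#     )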


def _sync_frontend_translations(base_dir: Path, iso_code: str) -> dict[str, int]:
    """Sync frontend translation files. Returns stats."""
    frontend_stats = {"added": 0, "fixed": 0, "removed": 0, "created": 0, "synced": 0}

    for app in LEARNER_FACING_APPS:
        app_dir = base_dir / app / "src" / TRANSLATION_FILE_NAMES["i18n_dir"]
        messages_dir = app_dir / TRANSLATION_FILE_NAMES["messages_dir"]

        en_file = app_dir / TRANSLATION_FILE_NAMES["transifex_input"]
        if not en_file.exists():
            en_file = messages_dir / TRANSLATION_FILE_NAMES["english"]

        target_file = messages_dir / f"{iso_code}.json"

        if not en_file.exists():
            continue

        try:
            stats = sync_or_create_json_file(en_file, target_file)
            if stats["action"] == "created":
                frontend_stats["created"] += 1
            elif stats["action"] == "synced":
                frontend_stats["synced"] += 1

            frontend_stats["added"] += stats.get("added", 0)
            frontend_stats["fixed"] += stats.get("fixed", 0)
            frontend_stats["removed"] += stats.get("removed", 0)
        except (OSError, ValueError, json.JSONDecodeError):
            continue

    return frontend_stats


def _sync_backend_translations(
    base_dir: Path, lang_code: str, iso_code: str
) -> dict[str, int]:
    """Sync backend translation files. Returns stats."""
    backend_stats = {"added": 0}
    backend_locale = iso_code if iso_code and iso_code != lang_code else lang_code
    locale_dir = (
        base_dir
        / TRANSLATION_FILE_NAMES["edx_platform"]
        / TRANSLATION_FILE_NAMES["conf_dir"]
        / TRANSLATION_FILE_NAMES["locale_dir"]
        / backend_locale
        / TRANSLATION_FILE_NAMES["lc_messages"]
    )

    for po_file_name in BACKEND_PO_FILES:
        en_file = (
            base_dir
            / TRANSLATION_FILE_NAMES["edx_platform"]
            / TRANSLATION_FILE_NAMES["conf_dir"]
            / TRANSLATION_FILE_NAMES["locale_dir"]
            / "en"
            / TRANSLATION_FILE_NAMES["lc_messages"]
            / po_file_name
        )
        target_file = locale_dir / po_file_name

        if not en_file.exists():
            continue

        try:
            stats = sync_or_create_po_file(
                en_file, target_file, backend_locale, iso_code
            )
            backend_stats["added"] += stats.get("added", 0)
        except (OSError, polib.POFileError, ValueError):
            continue

    return backend_stats


def sync_all_translations(
    base_dir: Path,
    lang_code: str,
    iso_code: str | None = None,
    *,
    skip_backend: bool = False,
) -> dict:
    """
    Sync all translation files for a language.
    Returns summary stats.
    """
    if iso_code is None:
        iso_code = lang_code

    frontend_stats = _sync_frontend_translations(base_dir, iso_code)
    backend_stats = (
        _sync_backend_translations(base_dir, lang_code, iso_code)
        if not skip_backend
        else {"added": 0}
    )

    return {
        "frontend": frontend_stats,
        "backend": backend_stats,
    }
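
# Illustrative end-to-end usage sketch (hypothetical checkout layout under /openedx):
#
#     summary = sync_all_translations(Path("/openedx"), lang_code="el", iso_code="el")
#     # {"frontend": {"added": ..., "fixed": ..., "removed": ..., "created": ..., "synced": ...},
#     #  "backend": {"added": ...}}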