ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (35)
  1. ol_openedx_course_translations/apps.py +12 -2
  2. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  3. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  10. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  11. ol_openedx_course_translations/management/commands/translate_course.py +419 -470
  12. ol_openedx_course_translations/middleware.py +143 -0
  13. ol_openedx_course_translations/providers/__init__.py +1 -0
  14. ol_openedx_course_translations/providers/base.py +278 -0
  15. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  16. ol_openedx_course_translations/providers/llm_providers.py +565 -0
  17. ol_openedx_course_translations/settings/cms.py +17 -0
  18. ol_openedx_course_translations/settings/common.py +57 -30
  19. ol_openedx_course_translations/settings/lms.py +15 -0
  20. ol_openedx_course_translations/tasks.py +222 -0
  21. ol_openedx_course_translations/urls.py +16 -0
  22. ol_openedx_course_translations/utils/__init__.py +0 -0
  23. ol_openedx_course_translations/utils/command_utils.py +197 -0
  24. ol_openedx_course_translations/utils/constants.py +216 -0
  25. ol_openedx_course_translations/utils/course_translations.py +581 -0
  26. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  27. ol_openedx_course_translations/views.py +73 -0
  28. ol_openedx_course_translations-0.3.0.dist-info/METADATA +407 -0
  29. ol_openedx_course_translations-0.3.0.dist-info/RECORD +35 -0
  30. ol_openedx_course_translations-0.3.0.dist-info/entry_points.txt +5 -0
  31. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  32. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  33. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  34. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/WHEEL +0 -0
  35. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,143 @@
1
+ """
2
+ Middleware to set/reset language preference cookie and
3
+ user preference based on course language.
4
+ """
5
+
6
+ import re
7
+
8
+ from django.conf import settings
9
+ from django.http import HttpResponseRedirect
10
+ from django.utils.deprecation import MiddlewareMixin
11
+ from opaque_keys.edx.keys import CourseKey
12
+ from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
13
+ from openedx.core.djangoapps.lang_pref import LANGUAGE_KEY
14
+ from openedx.core.djangoapps.lang_pref import helpers as lang_pref_helpers
15
+ from openedx.core.djangoapps.user_api.preferences.api import set_user_preference
16
+
17
+ ENGLISH_LANGUAGE_CODE = "en"  # Language code the middlewares below reset the cookie/preference to
18
+
19
+
20
def should_process_request(request):
    """
    Tell whether language auto-selection applies to this request.

    The feature flag ``settings.ENABLE_AUTO_LANGUAGE_SELECTION`` must be on
    and the request must carry an authenticated ``user``.
    """
    feature_enabled = settings.ENABLE_AUTO_LANGUAGE_SELECTION
    return (
        feature_enabled
        and hasattr(request, "user")
        and request.user.is_authenticated
    )
29
+
30
+
31
def set_language(request, response, language):
    """
    Record ``language`` in two places: the language cookie written on
    ``response`` and the stored preference of ``request.user``.
    """
    # Cookie first, then the persisted preference.
    lang_pref_helpers.set_language_cookie(request, response, language)
    current_user = request.user
    set_user_preference(current_user, LANGUAGE_KEY, language)
37
+
38
+
39
def redirect_current_path(request):
    """
    Build a redirect back to the URL currently being requested (path plus
    query string) so the page is loaded again after a language change.
    """
    current_url = request.get_full_path()
    return HttpResponseRedirect(current_url)
44
+
45
+
46
class CourseLanguageCookieMiddleware(MiddlewareMixin):
    """
    LMS middleware that:
    - Sets language based on course language
    - Forces English for exempt paths and authoring MFEs

    Whenever the language has to change, the cookie and the stored user
    preference are updated and the client is redirected to the same URL so
    the page is requested again with the new language.
    """

    # Matches "/courses/<course_key>" at the start of the path, followed by
    # "/" or the end of the path. The key pattern itself comes from
    # settings.COURSE_KEY_REGEX.
    COURSE_URL_REGEX = re.compile(
        rf"^/courses/(?P<course_key>{settings.COURSE_KEY_REGEX})(?:/|$)",
        re.IGNORECASE,
    )

    def process_response(self, request, response):
        """
        Process the response to set/reset language cookie based on course language.

        Args:
            request: Current request.
            response: Response produced for the request.

        Returns:
            ``response`` unchanged when no language switch is needed,
            otherwise a redirect to the current URL carrying the new
            language cookie.
        """
        if not should_process_request(request):
            return response

        path = getattr(request, "path_info", request.path)

        # Forcing English takes precedence over the course language.
        if self._should_force_english(request, path):
            return self._force_english_if_needed(request, response)

        course_language = self._get_course_language(path)
        if not course_language:
            return response

        return self._apply_course_language(request, response, course_language)

    def _should_force_english(self, request, path):
        """
        Determine if English should be forced based on request origin or exempt paths.

        True when the ``Origin`` header equals
        ``settings.COURSE_AUTHORING_MICROFRONTEND_URL`` or when any entry of
        ``settings.AUTO_LANGUAGE_SELECTION_EXEMPT_PATHS`` occurs as a
        substring of ``path``.
        """
        return request.META.get(
            "HTTP_ORIGIN"
        ) == settings.COURSE_AUTHORING_MICROFRONTEND_URL or any(
            exempt_path in path
            for exempt_path in settings.AUTO_LANGUAGE_SELECTION_EXEMPT_PATHS
        )

    def _switch_language(self, request, language):
        """
        Persist ``language`` and return the redirect that reloads the current URL.

        The cookie is written on the redirect itself: that is the response
        actually sent to the client, so a cookie set on the response being
        replaced would be thrown away with it.
        """
        redirect = redirect_current_path(request)
        set_language(request, redirect, language)
        return redirect

    def _force_english_if_needed(self, request, response):
        """
        Force language to English if not already set.

        Returns ``response`` unchanged when the language cookie is already
        English, otherwise a redirect that carries the English cookie.
        """
        cookie_val = lang_pref_helpers.get_language_cookie(request)

        if cookie_val != ENGLISH_LANGUAGE_CODE:
            return self._switch_language(request, ENGLISH_LANGUAGE_CODE)

        return response

    def _get_course_language(self, path):
        """
        Extract course language from the course URL path.

        Returns ``None`` when ``path`` is not a course URL, when the course
        key cannot be parsed or its overview cannot be loaded, or when the
        overview has no ``language`` attribute.
        """
        match = self.COURSE_URL_REGEX.match(path)
        if not match:
            return None

        try:
            course_key = CourseKey.from_string(match.group("course_key"))
            overview = CourseOverview.get_from_id(course_key)
        except Exception:  # noqa: BLE001
            # Best effort: a failed lookup must not break the response.
            return None

        return getattr(overview, "language", None)

    def _apply_course_language(self, request, response, language):
        """
        Apply the course language if it differs from the current cookie value.

        Returns ``response`` unchanged when the cookie already matches
        ``language``, otherwise a redirect that carries the new cookie.
        """
        cookie_val = lang_pref_helpers.get_language_cookie(request)
        if cookie_val != language:
            return self._switch_language(request, language)

        return response
125
+
126
+
127
class CourseLanguageCookieResetMiddleware(MiddlewareMixin):
    """
    CMS middleware that switches the language back to English.
    """

    def process_response(self, request, response):
        """
        Reset the language cookie and user preference to English when the
        request carries a language cookie with a different value.

        The same ``response`` object is always returned; no redirect is issued.
        """
        if should_process_request(request):
            current_language = lang_pref_helpers.get_language_cookie(request)
            # An absent/empty cookie is left alone; only a non-English value is reset.
            if current_language and current_language != ENGLISH_LANGUAGE_CODE:
                set_language(request, response, ENGLISH_LANGUAGE_CODE)

        return response
@@ -0,0 +1 @@
1
+ """Translation providers for course content."""
@@ -0,0 +1,278 @@
1
+ """Base classes for translation providers."""
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from pathlib import Path
6
+
7
+ import srt
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ MAX_SUBTITLE_TRANSLATION_RETRIES = 1  # Extra translation attempts made after the first one fails validation
12
+
13
+
14
+ def load_glossary(target_language: str, glossary_directory: str | None = None) -> str:
15
+ """
16
+ Load a glossary for the given language from the glossary directory.
17
+
18
+ Args:
19
+ target_language: Target language code
20
+ glossary_directory: Path to glossary directory
21
+
22
+ Returns:
23
+ Glossary content as string, empty if not found or directory not provided
24
+ """
25
+ if not glossary_directory:
26
+ return ""
27
+
28
+ glossary_dir_path = Path(glossary_directory)
29
+ if not glossary_dir_path.exists() or not glossary_dir_path.is_dir():
30
+ logger.warning("Glossary directory not found: %s", glossary_dir_path)
31
+ return ""
32
+
33
+ glossary_file_path = glossary_dir_path / f"{target_language.lower()}.txt"
34
+ if not glossary_file_path.exists():
35
+ logger.warning(
36
+ "Glossary file not found for language %s: %s",
37
+ target_language,
38
+ glossary_file_path,
39
+ )
40
+ return ""
41
+
42
+ return glossary_file_path.read_text(encoding="utf-8-sig").strip()
43
+
44
+
45
class TranslationProvider(ABC):
    """
    Abstract base class for translation providers.

    Subclasses implement ``translate_subtitles``, ``translate_text`` and
    ``translate_document``. The base class adds subtitle validation and a
    DeepL-based repair step on top of ``translate_subtitles``.
    """

    def __init__(self, primary_api_key: str, repair_api_key: str | None = None):
        """
        Initialize translation provider with API keys.

        Args:
            primary_api_key: API key for primary translation service
            repair_api_key: API key for repair service (DeepL API key)
        """
        self.primary_api_key = primary_api_key
        self.repair_api_key = repair_api_key

    def translate_srt_with_validation(
        self,
        subtitle_list: list[srt.Subtitle],
        target_language: str,
        glossary_file: str | None = None,
    ) -> list[srt.Subtitle]:
        """
        Translate SRT subtitles with timestamp validation and repair.

        Performs translation, validates timestamps, and attempts repair
        if validation fails using DeepL.

        Args:
            subtitle_list: List of subtitle objects to translate
            target_language: Target language code
            glossary_file: Path to glossary directory (optional)

        Returns:
            List of translated subtitle objects with validated timestamps

        Raises:
            ValueError: If every translation attempt fails validation and the
                result cannot be repaired, either because no repair API key
                is configured or because the repaired subtitles fail
                validation as well
        """
        log = logger.getChild("TranslationProvider")
        log.info(" 🌐 Translating subtitles to %s...", target_language)

        # One initial attempt plus MAX_SUBTITLE_TRANSLATION_RETRIES retries.
        for attempt in range(MAX_SUBTITLE_TRANSLATION_RETRIES + 1):
            if attempt > 0:
                log.info(" 🔧 Retrying subtitle translations...")

            translated_subtitles = self.translate_subtitles(
                subtitle_list, target_language, glossary_file
            )

            log.info(
                " 🔍 %sValidating translated subtitles...",
                "Re-" if attempt > 0 else "",
            )

            if self._validate_timestamps(subtitle_list, translated_subtitles):
                log.info(
                    " ✅ Timestamps validated successfully%s.",
                    " on retry" if attempt > 0 else "",
                )
                return translated_subtitles

            log.warning(
                " ❌ Timestamp %svalidation failed.", "re-" if attempt > 0 else ""
            )

        if not self.repair_api_key:
            # Without a repair key _repair_timestamps_with_deepl() hands back
            # the untranslated source cues. Those trivially pass validation
            # against themselves and would be returned as if they were a
            # successfully repaired translation.
            log.error(" ❌ No repair API key available. Translation cannot proceed.")
            raise ValueError(  # noqa: TRY003
                "Subtitle timestamp validation failed and no repair API key is configured"  # noqa: EM101
            )

        repaired_subtitles = self._repair_timestamps_with_deepl(
            subtitle_list, target_language
        )

        log.info(" 🔍 Re-validating repaired subtitles...")
        if self._validate_timestamps(subtitle_list, repaired_subtitles):
            log.info(" ✅ Timestamps repaired and validated successfully.")
            return repaired_subtitles

        log.error(" ❌ Timestamp repair failed. Translation cannot proceed.")
        raise ValueError(  # noqa: TRY003
            "Subtitle timestamp repair failed - timestamps could not be validated"  # noqa: EM101
        )

    def _validate_timestamps(
        self, original: list[srt.Subtitle], translated: list[srt.Subtitle]
    ) -> bool:
        """
        Validate that timestamps and cue numbers are preserved.

        Checks for cue count mismatches, index mismatches, timestamp mismatches,
        and blank translations. At most the first ten issues are logged
        individually; the rest are summarized as a count.

        Args:
            original: Original subtitle list
            translated: Translated subtitle list

        Returns:
            True if validation passes, False otherwise
        """
        issues = []
        if len(original) != len(translated):
            issues.append(
                f"Cue count mismatch: original {len(original)}, "
                f"translated {len(translated)}"
            )

        # zip() stops at the shorter list; a length difference is already
        # recorded above, so the common prefix is still checked cue by cue.
        for i, (orig, trans) in enumerate(zip(original, translated)):
            if orig.index != trans.index:
                issues.append(
                    f"Cue {i + 1}: index mismatch ({orig.index} vs {trans.index})"
                )
            if orig.start != trans.start or orig.end != trans.end:
                issues.append(f"Cue {i + 1}: timestamp mismatch")
            # A blank translation is only an issue when the source cue had text.
            if orig.content.strip() and not trans.content.strip():
                issues.append(f"Cue {i + 1}: translation is BLANK")

        if issues:
            logger.warning("Translation validation found issues:")
            for issue in issues[:10]:
                logger.warning(" - %s", issue)
            if len(issues) > 10:  # noqa: PLR2004
                logger.warning(" ... and %s more issues", len(issues) - 10)
            return False
        return True

    def _repair_timestamps_with_deepl(
        self,
        original: list[srt.Subtitle],
        target_lang: str,
    ) -> list[srt.Subtitle]:
        """
        Repair misaligned timestamps using DeepL translation.

        Uses DeepL to retranslate subtitles with proper timestamp preservation.

        Args:
            original: Original subtitle list with correct timestamps
            target_lang: Target language code

        Returns:
            The DeepL translation of ``original``. When no repair API key is
            set, ``original`` itself is returned untranslated. When the DeepL
            call raises, a copy of ``original`` with empty content is returned.
        """
        if not self.repair_api_key:
            logger.warning(" No repair API key available, skipping repair.")
            return original

        logger.info(" 🔧 Repairing timestamps using DeepL...")

        try:
            # Imported here rather than at module level.
            # NOTE(review): presumably to avoid a circular import, since the
            # DeepL provider likely imports this module - confirm.
            from ol_openedx_course_translations.providers.deepl_provider import (  # noqa: PLC0415
                DeepLProvider,
            )

            # Create DeepL provider instance for repair
            deepl_provider = DeepLProvider(self.repair_api_key, None)

            # Use DeepL to translate with proper timestamp preservation
            repaired_subtitles = deepl_provider.translate_subtitles(
                original, target_lang, None
            )

            logger.info(" ✅ DeepL repair completed.")
            return repaired_subtitles  # noqa: TRY300

        except Exception as e:  # noqa: BLE001
            logger.error(" ❌ DeepL repair failed: %s", e)  # noqa: TRY400
            # Fallback: return original with empty content to preserve structure.
            # Blank cues fail _validate_timestamps() whenever the source cue
            # had text.
            return [
                srt.Subtitle(
                    index=sub.index,
                    start=sub.start,
                    end=sub.end,
                    content="",
                )
                for sub in original
            ]

    @abstractmethod
    def translate_subtitles(
        self,
        subtitle_list: list[srt.Subtitle],
        target_language: str,
        glossary_file: str | None = None,
    ) -> list[srt.Subtitle]:
        """
        Translate SRT subtitles.

        Args:
            subtitle_list: List of subtitle objects to translate
            target_language: Target language code
            glossary_file: Path to glossary directory (optional)

        Returns:
            List of translated subtitle objects
        """

    @abstractmethod
    def translate_text(
        self,
        source_text: str,
        target_language: str,
        tag_handling: str | None = None,
        glossary_file: str | None = None,
    ) -> str:
        """
        Translate plain text or HTML/XML.

        Args:
            source_text: Text to translate
            target_language: Target language code
            tag_handling: How to handle XML/HTML tags (optional)
            glossary_file: Path to glossary directory (optional)

        Returns:
            Translated text
        """

    @abstractmethod
    def translate_document(
        self,
        input_file_path: Path,
        output_file_path: Path,
        source_language: str,
        target_language: str,
        glossary_file: str | None = None,
    ) -> None:
        """
        Translate document file.

        Args:
            input_file_path: Path to input file
            output_file_path: Path to output file
            source_language: Source language code
            target_language: Target language code
            glossary_file: Path to glossary directory (optional)
        """