ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (35) hide show
  1. ol_openedx_course_translations/apps.py +12 -2
  2. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  3. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  10. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  11. ol_openedx_course_translations/management/commands/translate_course.py +419 -470
  12. ol_openedx_course_translations/middleware.py +143 -0
  13. ol_openedx_course_translations/providers/__init__.py +1 -0
  14. ol_openedx_course_translations/providers/base.py +278 -0
  15. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  16. ol_openedx_course_translations/providers/llm_providers.py +565 -0
  17. ol_openedx_course_translations/settings/cms.py +17 -0
  18. ol_openedx_course_translations/settings/common.py +57 -30
  19. ol_openedx_course_translations/settings/lms.py +15 -0
  20. ol_openedx_course_translations/tasks.py +222 -0
  21. ol_openedx_course_translations/urls.py +16 -0
  22. ol_openedx_course_translations/utils/__init__.py +0 -0
  23. ol_openedx_course_translations/utils/command_utils.py +197 -0
  24. ol_openedx_course_translations/utils/constants.py +216 -0
  25. ol_openedx_course_translations/utils/course_translations.py +581 -0
  26. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  27. ol_openedx_course_translations/views.py +73 -0
  28. ol_openedx_course_translations-0.3.0.dist-info/METADATA +407 -0
  29. ol_openedx_course_translations-0.3.0.dist-info/RECORD +35 -0
  30. ol_openedx_course_translations-0.3.0.dist-info/entry_points.txt +5 -0
  31. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  32. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  33. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  34. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/WHEEL +0 -0
  35. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,292 @@
1
+ """DeepL translation provider."""
2
+
3
+ import logging
4
+ import re
5
+ from pathlib import Path
6
+
7
+ import deepl
8
+ import srt
9
+
10
+ from .base import TranslationProvider
11
+
12
logger = logging.getLogger(__name__)

# DeepL API constants
# Upper bound, in bytes, for a single request payload. The check is made
# against the UTF-8 encoded payload, not the character count.
DEEPL_MAX_PAYLOAD_SIZE = 128000  # 128KB limit
# Sent to DeepL as the "enable_beta_languages" extra body parameter.
DEEPL_ENABLE_BETA_LANGUAGES = True
# Language code mappings for DeepL API.
# Keys are the lower-cased language codes accepted by this provider; values
# are the target language codes passed to DeepL.
DEEPL_LANGUAGE_CODES = {
    "fr": "FR",
    "de": "DE",
    "es": "ES",
    "pt": "PT-PT",
    "pt-br": "PT-BR",
    "hi": "HI",
    "ar": "AR",
    "zh": "ZH",
    # "kr" is kept for backward compatibility; "ko" is the ISO 639-1 code
    # for Korean and maps to the same DeepL target.
    "kr": "KO",
    "ko": "KO",
    "ja": "JA",
    "id": "ID",
    "ru": "RU",
    "el": "EL",
    "tr": "TR",
    "sq": "SQ",
}
35
+
36
+
37
def _check_payload_size(payload: str) -> None:
    """
    Reject payloads that are too large to send to DeepL.

    The size is measured on the UTF-8 encoded payload, so multi-byte
    characters count for more than one unit.

    Args:
        payload: Payload string to check

    Raises:
        ValueError: If the encoded payload is larger than
            DEEPL_MAX_PAYLOAD_SIZE bytes
    """
    encoded_size = len(payload.encode("utf-8"))
    if encoded_size <= DEEPL_MAX_PAYLOAD_SIZE:
        return

    msg = "Payload too large for DeepL API"
    raise ValueError(msg)
50
+
51
+
52
+ def _validate_batch_response(xml_matches: list, subtitle_batch: list) -> None:
53
+ """
54
+ Validate that DeepL returned the expected number of items.
55
+
56
+ Args:
57
+ xml_matches: List of XML matches from DeepL response
58
+ subtitle_batch: Original subtitle batch
59
+
60
+ Raises:
61
+ ValueError: If counts don't match
62
+ """
63
+ if len(xml_matches) != len(subtitle_batch):
64
+ logger.warning(
65
+ "DeepL returned %d items, expected %d. Retrying with smaller batch.",
66
+ len(xml_matches),
67
+ len(subtitle_batch),
68
+ )
69
+ msg = "Count mismatch in DeepL response"
70
+ raise ValueError(msg)
71
+
72
+
73
class DeepLProvider(TranslationProvider):
    """
    DeepL translation provider.

    Translates subtitles, plain text and documents through a
    ``deepl.Translator`` client created from the primary API key.
    """

    # Matches one ``<s i="N">...</s>`` element of the subtitle XML payload.
    # Compiled once here instead of on every batch iteration.
    _SUBTITLE_PATTERN = re.compile(r'<s i="(\d+)">(.*?)</s>', re.DOTALL)

    def __init__(self, primary_api_key: str, repair_api_key: str | None = None):
        """
        Initialize DeepL provider.

        Args:
            primary_api_key: DeepL API key
            repair_api_key: API key for repair service (optional)
        """
        super().__init__(primary_api_key, repair_api_key)
        self.deepl_translator = deepl.Translator(auth_key=primary_api_key)

    @staticmethod
    def _get_deepl_target_code(target_language: str) -> str:
        """
        Map a language code to the DeepL target language code.

        The lookup is case-insensitive and uses DEEPL_LANGUAGE_CODES.

        Args:
            target_language: Target language code

        Returns:
            DeepL target language code

        Raises:
            ValueError: If target language is not supported by DeepL
        """
        deepl_target_code = DEEPL_LANGUAGE_CODES.get(target_language.lower())
        if not deepl_target_code:
            error_msg = f"DeepL does not support language '{target_language}'."
            raise ValueError(error_msg)
        return deepl_target_code

    def translate_subtitles(
        self,
        subtitle_list: list[srt.Subtitle],
        target_language: str,
        glossary_directory: str | None = None,  # noqa: ARG002
    ) -> list[srt.Subtitle]:
        """
        Translate SRT subtitles using DeepL.

        Uses XML tag handling to preserve subtitle structure and timestamps.
        Implements dynamic batch sizing to handle API limits: the first
        attempt sends every subtitle in one request, and the batch size is
        halved each time a batch fails for any reason.

        Args:
            subtitle_list: List of subtitle objects to translate
            target_language: Target language code
            glossary_directory: Path to glossary directory (not used by DeepL)

        Returns:
            List of translated subtitle objects

        Raises:
            ValueError: If target language is not supported by DeepL
            Exception: Whatever error a batch of a single subtitle failed
                with; it is logged and re-raised unchanged
        """
        deepl_target_code = self._get_deepl_target_code(target_language)

        deepl_extra_params = {"enable_beta_languages": DEEPL_ENABLE_BETA_LANGUAGES}

        translated_subtitle_list = []
        current_batch_size = len(subtitle_list)

        current_index = 0
        while current_index < len(subtitle_list):
            subtitle_batch = subtitle_list[
                current_index : current_index + current_batch_size
            ]
            logger.info(
                " Translating batch starting at ID %s (%s blocks)...",
                subtitle_batch[0].index,
                len(subtitle_batch),
            )

            try:
                # Construct XML payload: one <s> element per subtitle, keyed
                # by the subtitle index so the response can be mapped back.
                xml_payload_parts = ["<d>"]
                for subtitle_item in subtitle_batch:
                    # "&" must be escaped first so the entities produced for
                    # "<" and ">" are not escaped a second time.
                    xml_safe_content = (
                        subtitle_item.content.replace("&", "&amp;")
                        .replace("<", "&lt;")
                        .replace(">", "&gt;")
                    )
                    xml_payload_parts.append(
                        f'<s i="{subtitle_item.index}">{xml_safe_content}</s>'
                    )
                xml_payload_parts.append("</d>")
                xml_payload = "".join(xml_payload_parts)

                _check_payload_size(xml_payload)

                translation_result = self.deepl_translator.translate_text(
                    xml_payload,
                    source_lang="EN",
                    target_lang=deepl_target_code,
                    preserve_formatting=True,
                    tag_handling="xml",
                    split_sentences="nonewlines",
                    extra_body_parameters=deepl_extra_params,
                )

                translated_xml_content = translation_result.text

                # Parse XML back
                xml_matches = self._SUBTITLE_PATTERN.findall(translated_xml_content)

                _validate_batch_response(xml_matches, subtitle_batch)

                subtitle_index_map = {str(sub.index): sub for sub in subtitle_batch}

                for subtitle_index_str, translated_content in xml_matches:
                    original_subtitle = subtitle_index_map.get(subtitle_index_str)
                    if original_subtitle is None:
                        # Still skipped (best effort), but no longer silently.
                        logger.warning(
                            "DeepL returned unknown subtitle index %s; skipping.",
                            subtitle_index_str,
                        )
                        continue

                    # Unescape XML entities; "&amp;" goes last so an escaped
                    # ampersand is not turned into a new entity.
                    unescaped_content = (
                        translated_content.replace("&lt;", "<")
                        .replace("&gt;", ">")
                        .replace("&amp;", "&")
                    )

                    # Timing and index come from the original subtitle; only
                    # the text is replaced.
                    translated_subtitle_list.append(
                        srt.Subtitle(
                            index=original_subtitle.index,
                            start=original_subtitle.start,
                            end=original_subtitle.end,
                            content=unescaped_content.strip(),
                        )
                    )

                current_index += current_batch_size

            except Exception as translation_error:
                if current_batch_size <= 1:
                    logger.exception("Failed even with batch size 1")
                    raise

                # current_index is left unchanged, so the same position is
                # retried with a smaller batch on the next iteration.
                logger.warning(" Error: %s. Reducing batch size...", translation_error)
                current_batch_size = max(1, current_batch_size // 2)

        return translated_subtitle_list

    def translate_text(
        self,
        source_text: str,
        target_language: str,
        tag_handling: str | None = None,
        glossary_directory: str | None = None,  # noqa: ARG002
    ) -> str:
        """
        Translate text using DeepL.

        Empty or whitespace-only input is returned unchanged without
        validating the language or calling DeepL.

        Args:
            source_text: Text to translate
            target_language: Target language code
            tag_handling: How to handle XML/HTML tags ("xml" or "html")
            glossary_directory: Path to glossary directory (not used by DeepL)

        Returns:
            Translated text, or original text if translation fails

        Raises:
            ValueError: If target language is not supported by DeepL
        """
        if not source_text or not source_text.strip():
            return source_text

        deepl_target_code = self._get_deepl_target_code(target_language)

        try:
            translation_result = self.deepl_translator.translate_text(
                source_text,
                source_lang="EN",
                target_lang=deepl_target_code,
                tag_handling=tag_handling,
            )
        except deepl.exceptions.DeepLException as deepl_error:
            # Best effort: fall back to the untranslated text.
            logger.warning("DeepL translation failed: %s", deepl_error)
            return source_text
        else:
            return translation_result.text

    def translate_document(
        self,
        input_file_path: Path,
        output_file_path: Path,
        source_language: str,
        target_language: str,
        glossary_directory: str | None = None,
    ) -> None:
        """
        Translate document using DeepL.

        For SRT files, uses subtitle translation. For other files, uses DeepL's
        document translation API. The ".srt" suffix is matched
        case-insensitively. DeepL errors are logged as warnings and
        suppressed.

        Args:
            input_file_path: Path to input file
            output_file_path: Path to output file
            source_language: Source language code (only passed to DeepL on
                the non-SRT path)
            target_language: Target language code
            glossary_directory: Path to glossary directory (optional)

        Raises:
            ValueError: If target language is not supported by DeepL
        """
        deepl_target_code = self._get_deepl_target_code(target_language)

        try:
            # For SRT files, use subtitle translation
            if input_file_path.suffix.lower() == ".srt":
                srt_content = input_file_path.read_text(encoding="utf-8")
                subtitle_list = list(srt.parse(srt_content))

                translated_subtitle_list = self.translate_srt_with_validation(
                    subtitle_list, target_language, glossary_directory
                )

                translated_srt_content = srt.compose(translated_subtitle_list)
                output_file_path.write_text(translated_srt_content, encoding="utf-8")
            else:
                self.deepl_translator.translate_document_from_filepath(
                    input_file_path,
                    output_file_path,
                    source_lang=source_language,
                    target_lang=deepl_target_code,
                )
        except deepl.exceptions.DeepLException as deepl_error:
            logger.warning("DeepL document translation failed: %s", deepl_error)