ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ol-openedx-course-translations might be problematic. Click here for more details.
- ol_openedx_course_translations/admin.py +29 -0
- ol_openedx_course_translations/apps.py +13 -2
- ol_openedx_course_translations/filters.py +39 -0
- ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
- ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
- ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
- ol_openedx_course_translations/management/commands/translate_course.py +472 -475
- ol_openedx_course_translations/middleware.py +143 -0
- ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
- ol_openedx_course_translations/migrations/__init__.py +0 -0
- ol_openedx_course_translations/models.py +57 -0
- ol_openedx_course_translations/providers/__init__.py +1 -0
- ol_openedx_course_translations/providers/base.py +278 -0
- ol_openedx_course_translations/providers/deepl_provider.py +292 -0
- ol_openedx_course_translations/providers/llm_providers.py +581 -0
- ol_openedx_course_translations/settings/cms.py +17 -0
- ol_openedx_course_translations/settings/common.py +58 -30
- ol_openedx_course_translations/settings/lms.py +38 -0
- ol_openedx_course_translations/tasks.py +222 -0
- ol_openedx_course_translations/urls.py +16 -0
- ol_openedx_course_translations/utils/__init__.py +0 -0
- ol_openedx_course_translations/utils/command_utils.py +197 -0
- ol_openedx_course_translations/utils/constants.py +218 -0
- ol_openedx_course_translations/utils/course_translations.py +608 -0
- ol_openedx_course_translations/utils/translation_sync.py +808 -0
- ol_openedx_course_translations/views.py +73 -0
- ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
- ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
- ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
- ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
- ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
- ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""DeepL translation provider."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import deepl
|
|
8
|
+
import srt
|
|
9
|
+
|
|
10
|
+
from .base import TranslationProvider
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)

# DeepL API constants
# Upper bound, in UTF-8 encoded bytes, enforced on a single request payload
# before it is sent to DeepL.
DEEPL_MAX_PAYLOAD_SIZE = 128000  # 128KB limit
# Forwarded to DeepL as the "enable_beta_languages" extra body parameter.
DEEPL_ENABLE_BETA_LANGUAGES = True
# Language code mappings for DeepL API
# Keys are the lower-cased language codes accepted by this module; values are
# the target-language codes passed to DeepL.
DEEPL_LANGUAGE_CODES = {
    "fr": "FR",
    "de": "DE",
    "es": "ES",
    "pt": "PT-PT",
    "pt-br": "PT-BR",
    "hi": "HI",
    "ar": "AR",
    "zh": "ZH",
    # "ko" is the ISO 639-1 code for Korean; "kr" (a country code) is kept so
    # existing callers that already pass it keep working.
    "ko": "KO",
    "kr": "KO",
    "ja": "JA",
    "id": "ID",
    "ru": "RU",
    "el": "EL",
    "tr": "TR",
    "sq": "SQ",
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _check_payload_size(payload: str) -> None:
    """
    Ensure a request payload fits within the DeepL size limit.

    The size is measured on the UTF-8 encoded bytes of the payload, not on
    its character count.

    Args:
        payload: Text that is about to be sent to DeepL

    Raises:
        ValueError: If the encoded payload is larger than
            DEEPL_MAX_PAYLOAD_SIZE bytes
    """
    encoded_size = len(payload.encode("utf-8"))
    if encoded_size <= DEEPL_MAX_PAYLOAD_SIZE:
        return

    raise ValueError("Payload too large for DeepL API")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _validate_batch_response(xml_matches: list, subtitle_batch: list) -> None:
|
|
53
|
+
"""
|
|
54
|
+
Validate that DeepL returned the expected number of items.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
xml_matches: List of XML matches from DeepL response
|
|
58
|
+
subtitle_batch: Original subtitle batch
|
|
59
|
+
|
|
60
|
+
Raises:
|
|
61
|
+
ValueError: If counts don't match
|
|
62
|
+
"""
|
|
63
|
+
if len(xml_matches) != len(subtitle_batch):
|
|
64
|
+
logger.warning(
|
|
65
|
+
"DeepL returned %d items, expected %d. Retrying with smaller batch.",
|
|
66
|
+
len(xml_matches),
|
|
67
|
+
len(subtitle_batch),
|
|
68
|
+
)
|
|
69
|
+
msg = "Count mismatch in DeepL response"
|
|
70
|
+
raise ValueError(msg)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Matches one ``<s i="N">...</s>`` element of the subtitle XML envelope built
# by DeepLProvider.translate_subtitles. Compiled once instead of per batch.
_SUBTITLE_XML_PATTERN = re.compile(r'<s i="(\d+)">(.*?)</s>', re.DOTALL)


def _escape_xml_text(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* is safe as XML element content."""
    # "&" must be replaced first, otherwise the ampersands introduced by the
    # other two replacements would be escaped a second time.
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def _unescape_xml_text(text: str) -> str:
    """Reverse ``_escape_xml_text``."""
    # "&amp;" must be replaced last so that an escaped literal such as
    # "&amp;lt;" decodes to "&lt;" rather than to "<".
    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")


def _resolve_deepl_target_code(target_language: str) -> str:
    """
    Map a language code to the DeepL target-language code.

    Args:
        target_language: Language code; matched case-insensitively against
            DEEPL_LANGUAGE_CODES

    Returns:
        The DeepL target-language code

    Raises:
        ValueError: If the language has no entry in DEEPL_LANGUAGE_CODES
    """
    deepl_target_code = DEEPL_LANGUAGE_CODES.get(target_language.lower())
    if not deepl_target_code:
        error_msg = f"DeepL does not support language '{target_language}'."
        raise ValueError(error_msg)
    return deepl_target_code


class DeepLProvider(TranslationProvider):
    """DeepL translation provider."""

    def __init__(self, primary_api_key: str, repair_api_key: str | None = None):
        """
        Initialize DeepL provider.

        Args:
            primary_api_key: DeepL API key
            repair_api_key: API key for repair service (optional)
        """
        super().__init__(primary_api_key, repair_api_key)
        self.deepl_translator = deepl.Translator(auth_key=primary_api_key)

    def translate_subtitles(
        self,
        subtitle_list: list[srt.Subtitle],
        target_language: str,
        glossary_directory: str | None = None,  # noqa: ARG002
    ) -> list[srt.Subtitle]:
        """
        Translate SRT subtitles using DeepL.

        Each batch is sent as a single XML document of the form
        ``<d><s i="INDEX">text</s>...</d>`` with XML tag handling enabled, and
        the translated elements are matched back to the original subtitles by
        index so that timestamps are carried over unchanged.

        The first attempt sends every subtitle in one batch. Whenever a batch
        fails (payload too large, API error, item-count mismatch) the batch
        size is halved and the same position is retried.

        Args:
            subtitle_list: List of subtitle objects to translate
            target_language: Target language code
            glossary_directory: Path to glossary directory (not used by DeepL)

        Returns:
            List of translated subtitle objects

        Raises:
            ValueError: If target language is not supported by DeepL
            Exception: Whatever error the last attempt raised, once a batch
                of a single subtitle still fails
        """
        deepl_target_code = _resolve_deepl_target_code(target_language)
        deepl_extra_params = {"enable_beta_languages": DEEPL_ENABLE_BETA_LANGUAGES}

        translated_subtitle_list = []
        current_batch_size = len(subtitle_list)

        current_index = 0
        while current_index < len(subtitle_list):
            subtitle_batch = subtitle_list[
                current_index : current_index + current_batch_size
            ]
            logger.info(
                " Translating batch starting at ID %s (%s blocks)...",
                subtitle_batch[0].index,
                len(subtitle_batch),
            )

            try:
                # Construct XML payload; subtitle text is escaped so that
                # "&", "<" and ">" in the content cannot break the envelope.
                xml_payload_parts = ["<d>"]
                xml_payload_parts.extend(
                    f'<s i="{subtitle_item.index}">'
                    f"{_escape_xml_text(subtitle_item.content)}</s>"
                    for subtitle_item in subtitle_batch
                )
                xml_payload_parts.append("</d>")
                xml_payload = "".join(xml_payload_parts)

                _check_payload_size(xml_payload)

                translation_result = self.deepl_translator.translate_text(
                    xml_payload,
                    source_lang="EN",
                    target_lang=deepl_target_code,
                    preserve_formatting=True,
                    tag_handling="xml",
                    split_sentences="nonewlines",
                    extra_body_parameters=deepl_extra_params,
                )

                # Parse XML back
                xml_matches = _SUBTITLE_XML_PATTERN.findall(translation_result.text)

                _validate_batch_response(xml_matches, subtitle_batch)

                subtitle_index_map = {str(sub.index): sub for sub in subtitle_batch}

                for subtitle_index_str, translated_content in xml_matches:
                    original_subtitle = subtitle_index_map.get(subtitle_index_str)
                    if original_subtitle is None:
                        # The item count matched but this index was not part
                        # of the request; it is skipped as before, but no
                        # longer silently.
                        logger.warning(
                            "DeepL returned unknown subtitle index %s; skipping.",
                            subtitle_index_str,
                        )
                        continue

                    translated_subtitle_list.append(
                        srt.Subtitle(
                            index=original_subtitle.index,
                            start=original_subtitle.start,
                            end=original_subtitle.end,
                            content=_unescape_xml_text(translated_content).strip(),
                        )
                    )

                current_index += current_batch_size

            except Exception as translation_error:
                # Broad on purpose: any failure of the batch triggers a retry
                # with a smaller batch; only a failing single-item batch is
                # allowed to propagate.
                if current_batch_size <= 1:
                    logger.exception("Failed even with batch size 1")
                    raise

                logger.warning(" Error: %s. Reducing batch size...", translation_error)
                current_batch_size = max(1, current_batch_size // 2)
                continue

        return translated_subtitle_list

    def translate_text(
        self,
        source_text: str,
        target_language: str,
        tag_handling: str | None = None,
        glossary_directory: str | None = None,  # noqa: ARG002
    ) -> str:
        """
        Translate text using DeepL.

        Empty or whitespace-only text is returned unchanged without calling
        the API and without validating the target language.

        Args:
            source_text: Text to translate
            target_language: Target language code
            tag_handling: How to handle XML/HTML tags ("xml" or "html")
            glossary_directory: Path to glossary directory (not used by DeepL)

        Returns:
            Translated text, or original text if translation fails

        Raises:
            ValueError: If target language is not supported by DeepL
        """
        if not source_text or not source_text.strip():
            return source_text

        deepl_target_code = _resolve_deepl_target_code(target_language)

        try:
            translation_result = self.deepl_translator.translate_text(
                source_text,
                source_lang="EN",
                target_lang=deepl_target_code,
                tag_handling=tag_handling,
            )
        except deepl.exceptions.DeepLException as deepl_error:
            # Best effort: fall back to the untranslated text.
            logger.warning("DeepL translation failed: %s", deepl_error)
            return source_text
        else:
            return translation_result.text

    def translate_document(
        self,
        input_file_path: Path,
        output_file_path: Path,
        source_language: str,
        target_language: str,
        glossary_directory: str | None = None,
    ) -> None:
        """
        Translate document using DeepL.

        For SRT files, uses subtitle translation. For other files, uses DeepL's
        document translation API. A DeepL API error is logged as a warning and
        not re-raised.

        Args:
            input_file_path: Path to input file
            output_file_path: Path to output file
            source_language: Source language code
            target_language: Target language code
            glossary_directory: Path to glossary directory (optional)

        Raises:
            ValueError: If target language is not supported by DeepL
        """
        deepl_target_code = _resolve_deepl_target_code(target_language)

        try:
            # For SRT files, use subtitle translation. The suffix is compared
            # case-insensitively so that ".SRT" files take the same path.
            if input_file_path.suffix.lower() == ".srt":
                srt_content = input_file_path.read_text(encoding="utf-8")
                subtitle_list = list(srt.parse(srt_content))

                # translate_srt_with_validation is not defined in this class;
                # presumably inherited from TranslationProvider - confirm.
                translated_subtitle_list = self.translate_srt_with_validation(
                    subtitle_list, target_language, glossary_directory
                )

                translated_srt_content = srt.compose(translated_subtitle_list)
                output_file_path.write_text(translated_srt_content, encoding="utf-8")
            else:
                self.deepl_translator.translate_document_from_filepath(
                    input_file_path,
                    output_file_path,
                    source_lang=source_language,
                    target_lang=deepl_target_code,
                )
        except deepl.exceptions.DeepLException as deepl_error:
            logger.warning("DeepL document translation failed: %s", deepl_error)
|