ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (40)
  1. ol_openedx_course_translations/admin.py +29 -0
  2. ol_openedx_course_translations/apps.py +13 -2
  3. ol_openedx_course_translations/filters.py +39 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  10. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  11. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  12. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  13. ol_openedx_course_translations/management/commands/translate_course.py +472 -475
  14. ol_openedx_course_translations/middleware.py +143 -0
  15. ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
  16. ol_openedx_course_translations/migrations/__init__.py +0 -0
  17. ol_openedx_course_translations/models.py +57 -0
  18. ol_openedx_course_translations/providers/__init__.py +1 -0
  19. ol_openedx_course_translations/providers/base.py +278 -0
  20. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  21. ol_openedx_course_translations/providers/llm_providers.py +581 -0
  22. ol_openedx_course_translations/settings/cms.py +17 -0
  23. ol_openedx_course_translations/settings/common.py +58 -30
  24. ol_openedx_course_translations/settings/lms.py +38 -0
  25. ol_openedx_course_translations/tasks.py +222 -0
  26. ol_openedx_course_translations/urls.py +16 -0
  27. ol_openedx_course_translations/utils/__init__.py +0 -0
  28. ol_openedx_course_translations/utils/command_utils.py +197 -0
  29. ol_openedx_course_translations/utils/constants.py +218 -0
  30. ol_openedx_course_translations/utils/course_translations.py +608 -0
  31. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  32. ol_openedx_course_translations/views.py +73 -0
  33. ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
  34. ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
  35. ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
  36. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  37. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  38. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  39. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
  40. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
@@ -2,25 +2,64 @@
2
2
  Management command to translate course content to a specified language.
3
3
  """
4
4
 
5
- import json
6
5
  import logging
7
6
  import shutil
8
- import tarfile
7
+ import time
9
8
  from pathlib import Path
10
- from typing import Any
11
9
 
12
- import deepl
13
- from defusedxml import ElementTree
10
+ from celery import group
14
11
  from django.conf import settings
15
12
  from django.core.management.base import BaseCommand, CommandError
16
13
 
14
+ from ol_openedx_course_translations.models import CourseTranslationLog
15
+ from ol_openedx_course_translations.tasks import (
16
+ translate_file_task,
17
+ translate_grading_policy_task,
18
+ translate_policy_json_task,
19
+ )
20
+ from ol_openedx_course_translations.utils.constants import PROVIDER_DEEPL
21
+ from ol_openedx_course_translations.utils.course_translations import (
22
+ create_translated_archive,
23
+ create_translated_copy,
24
+ extract_course_archive,
25
+ generate_course_key_from_xml,
26
+ get_translatable_file_paths,
27
+ update_course_language_attribute,
28
+ validate_course_inputs,
29
+ )
30
+
17
31
  logger = logging.getLogger(__name__)
18
32
 
33
+ # Task configuration
34
+ TASK_TIMEOUT_SECONDS = 3600 * 2 # 2 hour total timeout for all tasks
35
+ TASK_POLL_INTERVAL_SECONDS = 2 # Poll every 2 seconds for task completion
36
+
19
37
 
20
38
  class Command(BaseCommand):
21
39
  """Translate given course content to the specified language."""
22
40
 
23
- help = "Translate course content to the specified language."
41
+ help = (
42
+ "Translate course content to the specified language.\n\n"
43
+ "Configuration:\n"
44
+ "All translation providers should be configured in TRANSLATIONS_PROVIDERS:\n"
45
+ "{\n"
46
+ ' "deepl": {"api_key": "<YOUR_DEEPL_API_KEY>"},\n'
47
+ ' "openai": {"api_key": "<KEY>", "default_model": "gpt-5.2"},\n'
48
+ ' "gemini": {"api_key": "<KEY>", "default_model": "gemini-3-pro-preview"},\n'
49
+ ' "mistral": {"api_key": "<KEY>", "default_model": "mistral-large-latest"}\n'
50
+ "}\n"
51
+ )
52
+
53
+ def __init__(self, *args, **kwargs):
54
+ """Initialize the command with empty task list."""
55
+ super().__init__(*args, **kwargs)
56
+ self.tasks = []
57
+ self.translated_course_dir = None
58
+ self.content_provider_name = None
59
+ self.content_model = None
60
+ self.srt_provider_name = None
61
+ self.srt_model = None
62
+ self.glossary_directory = None
24
63
 
25
64
  def add_arguments(self, parser) -> None:
26
65
  """Entry point for subclassed commands to add custom arguments."""
@@ -34,8 +73,8 @@ class Command(BaseCommand):
34
73
  ),
35
74
  )
36
75
  parser.add_argument(
37
- "--translation-language",
38
- dest="translation_language",
76
+ "--target-language",
77
+ dest="target_language",
39
78
  required=True,
40
79
  help=(
41
80
  "Specify the language code in ISO format "
@@ -44,545 +83,503 @@ class Command(BaseCommand):
44
83
  )
45
84
  parser.add_argument(
46
85
  "--course-dir",
47
- dest="course_directory",
86
+ dest="course_archive_path",
48
87
  required=True,
49
88
  help="Specify the course directory (tar archive).",
50
89
  )
90
+ parser.add_argument(
91
+ "--content-translation-provider",
92
+ dest="content_translation_provider",
93
+ required=True,
94
+ help=(
95
+ "Translation provider for content (XML/HTML and text). "
96
+ "Format: 'deepl', 'PROVIDER', or 'PROVIDER/MODEL' "
97
+ "(e.g., 'openai', 'openai/gpt-5.2', 'gemini', 'gemini/gemini-3-pro-preview'). " # noqa: E501
98
+ "If model is not specified, uses the default model from settings."
99
+ ),
100
+ )
101
+ parser.add_argument(
102
+ "--srt-translation-provider",
103
+ dest="srt_translation_provider",
104
+ required=True,
105
+ help=(
106
+ "Translation provider for SRT subtitles. "
107
+ "Format: 'deepl', 'PROVIDER', or 'PROVIDER/MODEL' "
108
+ "(e.g., 'openai', 'openai/gpt-5.2', 'gemini', 'gemini/gemini-3-pro-preview'). " # noqa: E501
109
+ "If model is not specified, uses the default model from settings."
110
+ ),
111
+ )
112
+ parser.add_argument(
113
+ "--glossary-dir",
114
+ dest="glossary_directory",
115
+ required=False,
116
+ help=(
117
+ "Path to glossary directory containing "
118
+ "language-specific glossary files."
119
+ ),
120
+ )
121
+
122
+ def _parse_and_validate_provider_spec(
123
+ self, provider_spec: str
124
+ ) -> tuple[str, str | None]:
125
+ """
126
+ Parse and validate provider specification into provider name and model.
127
+
128
+ Resolves model from settings if not provided in specification.
129
+
130
+ Args:
131
+ provider_spec: Provider specification
132
+
133
+ Returns:
134
+ Tuple of (provider_name, model_name). model_name is None for DeepL or
135
+ resolved from settings if not specified.
136
+
137
+ Raises:
138
+ CommandError: If provider specification format is invalid
139
+ or model and api_key cannot be resolved
140
+ """
141
+ # Parse the specification
142
+ if "/" in provider_spec:
143
+ parts = provider_spec.split("/", 1)
144
+ if len(parts) != 2 or not parts[0] or not parts[1]: # noqa: PLR2004
145
+ error_msg = (
146
+ f"Invalid provider specification: {provider_spec}. "
147
+ "Use format 'PROVIDER' or 'PROVIDER/MODEL' "
148
+ "(e.g., 'openai', 'openai/gpt-5.2')"
149
+ )
150
+ raise CommandError(error_msg)
151
+ provider_name = parts[0].lower()
152
+ model_name = parts[1]
153
+ else:
154
+ provider_name = provider_spec.lower()
155
+ model_name = None
156
+
157
+ # Try to get default model from settings
158
+ providers_config = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
159
+ if provider_name not in providers_config:
160
+ error_msg = (
161
+ f"Provider '{provider_name}' not configured in TRANSLATIONS_PROVIDERS. "
162
+ f"Available providers: {', '.join(providers_config.keys())}"
163
+ )
164
+ raise CommandError(error_msg)
165
+
166
+ provider_config = providers_config[provider_name]
167
+ api_key = provider_config.get("api_key")
168
+ if not api_key:
169
+ error_msg = (
170
+ f"API key for provider '{provider_name}' is not configured in "
171
+ "TRANSLATIONS_PROVIDERS. Please set the 'api_key' in settings."
172
+ )
173
+ raise CommandError(error_msg)
174
+
175
+ # DeepL doesn't use models
176
+ if provider_name == PROVIDER_DEEPL:
177
+ return provider_name, None
178
+
179
+ # If model is explicitly provided, return it
180
+ if model_name:
181
+ return provider_name, model_name
182
+
183
+ default_model = provider_config.get("default_model")
184
+ if not default_model:
185
+ error_msg = (
186
+ f"No model specified for provider '{provider_name}' and no "
187
+ f"default_model found in TRANSLATIONS_PROVIDERS['{provider_name}']. "
188
+ f"Either specify a model (e.g., '{provider_name}/gpt-5.2') or "
189
+ f"configure a default_model in settings."
190
+ )
191
+ raise CommandError(error_msg)
192
+
193
+ return provider_name, default_model
51
194
 
52
195
  def handle(self, **options) -> None:
53
196
  """Handle the translate_course command."""
54
197
  try:
55
- self._validate_inputs(options)
198
+ start_time = time.perf_counter()
199
+ course_archive_path = Path(options["course_archive_path"])
200
+ source_language = options["source_language"].upper()
201
+ target_language = options["target_language"].upper()
202
+ content_provider_spec = options["content_translation_provider"]
203
+ srt_provider_spec = options["srt_translation_provider"]
204
+ glossary_directory = options.get("glossary_directory")
205
+
206
+ # Parse and validate provider specifications (includes validation)
207
+ content_provider_name, content_model = (
208
+ self._parse_and_validate_provider_spec(content_provider_spec)
209
+ )
210
+ srt_provider_name, srt_model = self._parse_and_validate_provider_spec(
211
+ srt_provider_spec
212
+ )
56
213
 
57
- course_dir = Path(options["course_directory"])
58
- source_language = options["source_language"]
59
- translation_language = options["translation_language"]
214
+ # Log the resolved configuration
215
+ if content_model:
216
+ self.stdout.write(
217
+ f"Content provider: {content_provider_name}/{content_model}"
218
+ )
219
+ else:
220
+ self.stdout.write(f"Content provider: {content_provider_name}")
221
+
222
+ if srt_model:
223
+ self.stdout.write(f"SRT provider: {srt_provider_name}/{srt_model}")
224
+ else:
225
+ self.stdout.write(f"SRT provider: {srt_provider_name}")
226
+
227
+ # Validate inputs
228
+ validate_course_inputs(course_archive_path)
229
+
230
+ # Store provider names and models
231
+ self.content_provider_name = content_provider_name
232
+ self.content_model = content_model
233
+ self.srt_provider_name = srt_provider_name
234
+ self.srt_model = srt_model
235
+ self.glossary_directory = glossary_directory
60
236
 
61
237
  # Extract course archive
62
- extracted_dir = self._extract_course_archive(course_dir)
238
+ extracted_course_dir = extract_course_archive(course_archive_path)
63
239
 
64
240
  # Create translated copy
65
- translated_dir = self._create_translated_copy(
66
- extracted_dir, translation_language
241
+ translated_course_dir = create_translated_copy(
242
+ extracted_course_dir, target_language
67
243
  )
68
244
 
245
+ # Store for cleanup on failure
246
+ self.translated_course_dir = translated_course_dir
247
+
69
248
  # Delete extracted directory after copying
70
- if extracted_dir.exists():
71
- shutil.rmtree(extracted_dir)
249
+ if extracted_course_dir.exists():
250
+ shutil.rmtree(extracted_course_dir)
72
251
 
73
- # Translate content
74
- billed_chars = self._translate_course_content(
75
- translated_dir, source_language, translation_language
252
+ # Translate content asynchronously
253
+ self._translate_course_content_async(
254
+ translated_course_dir, source_language, target_language
76
255
  )
77
256
 
257
+ # Wait for all tasks and report status
258
+ command_stats = self._wait_and_report_tasks()
259
+ total_time_taken_msg = (
260
+ f"Command finished in: {time.perf_counter() - start_time:.2f} seconds."
261
+ )
262
+ self.stdout.write(self.style.SUCCESS(total_time_taken_msg))
263
+ command_stats.append(total_time_taken_msg)
264
+
265
+ # Add translation log entry
266
+ self._add_translation_log_entry(
267
+ source_language=source_language,
268
+ target_language=target_language,
269
+ command_stats=command_stats,
270
+ )
78
271
  # Create final archive
79
- archive_path = self._create_translated_archive(
80
- translated_dir, translation_language, course_dir.stem
272
+ translated_archive_path = create_translated_archive(
273
+ translated_course_dir, target_language, course_archive_path.stem
81
274
  )
82
-
83
- self.stdout.write(
84
- self.style.SUCCESS(
85
- f"Translation completed. Archive created: {archive_path}"
86
- )
275
+ success_msg = (
276
+ f"Translation completed successfully. Translated archive created: "
277
+ f"{translated_archive_path}"
87
278
  )
88
- logger.info("Total billed characters: %s", billed_chars)
279
+ self.stdout.write(self.style.SUCCESS(success_msg))
89
280
 
90
281
  except Exception as e:
91
282
  logger.exception("Translation failed")
283
+
284
+ # Cleanup translated course directory on failure
285
+ if self.translated_course_dir and self.translated_course_dir.exists():
286
+ self.stdout.write(
287
+ self.style.WARNING(
288
+ f"Cleaning up translated course directory: {self.translated_course_dir}" # noqa: E501
289
+ )
290
+ )
291
+ shutil.rmtree(self.translated_course_dir)
292
+
92
293
  error_msg = f"Translation failed: {e}"
93
294
  raise CommandError(error_msg) from e
94
295
 
95
- def get_supported_archive_extension(self, filename: str) -> str | None:
96
- """
97
- Return the supported archive extension if filename ends with one, else None.
296
+ def _translate_course_content_async(
297
+ self, course_dir: Path, source_language: str, target_language: str
298
+ ) -> None:
98
299
  """
99
- for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS:
100
- if filename.endswith(ext):
101
- return ext
102
- return None
300
+ Translate all course content using Celery tasks.
103
301
 
104
- def _validate_inputs(self, options: dict[str, Any]) -> None:
105
- """Validate command inputs."""
106
- course_dir = Path(options["course_directory"])
302
+ Args:
303
+ course_dir: Path to the course directory
304
+ source_language: Source language code
305
+ target_language: Target language code
107
306
 
108
- if not course_dir.exists():
109
- error_msg = f"Course directory not found: {course_dir}"
110
- raise CommandError(error_msg)
111
-
112
- if self.get_supported_archive_extension(course_dir.name) is None:
113
- supported_exts = ", ".join(
114
- settings.OL_OPENEDX_COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS
115
- )
116
- error_msg = f"Course directory must be a tar file: {supported_exts}"
117
- raise CommandError(error_msg)
118
-
119
- if not hasattr(settings, "DEEPL_API_KEY") or not settings.DEEPL_API_KEY:
120
- error_msg = "DEEPL_API_KEY setting is required"
121
- raise CommandError(error_msg)
122
-
123
- def _extract_course_archive(self, course_dir: Path) -> Path:
124
- """Extract course archive to working directory."""
125
- # Use the parent directory of the source file as the base extraction directory
126
- extract_base_dir = course_dir.parent
127
-
128
- # Get base name without extension
129
- ext = self.get_supported_archive_extension(course_dir.name)
130
- tarball_base = course_dir.name[: -len(ext)] if ext else course_dir.name
131
-
132
- extracted_dir = extract_base_dir / tarball_base
133
-
134
- if not extracted_dir.exists():
135
- try:
136
- with tarfile.open(course_dir, "r:*") as tar:
137
- # Validate tar file before extraction
138
- self._validate_tar_file(tar)
139
- tar.extractall(path=extracted_dir, filter="data")
140
- except (tarfile.TarError, OSError) as e:
141
- error_msg = f"Failed to extract archive: {e}"
142
- raise CommandError(error_msg) from e
143
-
144
- logger.info("Extracted course to: %s", extracted_dir)
145
- return extracted_dir
146
-
147
- def _validate_tar_file(self, tar: tarfile.TarFile) -> None:
148
- """Validate tar file contents for security."""
149
- for member in tar.getmembers():
150
- # Check for directory traversal attacks
151
- if member.name.startswith("/") or ".." in member.name:
152
- error_msg = f"Unsafe tar member: {member.name}"
153
- raise CommandError(error_msg)
154
- # Check for excessively large files
155
- if (
156
- member.size > 512 * 1024 * 1024
157
- ): # 0.5GB limit because courses on Production are big
158
- error_msg = f"File too large: {member.name}"
159
- raise CommandError(error_msg)
160
-
161
- def _create_translated_copy(
162
- self, source_dir: Path, translation_language: str
163
- ) -> Path:
164
- """Create a copy of the course for translation."""
165
- base_name = source_dir.name
166
- new_dir_name = f"{translation_language}_{base_name}"
167
- new_dir_path = source_dir.parent / new_dir_name
307
+ Raises:
308
+ CommandError: If course directory is not found
309
+ """
310
+ course_directory = course_dir / "course"
168
311
 
169
- if new_dir_path.exists():
170
- error_msg = f"Translation directory already exists: {new_dir_path}"
312
+ if not course_directory.exists() or not course_directory.is_dir():
313
+ error_msg = f"Course directory not found: {course_directory}"
171
314
  raise CommandError(error_msg)
172
315
 
173
- shutil.copytree(source_dir, new_dir_path)
174
- logger.info("Created translation copy: %s", new_dir_path)
175
- return new_dir_path
176
-
177
- def _translate_course_content(
178
- self, course_dir: Path, source_language: str, translation_language: str
179
- ) -> int:
180
- """Translate all course content and return total billed characters."""
181
- total_billed_chars = 0
182
-
183
- # Translate files in main directories
184
- for search_dir in [course_dir, course_dir.parent]:
185
- total_billed_chars += self._translate_files_in_directory(
186
- search_dir, source_language, translation_language, recursive=False
187
- )
316
+ # Update language attributes in course XML, doing this
317
+ # because tasks can override the XML files
318
+ update_course_language_attribute(course_directory, target_language)
188
319
 
189
- # Translate files in target subdirectories
190
- for dir_name in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
191
- target_dir = search_dir / dir_name
192
- if target_dir.exists() and target_dir.is_dir():
193
- total_billed_chars += self._translate_files_in_directory(
194
- target_dir,
195
- source_language,
196
- translation_language,
197
- recursive=True,
198
- )
320
+ # Collect all tasks
321
+ self.tasks = []
199
322
 
200
- # Translate special JSON files
201
- total_billed_chars += self._translate_grading_policy(
202
- course_dir, source_language, translation_language
203
- )
204
- total_billed_chars += self._translate_policy_json(
205
- course_dir, source_language, translation_language
323
+ # Add translation tasks for files in course directory
324
+ self._add_file_translation_tasks(
325
+ course_directory, source_language, target_language, recursive=False
206
326
  )
207
327
 
208
- return total_billed_chars
328
+ # Add translation tasks for target subdirectories
329
+ for target_dir_name in settings.COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
330
+ target_directory = course_directory / target_dir_name
331
+ if target_directory.exists() and target_directory.is_dir():
332
+ self._add_file_translation_tasks(
333
+ target_directory, source_language, target_language, recursive=True
334
+ )
335
+
336
+ # Add tasks for special JSON files
337
+ self._add_grading_policy_tasks(course_dir, target_language)
338
+ self._add_policy_json_tasks(course_dir, target_language)
209
339
 
210
- def _translate_files_in_directory(
340
+ def _add_file_translation_tasks(
211
341
  self,
212
- directory: Path,
342
+ directory_path: Path,
213
343
  source_language: str,
214
- translation_language: str,
344
+ target_language: str,
215
345
  *,
216
346
  recursive: bool = False,
217
- ) -> int:
218
- """Translate files in a directory."""
219
- total_billed_chars = 0
220
-
221
- if recursive:
222
- file_paths: list[Path] = []
223
- for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS:
224
- file_paths.extend(directory.rglob(f"*{ext}"))
225
- else:
226
- file_paths = [
227
- f
228
- for f in directory.iterdir()
229
- if f.is_file()
230
- and any(
231
- f.name.endswith(ext)
232
- for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS # noqa: E501
233
- )
234
- ]
235
-
236
- for file_path in file_paths:
237
- try:
238
- total_billed_chars += self._translate_file(
239
- file_path, source_language, translation_language
240
- )
241
- except (OSError, UnicodeDecodeError) as e:
242
- logger.warning("Failed to translate %s: %s", file_path, e)
243
-
244
- return total_billed_chars
347
+ ) -> None:
348
+ """
349
+ Add Celery tasks for file translation to the task list.
245
350
 
246
- def _translate_file(
247
- self, file_path: Path, source_language: str, translation_language: str
248
- ) -> int:
249
- """Translate a single file and return billed characters."""
250
- try:
251
- content = file_path.read_text(encoding="utf-8")
252
- logger.debug("Translating: %s", file_path)
351
+ Args:
352
+ directory_path: Path to directory containing files to translate
353
+ source_language: Source language code
354
+ target_language: Target language code
355
+ recursive: Whether to search for files recursively
356
+ """
357
+ translatable_file_paths = get_translatable_file_paths(
358
+ directory_path, recursive=recursive
359
+ )
253
360
 
254
- translated_content, billed_chars = self._translate_text(
255
- content, source_language, translation_language, file_path.name
361
+ for file_path in translatable_file_paths:
362
+ task = translate_file_task.s(
363
+ str(file_path),
364
+ source_language,
365
+ target_language,
366
+ self.content_provider_name,
367
+ self.content_model,
368
+ self.srt_provider_name,
369
+ self.srt_model,
370
+ self.glossary_directory,
256
371
  )
372
+ self.tasks.append(("file", str(file_path), task))
373
+ logger.info("Added translation task for: %s", file_path)
257
374
 
258
- # Handle XML display_name translation
259
- if file_path.suffix == ".xml":
260
- translated_content = self._translate_display_name(
261
- translated_content, source_language, translation_language
262
- )
263
-
264
- file_path.write_text(translated_content, encoding="utf-8")
265
- except (OSError, UnicodeDecodeError) as e:
266
- logger.warning("Failed to translate %s: %s", file_path, e)
267
- return 0
268
- else:
269
- return billed_chars
270
-
271
- def _translate_grading_policy(
272
- self, course_dir: Path, source_language: str, translation_language: str
273
- ) -> int:
274
- """Translate grading_policy.json files."""
275
- total_billed_chars = 0
276
- policies_dir = course_dir / "course" / "policies"
375
+ def _add_grading_policy_tasks(self, course_dir: Path, target_language: str) -> None:
376
+ """
377
+ Add Celery tasks for grading_policy.json translation to the task list.
277
378
 
278
- if not policies_dir.exists():
279
- return 0
379
+ Args:
380
+ course_dir: Path to the course directory
381
+ target_language: Target language code
382
+ """
383
+ course_policies_dir = course_dir / "course" / "policies"
280
384
 
281
- for child_dir in policies_dir.iterdir():
282
- if not child_dir.is_dir():
283
- continue
385
+ if not course_policies_dir.exists():
386
+ return
284
387
 
285
- grading_policy_path = child_dir / "grading_policy.json"
286
- if not grading_policy_path.exists():
388
+ for policy_child_dir in course_policies_dir.iterdir():
389
+ if not policy_child_dir.is_dir():
287
390
  continue
288
391
 
289
- try:
290
- grading_policy = json.loads(
291
- grading_policy_path.read_text(encoding="utf-8")
292
- )
293
- updated = False
294
-
295
- for item in grading_policy.get("GRADER", []):
296
- if "short_label" in item:
297
- translated_label, billed_chars = self._translate_text(
298
- item["short_label"], source_language, translation_language
299
- )
300
- item["short_label"] = translated_label
301
- total_billed_chars += billed_chars
302
- updated = True
303
-
304
- if updated:
305
- grading_policy_path.write_text(
306
- json.dumps(grading_policy, ensure_ascii=False, indent=4),
307
- encoding="utf-8",
308
- )
309
- except (OSError, json.JSONDecodeError) as e:
310
- logger.warning(
311
- "Failed to translate grading policy in %s: %s", child_dir, e
392
+ grading_policy_file = policy_child_dir / "grading_policy.json"
393
+ if grading_policy_file.exists():
394
+ task = translate_grading_policy_task.s(
395
+ str(grading_policy_file),
396
+ target_language,
397
+ self.content_provider_name,
398
+ self.content_model,
399
+ self.glossary_directory,
312
400
  )
401
+ self.tasks.append(("grading_policy", str(grading_policy_file), task))
402
+ logger.info("Added grading policy task for: %s", grading_policy_file)
313
403
 
314
- return total_billed_chars
315
-
316
- def _translate_policy_json(
317
- self, course_dir: Path, source_language: str, translation_language: str
318
- ) -> int:
319
- """Translate policy.json files."""
320
- total_billed_chars = 0
321
- policies_dir = course_dir / "course" / "policies"
404
+ def _add_policy_json_tasks(self, course_dir: Path, target_language: str) -> None:
405
+ """
406
+ Add Celery tasks for policy.json translation to the task list.
322
407
 
323
- if not policies_dir.exists():
324
- return 0
408
+ Args:
409
+ course_dir: Path to the course directory
410
+ target_language: Target language code
411
+ """
412
+ course_policies_dir = course_dir / "course" / "policies"
325
413
 
326
- for child_dir in policies_dir.iterdir():
327
- if not child_dir.is_dir():
328
- continue
414
+ if not course_policies_dir.exists():
415
+ return
329
416
 
330
- policy_path = child_dir / "policy.json"
331
- if not policy_path.exists():
417
+ for policy_child_dir in course_policies_dir.iterdir():
418
+ if not policy_child_dir.is_dir():
332
419
  continue
333
420
 
334
- try:
335
- policy_data = json.loads(policy_path.read_text(encoding="utf-8"))
336
- updated = False
337
-
338
- for course_obj in policy_data.values():
339
- if not isinstance(course_obj, dict):
340
- continue
341
-
342
- # Translate various fields
343
- billed_chars, field_updated = self._translate_policy_fields(
344
- course_obj, source_language, translation_language
345
- )
346
- total_billed_chars += billed_chars
347
- updated = updated or field_updated
348
-
349
- if updated:
350
- policy_path.write_text(
351
- json.dumps(policy_data, ensure_ascii=False, indent=4),
352
- encoding="utf-8",
353
- )
354
- except (OSError, json.JSONDecodeError) as e:
355
- logger.warning("Failed to translate policy in %s: %s", child_dir, e)
356
-
357
- return total_billed_chars
358
-
359
- def _translate_policy_fields(
360
- self,
361
- course_obj: dict[str, Any],
362
- source_language: str,
363
- translation_language: str,
364
- ) -> tuple[int, bool]:
365
- """Translate specific fields in policy object."""
366
- total_billed_chars = 0
367
- updated = False
368
-
369
- # Translate simple string fields
370
- billed_chars, field_updated = self._translate_string_fields(
371
- course_obj, source_language, translation_language
372
- )
373
- total_billed_chars += billed_chars
374
- updated = updated or field_updated
421
+ policy_file = policy_child_dir / "policy.json"
422
+ if policy_file.exists():
423
+ task = translate_policy_json_task.s(
424
+ str(policy_file),
425
+ target_language,
426
+ self.content_provider_name,
427
+ self.content_model,
428
+ self.glossary_directory,
429
+ )
430
+ self.tasks.append(("policy", str(policy_file), task))
431
+ logger.info("Added policy.json task for: %s", policy_file)
375
432
 
376
- # Translate discussion topics
377
- billed_chars, field_updated = self._translate_discussion_topics(
378
- course_obj, source_language, translation_language
379
- )
380
- total_billed_chars += billed_chars
381
- updated = updated or field_updated
433
+ def _wait_and_report_tasks(self) -> list[str]: # noqa: C901, PLR0915, PLR0912
434
+ """
435
+ Execute all tasks as a Celery group and wait for completion.
382
436
 
383
- # Translate learning info and tabs
384
- billed_chars, field_updated = self._translate_learning_info_and_tabs(
385
- course_obj, source_language, translation_language
386
- )
387
- total_billed_chars += billed_chars
388
- updated = updated or field_updated
437
+ Uses Celery's group primitive to execute tasks in parallel and
438
+ provides detailed progress reporting.
389
439
 
390
- # Translate XML attributes
391
- billed_chars, field_updated = self._translate_xml_attributes(
392
- course_obj, source_language, translation_language
440
+ Raises:
441
+ CommandError: If any tasks fail
442
+ """
443
+ stats = []
444
+ if not self.tasks:
445
+ self.stdout.write("No tasks to execute.")
446
+ return []
447
+
448
+ total_tasks = len(self.tasks)
449
+ self.stdout.write(
450
+ f"\nExecuting {total_tasks} translation tasks in parallel...\n"
393
451
  )
394
- total_billed_chars += billed_chars
395
- updated = updated or field_updated
396
452
 
397
- return total_billed_chars, updated
453
+ # Extract task signatures and create mappings
454
+ task_signatures = [task_sig for _, _, task_sig in self.tasks]
455
+ task_metadata = {
456
+ i: (task_type, file_path)
457
+ for i, (task_type, file_path, _) in enumerate(self.tasks)
458
+ }
398
459
 
399
- def _translate_string_fields(
400
- self,
401
- course_obj: dict[str, Any],
402
- source_language: str,
403
- translation_language: str,
404
- ) -> tuple[int, bool]:
405
- """Translate simple string fields."""
406
- total_billed_chars = 0
407
- updated = False
408
-
409
- string_fields = ["advertised_start", "display_name", "display_organization"]
410
- for field in string_fields:
411
- if field in course_obj:
412
- translated, billed_chars = self._translate_text(
413
- course_obj[field], source_language, translation_language
414
- )
415
- course_obj[field] = translated
416
- total_billed_chars += billed_chars
417
- updated = True
460
+ # Create and execute group
461
+ job = group(task_signatures)
462
+ result = job.apply_async()
418
463
 
419
- return total_billed_chars, updated
464
+ # Wait for all tasks to complete with progress reporting
465
+ completed_count = 0
466
+ self.stdout.flush()
420
467
 
421
- def _translate_discussion_topics(
422
- self,
423
- course_obj: dict[str, Any],
424
- source_language: str,
425
- translation_language: str,
426
- ) -> tuple[int, bool]:
427
- """Translate discussion topics."""
428
- total_billed_chars = 0
429
- updated = False
430
-
431
- if "discussion_topics" in course_obj:
432
- topics = course_obj["discussion_topics"]
433
- if isinstance(topics, dict):
434
- new_topics = {}
435
- for topic_key, value in topics.items():
436
- translated_key, billed_chars = self._translate_text(
437
- topic_key, source_language, translation_language
438
- )
439
- new_topics[translated_key] = value
440
- total_billed_chars += billed_chars
441
- course_obj["discussion_topics"] = new_topics
442
- updated = True
443
-
444
- return total_billed_chars, updated
445
-
446
- def _translate_learning_info_and_tabs(
447
- self,
448
- course_obj: dict[str, Any],
449
- source_language: str,
450
- translation_language: str,
451
- ) -> tuple[int, bool]:
452
- """Translate learning info and tabs."""
453
- total_billed_chars = 0
454
- updated = False
455
-
456
- # Learning info
457
- if "learning_info" in course_obj and isinstance(
458
- course_obj["learning_info"], list
459
- ):
460
- translated_info = []
461
- for item in course_obj["learning_info"]:
462
- translated, billed_chars = self._translate_text(
463
- item, source_language, translation_language
464
- )
465
- translated_info.append(translated)
466
- total_billed_chars += billed_chars
467
- course_obj["learning_info"] = translated_info
468
- updated = True
469
-
470
- # Tabs
471
- if "tabs" in course_obj and isinstance(course_obj["tabs"], list):
472
- for tab in course_obj["tabs"]:
473
- if isinstance(tab, dict) and "name" in tab:
474
- translated, billed_chars = self._translate_text(
475
- tab["name"], source_language, translation_language
476
- )
477
- tab["name"] = translated
478
- total_billed_chars += billed_chars
479
- updated = True
480
-
481
- return total_billed_chars, updated
482
-
483
- def _translate_xml_attributes(
484
- self,
485
- course_obj: dict[str, Any],
486
- source_language: str,
487
- translation_language: str,
488
- ) -> tuple[int, bool]:
489
- """Translate XML attributes."""
490
- total_billed_chars = 0
491
- updated = False
492
-
493
- if "xml_attributes" in course_obj and isinstance(
494
- course_obj["xml_attributes"], dict
495
- ):
496
- xml_attrs = course_obj["xml_attributes"]
497
- xml_fields = [
498
- "diplay_name",
499
- "info_sidebar_name",
500
- ] # Note: keeping typo as in original
501
- for field in xml_fields:
502
- if field in xml_attrs:
503
- translated, billed_chars = self._translate_text(
504
- xml_attrs[field], source_language, translation_language
468
+ try:
469
+ # Poll for completion and show progress
470
+ while not result.ready():
471
+ # Count completed tasks
472
+ new_completed = sum(1 for r in result.results if r.ready())
473
+ if new_completed > completed_count:
474
+ completed_count = new_completed
475
+ self.stdout.write(
476
+ f"\rProgress: {completed_count}/{total_tasks} tasks completed",
477
+ ending="",
505
478
  )
506
- xml_attrs[field] = translated
507
- total_billed_chars += billed_chars
508
- updated = True
509
-
510
- return total_billed_chars, updated
511
-
512
- def _create_translated_archive(
513
- self, translated_dir: Path, translation_language: str, original_name: str
514
- ) -> Path:
515
- """Create tar.gz archive of translated course."""
516
- # Remove all archive extensions from the original name
517
- ext = self.get_supported_archive_extension(original_name)
518
- clean_name = original_name[: -len(ext)] if ext else original_name
519
-
520
- tar_gz_name = f"{translation_language}_{clean_name}.tar.gz"
521
- tar_gz_path = translated_dir.parent / tar_gz_name
522
-
523
- # Remove existing archive
524
- if tar_gz_path.exists():
525
- tar_gz_path.unlink()
479
+ self.stdout.flush()
526
480
 
527
- # Create tar.gz archive containing only the 'course' directory
528
- course_dir_path = translated_dir / "course"
529
- with tarfile.open(tar_gz_path, "w:gz") as tar:
530
- tar.add(course_dir_path, arcname="course")
481
+ # Sleep before next poll (don't use join with timeout)
482
+ time.sleep(TASK_POLL_INTERVAL_SECONDS)
531
483
 
532
- # Delete extracted directory after copying
533
- if translated_dir.exists():
534
- shutil.rmtree(translated_dir)
484
+ # Final update
485
+ self.stdout.write(
486
+ f"\rProgress: {total_tasks}/{total_tasks} tasks completed\n"
487
+ )
535
488
 
536
- logger.info("Created tar.gz archive: %s", tar_gz_path)
537
- return tar_gz_path
489
+ # Get all results (this will raise exceptions if propagate=True)
490
+ results = result.get(timeout=TASK_TIMEOUT_SECONDS, propagate=False)
538
491
 
539
- def _translate_text(
540
- self,
541
- text: str,
542
- source_language: str,
543
- target_language: str,
544
- filename: str | None = None,
545
- ) -> tuple[str, int]:
546
- """Translate text using DeepL API."""
547
- if not text or not text.strip():
548
- return text, 0
549
-
550
- try:
551
- deepl_client = deepl.Translator(settings.DEEPL_API_KEY)
552
-
553
- tag_handling = None
554
- if filename:
555
- extension = Path(filename).suffix.lstrip(".")
556
- if extension in ["html", "xml"]:
557
- tag_handling = extension
558
-
559
- result = deepl_client.translate_text(
560
- text,
561
- source_lang=source_language,
562
- target_lang=target_language,
563
- tag_handling=tag_handling,
564
- )
492
+ except Exception as e:
493
+ logger.exception("Task execution failed")
494
+ error_msg = f"Task execution timeout or error: {e}"
495
+ raise CommandError(error_msg) from e
565
496
 
566
- return result.text, result.billed_characters # noqa: TRY300
567
- except (deepl.exceptions.DeepLException, OSError) as e:
568
- logger.warning("Translation failed for text: %s... Error: %s", text[:50], e)
569
- return text, 0
497
+ # Process results
498
+ completed_tasks = 0
499
+ failed_tasks = 0
500
+ skipped_tasks = 0
501
+
502
+ for i, task_result in enumerate(results):
503
+ task_type, file_path = task_metadata[i]
504
+
505
+ if isinstance(task_result, dict):
506
+ status = task_result.get("status", "unknown")
507
+ if status == "success":
508
+ completed_tasks += 1
509
+ msg = f"✓ {task_type}: {file_path}"
510
+ stats.append(msg)
511
+ self.stdout.write(self.style.SUCCESS(msg))
512
+ elif status == "skipped":
513
+ skipped_tasks += 1
514
+ reason = task_result.get("reason", "Skipped")
515
+ msg = f"⊘ {task_type}: {file_path} - {reason}"
516
+ stats.append(msg)
517
+ self.stdout.write(self.style.WARNING(msg))
518
+ elif status == "error":
519
+ failed_tasks += 1
520
+ error = task_result.get("error", "Unknown error")
521
+ msg = f"✗ {task_type}: {file_path} - {error}"
522
+ stats.append(msg)
523
+ self.stdout.write(self.style.ERROR(msg))
524
+ else:
525
+ failed_tasks += 1
526
+ msg = f"✗ {task_type}: {file_path} - Unknown status: {status}"
527
+ stats.append(msg)
528
+ self.stdout.write(self.style.ERROR(msg))
529
+ else:
530
+ # Task raised an exception
531
+ failed_tasks += 1
532
+ error_msg = str(task_result) if task_result else "Task failed"
533
+ msg = f"✗ {task_type}: {file_path} - {error_msg}"
534
+ stats.append(msg)
535
+ self.stdout.write(self.style.ERROR(msg))
536
+
537
+ # Print summary
538
+ self.stdout.write("\n" + "=" * 60)
539
+ successful_tasks_stats = (
540
+ f"Total tasks: {total_tasks}\nCompleted: {completed_tasks}"
541
+ )
542
+ stats.append(successful_tasks_stats)
543
+ self.stdout.write(self.style.SUCCESS(successful_tasks_stats))
544
+ if skipped_tasks > 0:
545
+ skipped_tasks_stats = f"Skipped: {skipped_tasks}"
546
+ stats.append(skipped_tasks_stats)
547
+ self.stdout.write(self.style.WARNING(skipped_tasks_stats))
548
+ if failed_tasks > 0:
549
+ failed_tasks_stats = f"Failed: {failed_tasks}"
550
+ stats.append(failed_tasks_stats)
551
+ self.stdout.write(self.style.ERROR(failed_tasks_stats))
552
+ self.stdout.write("=" * 60 + "\n")
553
+
554
+ if failed_tasks > 0:
555
+ error_msg = f"{failed_tasks} translation tasks failed"
556
+ raise CommandError(error_msg)
570
557
 
571
- def _translate_display_name(
572
- self, xml_content: str, source_language: str, target_language: str
573
- ) -> str:
574
- """Extract and translate the display_name attribute of the root element."""
575
- try:
576
- root = ElementTree.fromstring(xml_content)
577
- display_name = root.attrib.get("display_name")
558
+ return stats
578
559
 
579
- if display_name:
580
- translated_name, _ = self._translate_text(
581
- display_name, source_language, target_language
582
- )
583
- root.set("display_name", translated_name)
584
- return ElementTree.tostring(root, encoding="unicode")
585
- except ElementTree.ParseError as e:
586
- logger.warning("Could not translate display_name: %s", e)
560
+ def _add_translation_log_entry(
561
+ self, source_language, target_language, command_stats=None
562
+ ) -> None:
563
+ """
564
+ Add a log entry for the course translation operation.
587
565
 
588
- return xml_content
566
+ Args:
567
+ source_language: Source language code
568
+ target_language: Target language code
569
+ command_stats: List of command statistics/logs
570
+ """
571
+ source_course_id = generate_course_key_from_xml(
572
+ course_dir_path=self.translated_course_dir
573
+ )
574
+ command_stats_str = "\n".join(command_stats) if command_stats else ""
575
+
576
+ CourseTranslationLog.objects.create(
577
+ source_course_id=source_course_id,
578
+ source_course_language=source_language,
579
+ target_course_language=target_language,
580
+ srt_provider_name=self.srt_provider_name,
581
+ srt_provider_model=self.srt_model or "",
582
+ content_provider_name=self.content_provider_name,
583
+ content_provider_model=self.content_model or "",
584
+ command_stats=command_stats_str,
585
+ )