ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (35)
  1. ol_openedx_course_translations/apps.py +12 -2
  2. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  3. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  10. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  11. ol_openedx_course_translations/management/commands/translate_course.py +419 -470
  12. ol_openedx_course_translations/middleware.py +143 -0
  13. ol_openedx_course_translations/providers/__init__.py +1 -0
  14. ol_openedx_course_translations/providers/base.py +278 -0
  15. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  16. ol_openedx_course_translations/providers/llm_providers.py +565 -0
  17. ol_openedx_course_translations/settings/cms.py +17 -0
  18. ol_openedx_course_translations/settings/common.py +57 -30
  19. ol_openedx_course_translations/settings/lms.py +15 -0
  20. ol_openedx_course_translations/tasks.py +222 -0
  21. ol_openedx_course_translations/urls.py +16 -0
  22. ol_openedx_course_translations/utils/__init__.py +0 -0
  23. ol_openedx_course_translations/utils/command_utils.py +197 -0
  24. ol_openedx_course_translations/utils/constants.py +216 -0
  25. ol_openedx_course_translations/utils/course_translations.py +581 -0
  26. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  27. ol_openedx_course_translations/views.py +73 -0
  28. ol_openedx_course_translations-0.3.0.dist-info/METADATA +407 -0
  29. ol_openedx_course_translations-0.3.0.dist-info/RECORD +35 -0
  30. ol_openedx_course_translations-0.3.0.dist-info/entry_points.txt +5 -0
  31. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  32. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  33. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  34. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/WHEEL +0 -0
  35. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -2,25 +2,62 @@
2
2
  Management command to translate course content to a specified language.
3
3
  """
4
4
 
5
- import json
6
5
  import logging
7
6
  import shutil
8
- import tarfile
7
+ import time
9
8
  from pathlib import Path
10
- from typing import Any
11
9
 
12
- import deepl
13
- from defusedxml import ElementTree
10
+ from celery import group
14
11
  from django.conf import settings
15
12
  from django.core.management.base import BaseCommand, CommandError
16
13
 
14
+ from ol_openedx_course_translations.tasks import (
15
+ translate_file_task,
16
+ translate_grading_policy_task,
17
+ translate_policy_json_task,
18
+ )
19
+ from ol_openedx_course_translations.utils.constants import PROVIDER_DEEPL
20
+ from ol_openedx_course_translations.utils.course_translations import (
21
+ create_translated_archive,
22
+ create_translated_copy,
23
+ extract_course_archive,
24
+ get_translatable_file_paths,
25
+ update_course_language_attribute,
26
+ validate_course_inputs,
27
+ )
28
+
17
29
  logger = logging.getLogger(__name__)
18
30
 
31
+ # Task configuration
32
+ TASK_TIMEOUT_SECONDS = 3600 * 2 # 2 hour total timeout for all tasks
33
+ TASK_POLL_INTERVAL_SECONDS = 2 # Poll every 2 seconds for task completion
34
+
19
35
 
20
36
  class Command(BaseCommand):
21
37
  """Translate given course content to the specified language."""
22
38
 
23
- help = "Translate course content to the specified language."
39
+ help = (
40
+ "Translate course content to the specified language.\n\n"
41
+ "Configuration:\n"
42
+ "All translation providers should be configured in TRANSLATIONS_PROVIDERS:\n"
43
+ "{\n"
44
+ ' "deepl": {"api_key": "<YOUR_DEEPL_API_KEY>"},\n'
45
+ ' "openai": {"api_key": "<KEY>", "default_model": "gpt-5.2"},\n'
46
+ ' "gemini": {"api_key": "<KEY>", "default_model": "gemini-3-pro-preview"},\n'
47
+ ' "mistral": {"api_key": "<KEY>", "default_model": "mistral-large-latest"}\n'
48
+ "}\n"
49
+ )
50
+
51
+ def __init__(self, *args, **kwargs):
52
+ """Initialize the command with empty task list."""
53
+ super().__init__(*args, **kwargs)
54
+ self.tasks = []
55
+ self.translated_course_dir = None
56
+ self.content_provider_name = None
57
+ self.content_model = None
58
+ self.srt_provider_name = None
59
+ self.srt_model = None
60
+ self.glossary_directory = None
24
61
 
25
62
  def add_arguments(self, parser) -> None:
26
63
  """Entry point for subclassed commands to add custom arguments."""
@@ -34,8 +71,8 @@ class Command(BaseCommand):
34
71
  ),
35
72
  )
36
73
  parser.add_argument(
37
- "--translation-language",
38
- dest="translation_language",
74
+ "--target-language",
75
+ dest="target_language",
39
76
  required=True,
40
77
  help=(
41
78
  "Specify the language code in ISO format "
@@ -44,545 +81,457 @@ class Command(BaseCommand):
44
81
  )
45
82
  parser.add_argument(
46
83
  "--course-dir",
47
- dest="course_directory",
84
+ dest="course_archive_path",
48
85
  required=True,
49
86
  help="Specify the course directory (tar archive).",
50
87
  )
88
+ parser.add_argument(
89
+ "--content-translation-provider",
90
+ dest="content_translation_provider",
91
+ required=True,
92
+ help=(
93
+ "Translation provider for content (XML/HTML and text). "
94
+ "Format: 'deepl', 'PROVIDER', or 'PROVIDER/MODEL' "
95
+ "(e.g., 'openai', 'openai/gpt-5.2', 'gemini', 'gemini/gemini-3-pro-preview'). " # noqa: E501
96
+ "If model is not specified, uses the default model from settings."
97
+ ),
98
+ )
99
+ parser.add_argument(
100
+ "--srt-translation-provider",
101
+ dest="srt_translation_provider",
102
+ required=True,
103
+ help=(
104
+ "Translation provider for SRT subtitles. "
105
+ "Format: 'deepl', 'PROVIDER', or 'PROVIDER/MODEL' "
106
+ "(e.g., 'openai', 'openai/gpt-5.2', 'gemini', 'gemini/gemini-3-pro-preview'). " # noqa: E501
107
+ "If model is not specified, uses the default model from settings."
108
+ ),
109
+ )
110
+ parser.add_argument(
111
+ "--glossary-dir",
112
+ dest="glossary_directory",
113
+ required=False,
114
+ help=(
115
+ "Path to glossary directory containing "
116
+ "language-specific glossary files."
117
+ ),
118
+ )
119
+
120
+ def _parse_and_validate_provider_spec(
121
+ self, provider_spec: str
122
+ ) -> tuple[str, str | None]:
123
+ """
124
+ Parse and validate provider specification into provider name and model.
125
+
126
+ Resolves model from settings if not provided in specification.
127
+
128
+ Args:
129
+ provider_spec: Provider specification
130
+
131
+ Returns:
132
+ Tuple of (provider_name, model_name). model_name is None for DeepL or
133
+ resolved from settings if not specified.
134
+
135
+ Raises:
136
+ CommandError: If provider specification format is invalid
137
+ or model and api_key cannot be resolved
138
+ """
139
+ # Parse the specification
140
+ if "/" in provider_spec:
141
+ parts = provider_spec.split("/", 1)
142
+ if len(parts) != 2 or not parts[0] or not parts[1]: # noqa: PLR2004
143
+ error_msg = (
144
+ f"Invalid provider specification: {provider_spec}. "
145
+ "Use format 'PROVIDER' or 'PROVIDER/MODEL' "
146
+ "(e.g., 'openai', 'openai/gpt-5.2')"
147
+ )
148
+ raise CommandError(error_msg)
149
+ provider_name = parts[0].lower()
150
+ model_name = parts[1]
151
+ else:
152
+ provider_name = provider_spec.lower()
153
+ model_name = None
154
+
155
+ # Try to get default model from settings
156
+ providers_config = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
157
+ if provider_name not in providers_config:
158
+ error_msg = (
159
+ f"Provider '{provider_name}' not configured in TRANSLATIONS_PROVIDERS. "
160
+ f"Available providers: {', '.join(providers_config.keys())}"
161
+ )
162
+ raise CommandError(error_msg)
163
+
164
+ provider_config = providers_config[provider_name]
165
+ api_key = provider_config.get("api_key")
166
+ if not api_key:
167
+ error_msg = (
168
+ f"API key for provider '{provider_name}' is not configured in "
169
+ "TRANSLATIONS_PROVIDERS. Please set the 'api_key' in settings."
170
+ )
171
+ raise CommandError(error_msg)
172
+
173
+ # DeepL doesn't use models
174
+ if provider_name == PROVIDER_DEEPL:
175
+ return provider_name, None
176
+
177
+ # If model is explicitly provided, return it
178
+ if model_name:
179
+ return provider_name, model_name
180
+
181
+ default_model = provider_config.get("default_model")
182
+ if not default_model:
183
+ error_msg = (
184
+ f"No model specified for provider '{provider_name}' and no "
185
+ f"default_model found in TRANSLATIONS_PROVIDERS['{provider_name}']. "
186
+ f"Either specify a model (e.g., '{provider_name}/gpt-5.2') or "
187
+ f"configure a default_model in settings."
188
+ )
189
+ raise CommandError(error_msg)
190
+
191
+ return provider_name, default_model
51
192
 
52
193
  def handle(self, **options) -> None:
53
194
  """Handle the translate_course command."""
54
195
  try:
55
- self._validate_inputs(options)
56
-
57
- course_dir = Path(options["course_directory"])
196
+ course_archive_path = Path(options["course_archive_path"])
58
197
  source_language = options["source_language"]
59
- translation_language = options["translation_language"]
198
+ target_language = options["target_language"]
199
+ content_provider_spec = options["content_translation_provider"]
200
+ srt_provider_spec = options["srt_translation_provider"]
201
+ glossary_directory = options.get("glossary_directory")
202
+
203
+ # Parse and validate provider specifications (includes validation)
204
+ content_provider_name, content_model = (
205
+ self._parse_and_validate_provider_spec(content_provider_spec)
206
+ )
207
+ srt_provider_name, srt_model = self._parse_and_validate_provider_spec(
208
+ srt_provider_spec
209
+ )
210
+
211
+ # Log the resolved configuration
212
+ if content_model:
213
+ self.stdout.write(
214
+ f"Content provider: {content_provider_name}/{content_model}"
215
+ )
216
+ else:
217
+ self.stdout.write(f"Content provider: {content_provider_name}")
218
+
219
+ if srt_model:
220
+ self.stdout.write(f"SRT provider: {srt_provider_name}/{srt_model}")
221
+ else:
222
+ self.stdout.write(f"SRT provider: {srt_provider_name}")
223
+
224
+ # Validate inputs
225
+ validate_course_inputs(course_archive_path)
226
+
227
+ # Store provider names and models
228
+ self.content_provider_name = content_provider_name
229
+ self.content_model = content_model
230
+ self.srt_provider_name = srt_provider_name
231
+ self.srt_model = srt_model
232
+ self.glossary_directory = glossary_directory
60
233
 
61
234
  # Extract course archive
62
- extracted_dir = self._extract_course_archive(course_dir)
235
+ extracted_course_dir = extract_course_archive(course_archive_path)
63
236
 
64
237
  # Create translated copy
65
- translated_dir = self._create_translated_copy(
66
- extracted_dir, translation_language
238
+ translated_course_dir = create_translated_copy(
239
+ extracted_course_dir, target_language
67
240
  )
68
241
 
242
+ # Store for cleanup on failure
243
+ self.translated_course_dir = translated_course_dir
244
+
69
245
  # Delete extracted directory after copying
70
- if extracted_dir.exists():
71
- shutil.rmtree(extracted_dir)
246
+ if extracted_course_dir.exists():
247
+ shutil.rmtree(extracted_course_dir)
72
248
 
73
- # Translate content
74
- billed_chars = self._translate_course_content(
75
- translated_dir, source_language, translation_language
249
+ # Translate content asynchronously
250
+ self._translate_course_content_async(
251
+ translated_course_dir, source_language, target_language
76
252
  )
77
253
 
254
+ # Wait for all tasks and report status
255
+ self._wait_and_report_tasks()
256
+
78
257
  # Create final archive
79
- archive_path = self._create_translated_archive(
80
- translated_dir, translation_language, course_dir.stem
258
+ translated_archive_path = create_translated_archive(
259
+ translated_course_dir, target_language, course_archive_path.stem
81
260
  )
82
261
 
83
262
  self.stdout.write(
84
263
  self.style.SUCCESS(
85
- f"Translation completed. Archive created: {archive_path}"
264
+ f"Translation completed. Archive created: {translated_archive_path}"
86
265
  )
87
266
  )
88
- logger.info("Total billed characters: %s", billed_chars)
89
267
 
90
268
  except Exception as e:
91
269
  logger.exception("Translation failed")
270
+
271
+ # Cleanup translated course directory on failure
272
+ if self.translated_course_dir and self.translated_course_dir.exists():
273
+ self.stdout.write(
274
+ self.style.WARNING(
275
+ f"Cleaning up translated course directory: {self.translated_course_dir}" # noqa: E501
276
+ )
277
+ )
278
+ shutil.rmtree(self.translated_course_dir)
279
+
92
280
  error_msg = f"Translation failed: {e}"
93
281
  raise CommandError(error_msg) from e
94
282
 
95
- def get_supported_archive_extension(self, filename: str) -> str | None:
96
- """
97
- Return the supported archive extension if filename ends with one, else None.
283
+ def _translate_course_content_async(
284
+ self, course_dir: Path, source_language: str, target_language: str
285
+ ) -> None:
98
286
  """
99
- for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS:
100
- if filename.endswith(ext):
101
- return ext
102
- return None
103
-
104
- def _validate_inputs(self, options: dict[str, Any]) -> None:
105
- """Validate command inputs."""
106
- course_dir = Path(options["course_directory"])
107
-
108
- if not course_dir.exists():
109
- error_msg = f"Course directory not found: {course_dir}"
110
- raise CommandError(error_msg)
111
-
112
- if self.get_supported_archive_extension(course_dir.name) is None:
113
- supported_exts = ", ".join(
114
- settings.OL_OPENEDX_COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS
115
- )
116
- error_msg = f"Course directory must be a tar file: {supported_exts}"
117
- raise CommandError(error_msg)
287
+ Translate all course content using Celery tasks.
118
288
 
119
- if not hasattr(settings, "DEEPL_API_KEY") or not settings.DEEPL_API_KEY:
120
- error_msg = "DEEPL_API_KEY setting is required"
121
- raise CommandError(error_msg)
289
+ Args:
290
+ course_dir: Path to the course directory
291
+ source_language: Source language code
292
+ target_language: Target language code
122
293
 
123
- def _extract_course_archive(self, course_dir: Path) -> Path:
124
- """Extract course archive to working directory."""
125
- # Use the parent directory of the source file as the base extraction directory
126
- extract_base_dir = course_dir.parent
127
-
128
- # Get base name without extension
129
- ext = self.get_supported_archive_extension(course_dir.name)
130
- tarball_base = course_dir.name[: -len(ext)] if ext else course_dir.name
131
-
132
- extracted_dir = extract_base_dir / tarball_base
133
-
134
- if not extracted_dir.exists():
135
- try:
136
- with tarfile.open(course_dir, "r:*") as tar:
137
- # Validate tar file before extraction
138
- self._validate_tar_file(tar)
139
- tar.extractall(path=extracted_dir, filter="data")
140
- except (tarfile.TarError, OSError) as e:
141
- error_msg = f"Failed to extract archive: {e}"
142
- raise CommandError(error_msg) from e
143
-
144
- logger.info("Extracted course to: %s", extracted_dir)
145
- return extracted_dir
146
-
147
- def _validate_tar_file(self, tar: tarfile.TarFile) -> None:
148
- """Validate tar file contents for security."""
149
- for member in tar.getmembers():
150
- # Check for directory traversal attacks
151
- if member.name.startswith("/") or ".." in member.name:
152
- error_msg = f"Unsafe tar member: {member.name}"
153
- raise CommandError(error_msg)
154
- # Check for excessively large files
155
- if (
156
- member.size > 512 * 1024 * 1024
157
- ): # 0.5GB limit because courses on Production are big
158
- error_msg = f"File too large: {member.name}"
159
- raise CommandError(error_msg)
160
-
161
- def _create_translated_copy(
162
- self, source_dir: Path, translation_language: str
163
- ) -> Path:
164
- """Create a copy of the course for translation."""
165
- base_name = source_dir.name
166
- new_dir_name = f"{translation_language}_{base_name}"
167
- new_dir_path = source_dir.parent / new_dir_name
294
+ Raises:
295
+ CommandError: If course directory is not found
296
+ """
297
+ course_directory = course_dir / "course"
168
298
 
169
- if new_dir_path.exists():
170
- error_msg = f"Translation directory already exists: {new_dir_path}"
299
+ if not course_directory.exists() or not course_directory.is_dir():
300
+ error_msg = f"Course directory not found: {course_directory}"
171
301
  raise CommandError(error_msg)
172
302
 
173
- shutil.copytree(source_dir, new_dir_path)
174
- logger.info("Created translation copy: %s", new_dir_path)
175
- return new_dir_path
176
-
177
- def _translate_course_content(
178
- self, course_dir: Path, source_language: str, translation_language: str
179
- ) -> int:
180
- """Translate all course content and return total billed characters."""
181
- total_billed_chars = 0
182
-
183
- # Translate files in main directories
184
- for search_dir in [course_dir, course_dir.parent]:
185
- total_billed_chars += self._translate_files_in_directory(
186
- search_dir, source_language, translation_language, recursive=False
187
- )
303
+ # Update language attributes in course XML, doing this
304
+ # because tasks can override the XML files
305
+ update_course_language_attribute(course_directory, target_language)
188
306
 
189
- # Translate files in target subdirectories
190
- for dir_name in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
191
- target_dir = search_dir / dir_name
192
- if target_dir.exists() and target_dir.is_dir():
193
- total_billed_chars += self._translate_files_in_directory(
194
- target_dir,
195
- source_language,
196
- translation_language,
197
- recursive=True,
198
- )
307
+ # Collect all tasks
308
+ self.tasks = []
199
309
 
200
- # Translate special JSON files
201
- total_billed_chars += self._translate_grading_policy(
202
- course_dir, source_language, translation_language
203
- )
204
- total_billed_chars += self._translate_policy_json(
205
- course_dir, source_language, translation_language
310
+ # Add translation tasks for files in course directory
311
+ self._add_file_translation_tasks(
312
+ course_directory, source_language, target_language, recursive=False
206
313
  )
207
314
 
208
- return total_billed_chars
315
+ # Add translation tasks for target subdirectories
316
+ for target_dir_name in settings.COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
317
+ target_directory = course_directory / target_dir_name
318
+ if target_directory.exists() and target_directory.is_dir():
319
+ self._add_file_translation_tasks(
320
+ target_directory, source_language, target_language, recursive=True
321
+ )
322
+
323
+ # Add tasks for special JSON files
324
+ self._add_grading_policy_tasks(course_dir, target_language)
325
+ self._add_policy_json_tasks(course_dir, target_language)
209
326
 
210
- def _translate_files_in_directory(
327
+ def _add_file_translation_tasks(
211
328
  self,
212
- directory: Path,
329
+ directory_path: Path,
213
330
  source_language: str,
214
- translation_language: str,
331
+ target_language: str,
215
332
  *,
216
333
  recursive: bool = False,
217
- ) -> int:
218
- """Translate files in a directory."""
219
- total_billed_chars = 0
220
-
221
- if recursive:
222
- file_paths: list[Path] = []
223
- for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS:
224
- file_paths.extend(directory.rglob(f"*{ext}"))
225
- else:
226
- file_paths = [
227
- f
228
- for f in directory.iterdir()
229
- if f.is_file()
230
- and any(
231
- f.name.endswith(ext)
232
- for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS # noqa: E501
233
- )
234
- ]
235
-
236
- for file_path in file_paths:
237
- try:
238
- total_billed_chars += self._translate_file(
239
- file_path, source_language, translation_language
240
- )
241
- except (OSError, UnicodeDecodeError) as e:
242
- logger.warning("Failed to translate %s: %s", file_path, e)
243
-
244
- return total_billed_chars
334
+ ) -> None:
335
+ """
336
+ Add Celery tasks for file translation to the task list.
245
337
 
246
- def _translate_file(
247
- self, file_path: Path, source_language: str, translation_language: str
248
- ) -> int:
249
- """Translate a single file and return billed characters."""
250
- try:
251
- content = file_path.read_text(encoding="utf-8")
252
- logger.debug("Translating: %s", file_path)
338
+ Args:
339
+ directory_path: Path to directory containing files to translate
340
+ source_language: Source language code
341
+ target_language: Target language code
342
+ recursive: Whether to search for files recursively
343
+ """
344
+ translatable_file_paths = get_translatable_file_paths(
345
+ directory_path, recursive=recursive
346
+ )
253
347
 
254
- translated_content, billed_chars = self._translate_text(
255
- content, source_language, translation_language, file_path.name
348
+ for file_path in translatable_file_paths:
349
+ task = translate_file_task.s(
350
+ str(file_path),
351
+ source_language,
352
+ target_language,
353
+ self.content_provider_name,
354
+ self.content_model,
355
+ self.srt_provider_name,
356
+ self.srt_model,
357
+ self.glossary_directory,
256
358
  )
359
+ self.tasks.append(("file", str(file_path), task))
360
+ logger.info("Added translation task for: %s", file_path)
257
361
 
258
- # Handle XML display_name translation
259
- if file_path.suffix == ".xml":
260
- translated_content = self._translate_display_name(
261
- translated_content, source_language, translation_language
262
- )
263
-
264
- file_path.write_text(translated_content, encoding="utf-8")
265
- except (OSError, UnicodeDecodeError) as e:
266
- logger.warning("Failed to translate %s: %s", file_path, e)
267
- return 0
268
- else:
269
- return billed_chars
270
-
271
- def _translate_grading_policy(
272
- self, course_dir: Path, source_language: str, translation_language: str
273
- ) -> int:
274
- """Translate grading_policy.json files."""
275
- total_billed_chars = 0
276
- policies_dir = course_dir / "course" / "policies"
362
+ def _add_grading_policy_tasks(self, course_dir: Path, target_language: str) -> None:
363
+ """
364
+ Add Celery tasks for grading_policy.json translation to the task list.
277
365
 
278
- if not policies_dir.exists():
279
- return 0
366
+ Args:
367
+ course_dir: Path to the course directory
368
+ target_language: Target language code
369
+ """
370
+ course_policies_dir = course_dir / "course" / "policies"
280
371
 
281
- for child_dir in policies_dir.iterdir():
282
- if not child_dir.is_dir():
283
- continue
372
+ if not course_policies_dir.exists():
373
+ return
284
374
 
285
- grading_policy_path = child_dir / "grading_policy.json"
286
- if not grading_policy_path.exists():
375
+ for policy_child_dir in course_policies_dir.iterdir():
376
+ if not policy_child_dir.is_dir():
287
377
  continue
288
378
 
289
- try:
290
- grading_policy = json.loads(
291
- grading_policy_path.read_text(encoding="utf-8")
379
+ grading_policy_file = policy_child_dir / "grading_policy.json"
380
+ if grading_policy_file.exists():
381
+ task = translate_grading_policy_task.s(
382
+ str(grading_policy_file),
383
+ target_language,
384
+ self.content_provider_name,
385
+ self.content_model,
386
+ self.glossary_directory,
292
387
  )
293
- updated = False
294
-
295
- for item in grading_policy.get("GRADER", []):
296
- if "short_label" in item:
297
- translated_label, billed_chars = self._translate_text(
298
- item["short_label"], source_language, translation_language
299
- )
300
- item["short_label"] = translated_label
301
- total_billed_chars += billed_chars
302
- updated = True
303
-
304
- if updated:
305
- grading_policy_path.write_text(
306
- json.dumps(grading_policy, ensure_ascii=False, indent=4),
307
- encoding="utf-8",
308
- )
309
- except (OSError, json.JSONDecodeError) as e:
310
- logger.warning(
311
- "Failed to translate grading policy in %s: %s", child_dir, e
312
- )
313
-
314
- return total_billed_chars
388
+ self.tasks.append(("grading_policy", str(grading_policy_file), task))
389
+ logger.info("Added grading policy task for: %s", grading_policy_file)
315
390
 
316
- def _translate_policy_json(
317
- self, course_dir: Path, source_language: str, translation_language: str
318
- ) -> int:
319
- """Translate policy.json files."""
320
- total_billed_chars = 0
321
- policies_dir = course_dir / "course" / "policies"
391
+ def _add_policy_json_tasks(self, course_dir: Path, target_language: str) -> None:
392
+ """
393
+ Add Celery tasks for policy.json translation to the task list.
322
394
 
323
- if not policies_dir.exists():
324
- return 0
395
+ Args:
396
+ course_dir: Path to the course directory
397
+ target_language: Target language code
398
+ """
399
+ course_policies_dir = course_dir / "course" / "policies"
325
400
 
326
- for child_dir in policies_dir.iterdir():
327
- if not child_dir.is_dir():
328
- continue
401
+ if not course_policies_dir.exists():
402
+ return
329
403
 
330
- policy_path = child_dir / "policy.json"
331
- if not policy_path.exists():
404
+ for policy_child_dir in course_policies_dir.iterdir():
405
+ if not policy_child_dir.is_dir():
332
406
  continue
333
407
 
334
- try:
335
- policy_data = json.loads(policy_path.read_text(encoding="utf-8"))
336
- updated = False
337
-
338
- for course_obj in policy_data.values():
339
- if not isinstance(course_obj, dict):
340
- continue
341
-
342
- # Translate various fields
343
- billed_chars, field_updated = self._translate_policy_fields(
344
- course_obj, source_language, translation_language
345
- )
346
- total_billed_chars += billed_chars
347
- updated = updated or field_updated
348
-
349
- if updated:
350
- policy_path.write_text(
351
- json.dumps(policy_data, ensure_ascii=False, indent=4),
352
- encoding="utf-8",
353
- )
354
- except (OSError, json.JSONDecodeError) as e:
355
- logger.warning("Failed to translate policy in %s: %s", child_dir, e)
356
-
357
- return total_billed_chars
408
+ policy_file = policy_child_dir / "policy.json"
409
+ if policy_file.exists():
410
+ task = translate_policy_json_task.s(
411
+ str(policy_file),
412
+ target_language,
413
+ self.content_provider_name,
414
+ self.content_model,
415
+ self.glossary_directory,
416
+ )
417
+ self.tasks.append(("policy", str(policy_file), task))
418
+ logger.info("Added policy.json task for: %s", policy_file)
358
419
 
359
- def _translate_policy_fields(
360
- self,
361
- course_obj: dict[str, Any],
362
- source_language: str,
363
- translation_language: str,
364
- ) -> tuple[int, bool]:
365
- """Translate specific fields in policy object."""
366
- total_billed_chars = 0
367
- updated = False
368
-
369
- # Translate simple string fields
370
- billed_chars, field_updated = self._translate_string_fields(
371
- course_obj, source_language, translation_language
372
- )
373
- total_billed_chars += billed_chars
374
- updated = updated or field_updated
420
+ def _wait_and_report_tasks(self) -> None: # noqa: C901, PLR0915, PLR0912
421
+ """
422
+ Execute all tasks as a Celery group and wait for completion.
375
423
 
376
- # Translate discussion topics
377
- billed_chars, field_updated = self._translate_discussion_topics(
378
- course_obj, source_language, translation_language
379
- )
380
- total_billed_chars += billed_chars
381
- updated = updated or field_updated
424
+ Uses Celery's group primitive to execute tasks in parallel and
425
+ provides detailed progress reporting.
382
426
 
383
- # Translate learning info and tabs
384
- billed_chars, field_updated = self._translate_learning_info_and_tabs(
385
- course_obj, source_language, translation_language
386
- )
387
- total_billed_chars += billed_chars
388
- updated = updated or field_updated
427
+ Raises:
428
+ CommandError: If any tasks fail
429
+ """
430
+ if not self.tasks:
431
+ self.stdout.write("No tasks to execute.")
432
+ return
389
433
 
390
- # Translate XML attributes
391
- billed_chars, field_updated = self._translate_xml_attributes(
392
- course_obj, source_language, translation_language
434
+ total_tasks = len(self.tasks)
435
+ self.stdout.write(
436
+ f"\nExecuting {total_tasks} translation tasks in parallel...\n"
393
437
  )
394
- total_billed_chars += billed_chars
395
- updated = updated or field_updated
396
-
397
- return total_billed_chars, updated
398
438
 
399
- def _translate_string_fields(
400
- self,
401
- course_obj: dict[str, Any],
402
- source_language: str,
403
- translation_language: str,
404
- ) -> tuple[int, bool]:
405
- """Translate simple string fields."""
406
- total_billed_chars = 0
407
- updated = False
408
-
409
- string_fields = ["advertised_start", "display_name", "display_organization"]
410
- for field in string_fields:
411
- if field in course_obj:
412
- translated, billed_chars = self._translate_text(
413
- course_obj[field], source_language, translation_language
414
- )
415
- course_obj[field] = translated
416
- total_billed_chars += billed_chars
417
- updated = True
418
-
419
- return total_billed_chars, updated
420
-
421
- def _translate_discussion_topics(
422
- self,
423
- course_obj: dict[str, Any],
424
- source_language: str,
425
- translation_language: str,
426
- ) -> tuple[int, bool]:
427
- """Translate discussion topics."""
428
- total_billed_chars = 0
429
- updated = False
430
-
431
- if "discussion_topics" in course_obj:
432
- topics = course_obj["discussion_topics"]
433
- if isinstance(topics, dict):
434
- new_topics = {}
435
- for topic_key, value in topics.items():
436
- translated_key, billed_chars = self._translate_text(
437
- topic_key, source_language, translation_language
438
- )
439
- new_topics[translated_key] = value
440
- total_billed_chars += billed_chars
441
- course_obj["discussion_topics"] = new_topics
442
- updated = True
443
-
444
- return total_billed_chars, updated
439
+ # Extract task signatures and create mappings
440
+ task_signatures = [task_sig for _, _, task_sig in self.tasks]
441
+ task_metadata = {
442
+ i: (task_type, file_path)
443
+ for i, (task_type, file_path, _) in enumerate(self.tasks)
444
+ }
445
445
 
446
- def _translate_learning_info_and_tabs(
447
- self,
448
- course_obj: dict[str, Any],
449
- source_language: str,
450
- translation_language: str,
451
- ) -> tuple[int, bool]:
452
- """Translate learning info and tabs."""
453
- total_billed_chars = 0
454
- updated = False
455
-
456
- # Learning info
457
- if "learning_info" in course_obj and isinstance(
458
- course_obj["learning_info"], list
459
- ):
460
- translated_info = []
461
- for item in course_obj["learning_info"]:
462
- translated, billed_chars = self._translate_text(
463
- item, source_language, translation_language
464
- )
465
- translated_info.append(translated)
466
- total_billed_chars += billed_chars
467
- course_obj["learning_info"] = translated_info
468
- updated = True
469
-
470
- # Tabs
471
- if "tabs" in course_obj and isinstance(course_obj["tabs"], list):
472
- for tab in course_obj["tabs"]:
473
- if isinstance(tab, dict) and "name" in tab:
474
- translated, billed_chars = self._translate_text(
475
- tab["name"], source_language, translation_language
476
- )
477
- tab["name"] = translated
478
- total_billed_chars += billed_chars
479
- updated = True
446
+ # Create and execute group
447
+ job = group(task_signatures)
448
+ result = job.apply_async()
480
449
 
481
- return total_billed_chars, updated
450
+ # Wait for all tasks to complete with progress reporting
451
+ completed_count = 0
452
+ self.stdout.write(f"Progress: 0/{total_tasks} tasks completed")
453
+ self.stdout.flush()
482
454
 
483
- def _translate_xml_attributes(
484
- self,
485
- course_obj: dict[str, Any],
486
- source_language: str,
487
- translation_language: str,
488
- ) -> tuple[int, bool]:
489
- """Translate XML attributes."""
490
- total_billed_chars = 0
491
- updated = False
492
-
493
- if "xml_attributes" in course_obj and isinstance(
494
- course_obj["xml_attributes"], dict
495
- ):
496
- xml_attrs = course_obj["xml_attributes"]
497
- xml_fields = [
498
- "diplay_name",
499
- "info_sidebar_name",
500
- ] # Note: keeping typo as in original
501
- for field in xml_fields:
502
- if field in xml_attrs:
503
- translated, billed_chars = self._translate_text(
504
- xml_attrs[field], source_language, translation_language
455
+ try:
456
+ # Poll for completion and show progress
457
+ while not result.ready():
458
+ # Count completed tasks
459
+ new_completed = sum(1 for r in result.results if r.ready())
460
+ if new_completed > completed_count:
461
+ completed_count = new_completed
462
+ self.stdout.write(
463
+ f"\rProgress: {completed_count}/{total_tasks} tasks completed",
464
+ ending="",
505
465
  )
506
- xml_attrs[field] = translated
507
- total_billed_chars += billed_chars
508
- updated = True
509
-
510
- return total_billed_chars, updated
511
-
512
- def _create_translated_archive(
513
- self, translated_dir: Path, translation_language: str, original_name: str
514
- ) -> Path:
515
- """Create tar.gz archive of translated course."""
516
- # Remove all archive extensions from the original name
517
- ext = self.get_supported_archive_extension(original_name)
518
- clean_name = original_name[: -len(ext)] if ext else original_name
466
+ self.stdout.flush()
519
467
 
520
- tar_gz_name = f"{translation_language}_{clean_name}.tar.gz"
521
- tar_gz_path = translated_dir.parent / tar_gz_name
468
+ # Sleep before next poll (don't use join with timeout)
469
+ time.sleep(TASK_POLL_INTERVAL_SECONDS)
522
470
 
523
- # Remove existing archive
524
- if tar_gz_path.exists():
525
- tar_gz_path.unlink()
526
-
527
- # Create tar.gz archive containing only the 'course' directory
528
- course_dir_path = translated_dir / "course"
529
- with tarfile.open(tar_gz_path, "w:gz") as tar:
530
- tar.add(course_dir_path, arcname="course")
531
-
532
- # Delete extracted directory after copying
533
- if translated_dir.exists():
534
- shutil.rmtree(translated_dir)
535
-
536
- logger.info("Created tar.gz archive: %s", tar_gz_path)
537
- return tar_gz_path
538
-
539
- def _translate_text(
540
- self,
541
- text: str,
542
- source_language: str,
543
- target_language: str,
544
- filename: str | None = None,
545
- ) -> tuple[str, int]:
546
- """Translate text using DeepL API."""
547
- if not text or not text.strip():
548
- return text, 0
549
-
550
- try:
551
- deepl_client = deepl.Translator(settings.DEEPL_API_KEY)
552
-
553
- tag_handling = None
554
- if filename:
555
- extension = Path(filename).suffix.lstrip(".")
556
- if extension in ["html", "xml"]:
557
- tag_handling = extension
558
-
559
- result = deepl_client.translate_text(
560
- text,
561
- source_lang=source_language,
562
- target_lang=target_language,
563
- tag_handling=tag_handling,
471
+ # Final update
472
+ self.stdout.write(
473
+ f"\rProgress: {total_tasks}/{total_tasks} tasks completed\n"
564
474
  )
565
475
 
566
- return result.text, result.billed_characters # noqa: TRY300
567
- except (deepl.exceptions.DeepLException, OSError) as e:
568
- logger.warning("Translation failed for text: %s... Error: %s", text[:50], e)
569
- return text, 0
476
+ # Get all results (this will raise exceptions if propagate=True)
477
+ results = result.get(timeout=TASK_TIMEOUT_SECONDS, propagate=False)
570
478
 
571
- def _translate_display_name(
572
- self, xml_content: str, source_language: str, target_language: str
573
- ) -> str:
574
- """Extract and translate the display_name attribute of the root element."""
575
- try:
576
- root = ElementTree.fromstring(xml_content)
577
- display_name = root.attrib.get("display_name")
479
+ except Exception as e:
480
+ logger.exception("Task execution failed")
481
+ error_msg = f"Task execution timeout or error: {e}"
482
+ raise CommandError(error_msg) from e
578
483
 
579
- if display_name:
580
- translated_name, _ = self._translate_text(
581
- display_name, source_language, target_language
484
+ # Process results
485
+ completed_tasks = 0
486
+ failed_tasks = 0
487
+ skipped_tasks = 0
488
+
489
+ for i, task_result in enumerate(results):
490
+ task_type, file_path = task_metadata[i]
491
+
492
+ if isinstance(task_result, dict):
493
+ status = task_result.get("status", "unknown")
494
+
495
+ if status == "success":
496
+ completed_tasks += 1
497
+ self.stdout.write(self.style.SUCCESS(f"✓ {task_type}: {file_path}"))
498
+ elif status == "skipped":
499
+ skipped_tasks += 1
500
+ reason = task_result.get("reason", "Skipped")
501
+ self.stdout.write(
502
+ self.style.WARNING(f"⊘ {task_type}: {file_path} - {reason}")
503
+ )
504
+ elif status == "error":
505
+ failed_tasks += 1
506
+ error = task_result.get("error", "Unknown error")
507
+ self.stdout.write(
508
+ self.style.ERROR(f"✗ {task_type}: {file_path} - {error}")
509
+ )
510
+ else:
511
+ failed_tasks += 1
512
+ self.stdout.write(
513
+ self.style.ERROR(
514
+ f"✗ {task_type}: {file_path} - Unknown status: {status}"
515
+ )
516
+ )
517
+ else:
518
+ # Task raised an exception
519
+ failed_tasks += 1
520
+ error_msg = str(task_result) if task_result else "Task failed"
521
+ self.stdout.write(
522
+ self.style.ERROR(f"✗ {task_type}: {file_path} - {error_msg}")
582
523
  )
583
- root.set("display_name", translated_name)
584
- return ElementTree.tostring(root, encoding="unicode")
585
- except ElementTree.ParseError as e:
586
- logger.warning("Could not translate display_name: %s", e)
587
524
 
588
- return xml_content
525
+ # Print summary
526
+ self.stdout.write("\n" + "=" * 60)
527
+ self.stdout.write(self.style.SUCCESS(f"Total tasks: {total_tasks}"))
528
+ self.stdout.write(self.style.SUCCESS(f"Completed: {completed_tasks}"))
529
+ if skipped_tasks > 0:
530
+ self.stdout.write(self.style.WARNING(f"Skipped: {skipped_tasks}"))
531
+ if failed_tasks > 0:
532
+ self.stdout.write(self.style.ERROR(f"Failed: {failed_tasks}"))
533
+ self.stdout.write("=" * 60 + "\n")
534
+
535
+ if failed_tasks > 0:
536
+ error_msg = f"{failed_tasks} translation tasks failed"
537
+ raise CommandError(error_msg)