PyPI - ol-openedx-course-translations - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend - Supply Chain Defender

ol-openedx-course-translations: 0.1.0 (py3-none-any.whl) → 0.3.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (40) hide show

ol_openedx_course_translations/management/commands/translate_course.py CHANGED Viewed

@@ -2,25 +2,64 @@
 Management command to translate course content to a specified language.
 """
-import json
 import logging
 import shutil
-import tarfile
+import time
 from pathlib import Path
-from typing import Any
-import deepl
-from defusedxml import ElementTree
+from celery import group
 from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
+from ol_openedx_course_translations.models import CourseTranslationLog
+from ol_openedx_course_translations.tasks import (
+    translate_file_task,
+    translate_grading_policy_task,
+    translate_policy_json_task,
+)
+from ol_openedx_course_translations.utils.constants import PROVIDER_DEEPL
+from ol_openedx_course_translations.utils.course_translations import (
+    create_translated_archive,
+    create_translated_copy,
+    extract_course_archive,
+    generate_course_key_from_xml,
+    get_translatable_file_paths,
+    update_course_language_attribute,
+    validate_course_inputs,
+)
 logger = logging.getLogger(__name__)
+# Task configuration
+TASK_TIMEOUT_SECONDS = 3600 * 2  # 2 hour total timeout for all tasks
+TASK_POLL_INTERVAL_SECONDS = 2  # Poll every 2 seconds for task completion
 class Command(BaseCommand):
     """Translate given course content to the specified language."""
-    help = "Translate course content to the specified language."
+    help = (
+        "Translate course content to the specified language.\n\n"
+        "Configuration:\n"
+        "All translation providers should be configured in TRANSLATIONS_PROVIDERS:\n"
+        "{\n"
+        '    "deepl": {"api_key": "<YOUR_DEEPL_API_KEY>"},\n'
+        '    "openai": {"api_key": "<KEY>", "default_model": "gpt-5.2"},\n'
+        '    "gemini": {"api_key": "<KEY>", "default_model": "gemini-3-pro-preview"},\n'
+        '    "mistral": {"api_key": "<KEY>", "default_model": "mistral-large-latest"}\n'
+        "}\n"
+    )
+    def __init__(self, *args, **kwargs):
+        """Initialize the command with empty task list."""
+        super().__init__(*args, **kwargs)
+        self.tasks = []
+        self.translated_course_dir = None
+        self.content_provider_name = None
+        self.content_model = None
+        self.srt_provider_name = None
+        self.srt_model = None
+        self.glossary_directory = None
     def add_arguments(self, parser) -> None:
         """Entry point for subclassed commands to add custom arguments."""
@@ -34,8 +73,8 @@ class Command(BaseCommand):
             ),
         )
         parser.add_argument(
-            "--translation-language",
-            dest="translation_language",
+            "--target-language",
+            dest="target_language",
             required=True,
             help=(
                 "Specify the language code in ISO format "
@@ -44,545 +83,503 @@ class Command(BaseCommand):
         )
         parser.add_argument(
             "--course-dir",
-            dest="course_directory",
+            dest="course_archive_path",
             required=True,
             help="Specify the course directory (tar archive).",
         )
+        parser.add_argument(
+            "--content-translation-provider",
+            dest="content_translation_provider",
+            required=True,
+            help=(
+                "Translation provider for content (XML/HTML and text). "
+                "Format: 'deepl', 'PROVIDER', or 'PROVIDER/MODEL' "
+                "(e.g., 'openai', 'openai/gpt-5.2', 'gemini', 'gemini/gemini-3-pro-preview'). "  # noqa: E501
+                "If model is not specified, uses the default model from settings."
+            ),
+        )
+        parser.add_argument(
+            "--srt-translation-provider",
+            dest="srt_translation_provider",
+            required=True,
+            help=(
+                "Translation provider for SRT subtitles. "
+                "Format: 'deepl', 'PROVIDER', or 'PROVIDER/MODEL' "
+                "(e.g., 'openai', 'openai/gpt-5.2', 'gemini', 'gemini/gemini-3-pro-preview'). "  # noqa: E501
+                "If model is not specified, uses the default model from settings."
+            ),
+        )
+        parser.add_argument(
+            "--glossary-dir",
+            dest="glossary_directory",
+            required=False,
+            help=(
+                "Path to glossary directory containing "
+                "language-specific glossary files."
+            ),
+        )
+    def _parse_and_validate_provider_spec(
+        self, provider_spec: str
+    ) -> tuple[str, str | None]:
+        """
+        Parse and validate provider specification into provider name and model.
+        Resolves model from settings if not provided in specification.
+        Args:
+            provider_spec: Provider specification
+        Returns:
+            Tuple of (provider_name, model_name). model_name is None for DeepL or
+            resolved from settings if not specified.
+        Raises:
+            CommandError: If provider specification format is invalid
+            or model and api_key cannot be resolved
+        """
+        # Parse the specification
+        if "/" in provider_spec:
+            parts = provider_spec.split("/", 1)
+            if len(parts) != 2 or not parts[0] or not parts[1]:  # noqa: PLR2004
+                error_msg = (
+                    f"Invalid provider specification: {provider_spec}. "
+                    "Use format 'PROVIDER' or 'PROVIDER/MODEL' "
+                    "(e.g., 'openai', 'openai/gpt-5.2')"
+                )
+                raise CommandError(error_msg)
+            provider_name = parts[0].lower()
+            model_name = parts[1]
+        else:
+            provider_name = provider_spec.lower()
+            model_name = None
+        # Try to get default model from settings
+        providers_config = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
+        if provider_name not in providers_config:
+            error_msg = (
+                f"Provider '{provider_name}' not configured in TRANSLATIONS_PROVIDERS. "
+                f"Available providers: {', '.join(providers_config.keys())}"
+            )
+            raise CommandError(error_msg)
+        provider_config = providers_config[provider_name]
+        api_key = provider_config.get("api_key")
+        if not api_key:
+            error_msg = (
+                f"API key for provider '{provider_name}' is not configured in "
+                "TRANSLATIONS_PROVIDERS. Please set the 'api_key' in settings."
+            )
+            raise CommandError(error_msg)
+        # DeepL doesn't use models
+        if provider_name == PROVIDER_DEEPL:
+            return provider_name, None
+        # If model is explicitly provided, return it
+        if model_name:
+            return provider_name, model_name
+        default_model = provider_config.get("default_model")
+        if not default_model:
+            error_msg = (
+                f"No model specified for provider '{provider_name}' and no "
+                f"default_model found in TRANSLATIONS_PROVIDERS['{provider_name}']. "
+                f"Either specify a model (e.g., '{provider_name}/gpt-5.2') or "
+                f"configure a default_model in settings."
+            )
+            raise CommandError(error_msg)
+        return provider_name, default_model
     def handle(self, **options) -> None:
         """Handle the translate_course command."""
         try:
-            self._validate_inputs(options)
+            start_time = time.perf_counter()
+            course_archive_path = Path(options["course_archive_path"])
+            source_language = options["source_language"].upper()
+            target_language = options["target_language"].upper()
+            content_provider_spec = options["content_translation_provider"]
+            srt_provider_spec = options["srt_translation_provider"]
+            glossary_directory = options.get("glossary_directory")
+            # Parse and validate provider specifications (includes validation)
+            content_provider_name, content_model = (
+                self._parse_and_validate_provider_spec(content_provider_spec)
+            )
+            srt_provider_name, srt_model = self._parse_and_validate_provider_spec(
+                srt_provider_spec
+            )
-            course_dir = Path(options["course_directory"])
-            source_language = options["source_language"]
-            translation_language = options["translation_language"]
+            # Log the resolved configuration
+            if content_model:
+                self.stdout.write(
+                    f"Content provider: {content_provider_name}/{content_model}"
+                )
+            else:
+                self.stdout.write(f"Content provider: {content_provider_name}")
+            if srt_model:
+                self.stdout.write(f"SRT provider: {srt_provider_name}/{srt_model}")
+            else:
+                self.stdout.write(f"SRT provider: {srt_provider_name}")
+            # Validate inputs
+            validate_course_inputs(course_archive_path)
+            # Store provider names and models
+            self.content_provider_name = content_provider_name
+            self.content_model = content_model
+            self.srt_provider_name = srt_provider_name
+            self.srt_model = srt_model
+            self.glossary_directory = glossary_directory
             # Extract course archive
-            extracted_dir = self._extract_course_archive(course_dir)
+            extracted_course_dir = extract_course_archive(course_archive_path)
             # Create translated copy
-            translated_dir = self._create_translated_copy(
-                extracted_dir, translation_language
+            translated_course_dir = create_translated_copy(
+                extracted_course_dir, target_language
             )
+            # Store for cleanup on failure
+            self.translated_course_dir = translated_course_dir
             # Delete extracted directory after copying
-            if extracted_dir.exists():
-                shutil.rmtree(extracted_dir)
+            if extracted_course_dir.exists():
+                shutil.rmtree(extracted_course_dir)
-            # Translate content
-            billed_chars = self._translate_course_content(
-                translated_dir, source_language, translation_language
+            # Translate content asynchronously
+            self._translate_course_content_async(
+                translated_course_dir, source_language, target_language
             )
+            # Wait for all tasks and report status
+            command_stats = self._wait_and_report_tasks()
+            total_time_taken_msg = (
+                f"Command finished in: {time.perf_counter() - start_time:.2f} seconds."
+            )
+            self.stdout.write(self.style.SUCCESS(total_time_taken_msg))
+            command_stats.append(total_time_taken_msg)
+            # Add translation log entry
+            self._add_translation_log_entry(
+                source_language=source_language,
+                target_language=target_language,
+                command_stats=command_stats,
+            )
             # Create final archive
-            archive_path = self._create_translated_archive(
-                translated_dir, translation_language, course_dir.stem
+            translated_archive_path = create_translated_archive(
+                translated_course_dir, target_language, course_archive_path.stem
             )
-            self.stdout.write(
-                self.style.SUCCESS(
-                    f"Translation completed. Archive created: {archive_path}"
-                )
+            success_msg = (
+                f"Translation completed successfully. Translated archive created: "
+                f"{translated_archive_path}"
             )
-            logger.info("Total billed characters: %s", billed_chars)
+            self.stdout.write(self.style.SUCCESS(success_msg))
         except Exception as e:
             logger.exception("Translation failed")
+            # Cleanup translated course directory on failure
+            if self.translated_course_dir and self.translated_course_dir.exists():
+                self.stdout.write(
+                    self.style.WARNING(
+                        f"Cleaning up translated course directory: {self.translated_course_dir}"  # noqa: E501
+                    )
+                )
+                shutil.rmtree(self.translated_course_dir)
             error_msg = f"Translation failed: {e}"
             raise CommandError(error_msg) from e
-    def get_supported_archive_extension(self, filename: str) -> str | None:
-        """
-        Return the supported archive extension if filename ends with one, else None.
+    def _translate_course_content_async(
+        self, course_dir: Path, source_language: str, target_language: str
+    ) -> None:
         """
-        for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS:
-            if filename.endswith(ext):
-                return ext
-        return None
+        Translate all course content using Celery tasks.
-    def _validate_inputs(self, options: dict[str, Any]) -> None:
-        """Validate command inputs."""
-        course_dir = Path(options["course_directory"])
+        Args:
+            course_dir: Path to the course directory
+            source_language: Source language code
+            target_language: Target language code
-        if not course_dir.exists():
-            error_msg = f"Course directory not found: {course_dir}"
-            raise CommandError(error_msg)
-        if self.get_supported_archive_extension(course_dir.name) is None:
-            supported_exts = ", ".join(
-                settings.OL_OPENEDX_COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS
-            )
-            error_msg = f"Course directory must be a tar file: {supported_exts}"
-            raise CommandError(error_msg)
-        if not hasattr(settings, "DEEPL_API_KEY") or not settings.DEEPL_API_KEY:
-            error_msg = "DEEPL_API_KEY setting is required"
-            raise CommandError(error_msg)
-    def _extract_course_archive(self, course_dir: Path) -> Path:
-        """Extract course archive to working directory."""
-        # Use the parent directory of the source file as the base extraction directory
-        extract_base_dir = course_dir.parent
-        # Get base name without extension
-        ext = self.get_supported_archive_extension(course_dir.name)
-        tarball_base = course_dir.name[: -len(ext)] if ext else course_dir.name
-        extracted_dir = extract_base_dir / tarball_base
-        if not extracted_dir.exists():
-            try:
-                with tarfile.open(course_dir, "r:*") as tar:
-                    # Validate tar file before extraction
-                    self._validate_tar_file(tar)
-                    tar.extractall(path=extracted_dir, filter="data")
-            except (tarfile.TarError, OSError) as e:
-                error_msg = f"Failed to extract archive: {e}"
-                raise CommandError(error_msg) from e
-        logger.info("Extracted course to: %s", extracted_dir)
-        return extracted_dir
-    def _validate_tar_file(self, tar: tarfile.TarFile) -> None:
-        """Validate tar file contents for security."""
-        for member in tar.getmembers():
-            # Check for directory traversal attacks
-            if member.name.startswith("/") or ".." in member.name:
-                error_msg = f"Unsafe tar member: {member.name}"
-                raise CommandError(error_msg)
-            # Check for excessively large files
-            if (
-                member.size > 512 * 1024 * 1024
-            ):  # 0.5GB limit because courses on Production are big
-                error_msg = f"File too large: {member.name}"
-                raise CommandError(error_msg)
-    def _create_translated_copy(
-        self, source_dir: Path, translation_language: str
-    ) -> Path:
-        """Create a copy of the course for translation."""
-        base_name = source_dir.name
-        new_dir_name = f"{translation_language}_{base_name}"
-        new_dir_path = source_dir.parent / new_dir_name
+        Raises:
+            CommandError: If course directory is not found
+        """
+        course_directory = course_dir / "course"
-        if new_dir_path.exists():
-            error_msg = f"Translation directory already exists: {new_dir_path}"
+        if not course_directory.exists() or not course_directory.is_dir():
+            error_msg = f"Course directory not found: {course_directory}"
             raise CommandError(error_msg)
-        shutil.copytree(source_dir, new_dir_path)
-        logger.info("Created translation copy: %s", new_dir_path)
-        return new_dir_path
-    def _translate_course_content(
-        self, course_dir: Path, source_language: str, translation_language: str
-    ) -> int:
-        """Translate all course content and return total billed characters."""
-        total_billed_chars = 0
-        # Translate files in main directories
-        for search_dir in [course_dir, course_dir.parent]:
-            total_billed_chars += self._translate_files_in_directory(
-                search_dir, source_language, translation_language, recursive=False
-            )
+        # Update language attributes in course XML, doing this
+        # because tasks can override the XML files
+        update_course_language_attribute(course_directory, target_language)
-            # Translate files in target subdirectories
-            for dir_name in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
-                target_dir = search_dir / dir_name
-                if target_dir.exists() and target_dir.is_dir():
-                    total_billed_chars += self._translate_files_in_directory(
-                        target_dir,
-                        source_language,
-                        translation_language,
-                        recursive=True,
-                    )
+        # Collect all tasks
+        self.tasks = []
-        # Translate special JSON files
-        total_billed_chars += self._translate_grading_policy(
-            course_dir, source_language, translation_language
-        )
-        total_billed_chars += self._translate_policy_json(
-            course_dir, source_language, translation_language
+        # Add translation tasks for files in course directory
+        self._add_file_translation_tasks(
+            course_directory, source_language, target_language, recursive=False
         )
-        return total_billed_chars
+        # Add translation tasks for target subdirectories
+        for target_dir_name in settings.COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
+            target_directory = course_directory / target_dir_name
+            if target_directory.exists() and target_directory.is_dir():
+                self._add_file_translation_tasks(
+                    target_directory, source_language, target_language, recursive=True
+                )
+        # Add tasks for special JSON files
+        self._add_grading_policy_tasks(course_dir, target_language)
+        self._add_policy_json_tasks(course_dir, target_language)
-    def _translate_files_in_directory(
+    def _add_file_translation_tasks(
         self,
-        directory: Path,
+        directory_path: Path,
         source_language: str,
-        translation_language: str,
+        target_language: str,
         *,
         recursive: bool = False,
-    ) -> int:
-        """Translate files in a directory."""
-        total_billed_chars = 0
-        if recursive:
-            file_paths: list[Path] = []
-            for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS:
-                file_paths.extend(directory.rglob(f"*{ext}"))
-        else:
-            file_paths = [
-                f
-                for f in directory.iterdir()
-                if f.is_file()
-                and any(
-                    f.name.endswith(ext)
-                    for ext in settings.OL_OPENEDX_COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS  # noqa: E501
-                )
-            ]
-        for file_path in file_paths:
-            try:
-                total_billed_chars += self._translate_file(
-                    file_path, source_language, translation_language
-                )
-            except (OSError, UnicodeDecodeError) as e:
-                logger.warning("Failed to translate %s: %s", file_path, e)
-        return total_billed_chars
+    ) -> None:
+        """
+        Add Celery tasks for file translation to the task list.
-    def _translate_file(
-        self, file_path: Path, source_language: str, translation_language: str
-    ) -> int:
-        """Translate a single file and return billed characters."""
-        try:
-            content = file_path.read_text(encoding="utf-8")
-            logger.debug("Translating: %s", file_path)
+        Args:
+            directory_path: Path to directory containing files to translate
+            source_language: Source language code
+            target_language: Target language code
+            recursive: Whether to search for files recursively
+        """
+        translatable_file_paths = get_translatable_file_paths(
+            directory_path, recursive=recursive
+        )
-            translated_content, billed_chars = self._translate_text(
-                content, source_language, translation_language, file_path.name
+        for file_path in translatable_file_paths:
+            task = translate_file_task.s(
+                str(file_path),
+                source_language,
+                target_language,
+                self.content_provider_name,
+                self.content_model,
+                self.srt_provider_name,
+                self.srt_model,
+                self.glossary_directory,
             )
+            self.tasks.append(("file", str(file_path), task))
+            logger.info("Added translation task for: %s", file_path)
-            # Handle XML display_name translation
-            if file_path.suffix == ".xml":
-                translated_content = self._translate_display_name(
-                    translated_content, source_language, translation_language
-                )
-            file_path.write_text(translated_content, encoding="utf-8")
-        except (OSError, UnicodeDecodeError) as e:
-            logger.warning("Failed to translate %s: %s", file_path, e)
-            return 0
-        else:
-            return billed_chars
-    def _translate_grading_policy(
-        self, course_dir: Path, source_language: str, translation_language: str
-    ) -> int:
-        """Translate grading_policy.json files."""
-        total_billed_chars = 0
-        policies_dir = course_dir / "course" / "policies"
+    def _add_grading_policy_tasks(self, course_dir: Path, target_language: str) -> None:
+        """
+        Add Celery tasks for grading_policy.json translation to the task list.
-        if not policies_dir.exists():
-            return 0
+        Args:
+            course_dir: Path to the course directory
+            target_language: Target language code
+        """
+        course_policies_dir = course_dir / "course" / "policies"
-        for child_dir in policies_dir.iterdir():
-            if not child_dir.is_dir():
-                continue
+        if not course_policies_dir.exists():
+            return
-            grading_policy_path = child_dir / "grading_policy.json"
-            if not grading_policy_path.exists():
+        for policy_child_dir in course_policies_dir.iterdir():
+            if not policy_child_dir.is_dir():
                 continue
-            try:
-                grading_policy = json.loads(
-                    grading_policy_path.read_text(encoding="utf-8")
-                )
-                updated = False
-                for item in grading_policy.get("GRADER", []):
-                    if "short_label" in item:
-                        translated_label, billed_chars = self._translate_text(
-                            item["short_label"], source_language, translation_language
-                        )
-                        item["short_label"] = translated_label
-                        total_billed_chars += billed_chars
-                        updated = True
-                if updated:
-                    grading_policy_path.write_text(
-                        json.dumps(grading_policy, ensure_ascii=False, indent=4),
-                        encoding="utf-8",
-                    )
-            except (OSError, json.JSONDecodeError) as e:
-                logger.warning(
-                    "Failed to translate grading policy in %s: %s", child_dir, e
+            grading_policy_file = policy_child_dir / "grading_policy.json"
+            if grading_policy_file.exists():
+                task = translate_grading_policy_task.s(
+                    str(grading_policy_file),
+                    target_language,
+                    self.content_provider_name,
+                    self.content_model,
+                    self.glossary_directory,
                 )
+                self.tasks.append(("grading_policy", str(grading_policy_file), task))
+                logger.info("Added grading policy task for: %s", grading_policy_file)
-        return total_billed_chars
-    def _translate_policy_json(
-        self, course_dir: Path, source_language: str, translation_language: str
-    ) -> int:
-        """Translate policy.json files."""
-        total_billed_chars = 0
-        policies_dir = course_dir / "course" / "policies"
+    def _add_policy_json_tasks(self, course_dir: Path, target_language: str) -> None:
+        """
+        Add Celery tasks for policy.json translation to the task list.
-        if not policies_dir.exists():
-            return 0
+        Args:
+            course_dir: Path to the course directory
+            target_language: Target language code
+        """
+        course_policies_dir = course_dir / "course" / "policies"
-        for child_dir in policies_dir.iterdir():
-            if not child_dir.is_dir():
-                continue
+        if not course_policies_dir.exists():
+            return
-            policy_path = child_dir / "policy.json"
-            if not policy_path.exists():
+        for policy_child_dir in course_policies_dir.iterdir():
+            if not policy_child_dir.is_dir():
                 continue
-            try:
-                policy_data = json.loads(policy_path.read_text(encoding="utf-8"))
-                updated = False
-                for course_obj in policy_data.values():
-                    if not isinstance(course_obj, dict):
-                        continue
-                    # Translate various fields
-                    billed_chars, field_updated = self._translate_policy_fields(
-                        course_obj, source_language, translation_language
-                    )
-                    total_billed_chars += billed_chars
-                    updated = updated or field_updated
-                if updated:
-                    policy_path.write_text(
-                        json.dumps(policy_data, ensure_ascii=False, indent=4),
-                        encoding="utf-8",
-                    )
-            except (OSError, json.JSONDecodeError) as e:
-                logger.warning("Failed to translate policy in %s: %s", child_dir, e)
-        return total_billed_chars
-    def _translate_policy_fields(
-        self,
-        course_obj: dict[str, Any],
-        source_language: str,
-        translation_language: str,
-    ) -> tuple[int, bool]:
-        """Translate specific fields in policy object."""
-        total_billed_chars = 0
-        updated = False
-        # Translate simple string fields
-        billed_chars, field_updated = self._translate_string_fields(
-            course_obj, source_language, translation_language
-        )
-        total_billed_chars += billed_chars
-        updated = updated or field_updated
+            policy_file = policy_child_dir / "policy.json"
+            if policy_file.exists():
+                task = translate_policy_json_task.s(
+                    str(policy_file),
+                    target_language,
+                    self.content_provider_name,
+                    self.content_model,
+                    self.glossary_directory,
+                )
+                self.tasks.append(("policy", str(policy_file), task))
+                logger.info("Added policy.json task for: %s", policy_file)
-        # Translate discussion topics
-        billed_chars, field_updated = self._translate_discussion_topics(
-            course_obj, source_language, translation_language
-        )
-        total_billed_chars += billed_chars
-        updated = updated or field_updated
+    def _wait_and_report_tasks(self) -> list[str]:  # noqa: C901, PLR0915, PLR0912
+        """
+        Execute all tasks as a Celery group and wait for completion.
-        # Translate learning info and tabs
-        billed_chars, field_updated = self._translate_learning_info_and_tabs(
-            course_obj, source_language, translation_language
-        )
-        total_billed_chars += billed_chars
-        updated = updated or field_updated
+        Uses Celery's group primitive to execute tasks in parallel and
+        provides detailed progress reporting.
-        # Translate XML attributes
-        billed_chars, field_updated = self._translate_xml_attributes(
-            course_obj, source_language, translation_language
+        Raises:
+            CommandError: If any tasks fail
+        """
+        stats = []
+        if not self.tasks:
+            self.stdout.write("No tasks to execute.")
+            return []
+        total_tasks = len(self.tasks)
+        self.stdout.write(
+            f"\nExecuting {total_tasks} translation tasks in parallel...\n"
         )
-        total_billed_chars += billed_chars
-        updated = updated or field_updated
-        return total_billed_chars, updated
+        # Extract task signatures and create mappings
+        task_signatures = [task_sig for _, _, task_sig in self.tasks]
+        task_metadata = {
+            i: (task_type, file_path)
+            for i, (task_type, file_path, _) in enumerate(self.tasks)
+        }
-    def _translate_string_fields(
-        self,
-        course_obj: dict[str, Any],
-        source_language: str,
-        translation_language: str,
-    ) -> tuple[int, bool]:
-        """Translate simple string fields."""
-        total_billed_chars = 0
-        updated = False
-        string_fields = ["advertised_start", "display_name", "display_organization"]
-        for field in string_fields:
-            if field in course_obj:
-                translated, billed_chars = self._translate_text(
-                    course_obj[field], source_language, translation_language
-                )
-                course_obj[field] = translated
-                total_billed_chars += billed_chars
-                updated = True
+        # Create and execute group
+        job = group(task_signatures)
+        result = job.apply_async()
-        return total_billed_chars, updated
+        # Wait for all tasks to complete with progress reporting
+        completed_count = 0
+        self.stdout.flush()
-    def _translate_discussion_topics(
-        self,
-        course_obj: dict[str, Any],
-        source_language: str,
-        translation_language: str,
-    ) -> tuple[int, bool]:
-        """Translate discussion topics."""
-        total_billed_chars = 0
-        updated = False
-        if "discussion_topics" in course_obj:
-            topics = course_obj["discussion_topics"]
-            if isinstance(topics, dict):
-                new_topics = {}
-                for topic_key, value in topics.items():
-                    translated_key, billed_chars = self._translate_text(
-                        topic_key, source_language, translation_language
-                    )
-                    new_topics[translated_key] = value
-                    total_billed_chars += billed_chars
-                course_obj["discussion_topics"] = new_topics
-                updated = True
-        return total_billed_chars, updated
-    def _translate_learning_info_and_tabs(
-        self,
-        course_obj: dict[str, Any],
-        source_language: str,
-        translation_language: str,
-    ) -> tuple[int, bool]:
-        """Translate learning info and tabs."""
-        total_billed_chars = 0
-        updated = False
-        # Learning info
-        if "learning_info" in course_obj and isinstance(
-            course_obj["learning_info"], list
-        ):
-            translated_info = []
-            for item in course_obj["learning_info"]:
-                translated, billed_chars = self._translate_text(
-                    item, source_language, translation_language
-                )
-                translated_info.append(translated)
-                total_billed_chars += billed_chars
-            course_obj["learning_info"] = translated_info
-            updated = True
-        # Tabs
-        if "tabs" in course_obj and isinstance(course_obj["tabs"], list):
-            for tab in course_obj["tabs"]:
-                if isinstance(tab, dict) and "name" in tab:
-                    translated, billed_chars = self._translate_text(
-                        tab["name"], source_language, translation_language
-                    )
-                    tab["name"] = translated
-                    total_billed_chars += billed_chars
-                    updated = True
-        return total_billed_chars, updated
-    def _translate_xml_attributes(
-        self,
-        course_obj: dict[str, Any],
-        source_language: str,
-        translation_language: str,
-    ) -> tuple[int, bool]:
-        """Translate XML attributes."""
-        total_billed_chars = 0
-        updated = False
-        if "xml_attributes" in course_obj and isinstance(
-            course_obj["xml_attributes"], dict
-        ):
-            xml_attrs = course_obj["xml_attributes"]
-            xml_fields = [
-                "diplay_name",
-                "info_sidebar_name",
-            ]  # Note: keeping typo as in original
-            for field in xml_fields:
-                if field in xml_attrs:
-                    translated, billed_chars = self._translate_text(
-                        xml_attrs[field], source_language, translation_language
+        try:
+            # Poll for completion and show progress
+            while not result.ready():
+                # Count completed tasks
+                new_completed = sum(1 for r in result.results if r.ready())
+                if new_completed > completed_count:
+                    completed_count = new_completed
+                    self.stdout.write(
+                        f"\rProgress: {completed_count}/{total_tasks} tasks completed",
+                        ending="",
                     )
-                    xml_attrs[field] = translated
-                    total_billed_chars += billed_chars
-                    updated = True
-        return total_billed_chars, updated
-    def _create_translated_archive(
-        self, translated_dir: Path, translation_language: str, original_name: str
-    ) -> Path:
-        """Create tar.gz archive of translated course."""
-        # Remove all archive extensions from the original name
-        ext = self.get_supported_archive_extension(original_name)
-        clean_name = original_name[: -len(ext)] if ext else original_name
-        tar_gz_name = f"{translation_language}_{clean_name}.tar.gz"
-        tar_gz_path = translated_dir.parent / tar_gz_name
-        # Remove existing archive
-        if tar_gz_path.exists():
-            tar_gz_path.unlink()
+                    self.stdout.flush()
-        # Create tar.gz archive containing only the 'course' directory
-        course_dir_path = translated_dir / "course"
-        with tarfile.open(tar_gz_path, "w:gz") as tar:
-            tar.add(course_dir_path, arcname="course")
+                # Sleep before next poll (don't use join with timeout)
+                time.sleep(TASK_POLL_INTERVAL_SECONDS)
-        # Delete the translated directory once it has been archived
-        if translated_dir.exists():
-            shutil.rmtree(translated_dir)
+            # Final update
+            self.stdout.write(
+                f"\rProgress: {total_tasks}/{total_tasks} tasks completed\n"
+            )
-        logger.info("Created tar.gz archive: %s", tar_gz_path)
-        return tar_gz_path
+            # Get all results (propagate=False: task exceptions are returned as values, not raised)
+            results = result.get(timeout=TASK_TIMEOUT_SECONDS, propagate=False)
-    def _translate_text(
-        self,
-        text: str,
-        source_language: str,
-        target_language: str,
-        filename: str | None = None,
-    ) -> tuple[str, int]:
-        """Translate text using DeepL API."""
-        if not text or not text.strip():
-            return text, 0
-        try:
-            deepl_client = deepl.Translator(settings.DEEPL_API_KEY)
-            tag_handling = None
-            if filename:
-                extension = Path(filename).suffix.lstrip(".")
-                if extension in ["html", "xml"]:
-                    tag_handling = extension
-            result = deepl_client.translate_text(
-                text,
-                source_lang=source_language,
-                target_lang=target_language,
-                tag_handling=tag_handling,
-            )
+        except Exception as e:
+            logger.exception("Task execution failed")
+            error_msg = f"Task execution timeout or error: {e}"
+            raise CommandError(error_msg) from e
-            return result.text, result.billed_characters  # noqa: TRY300
-        except (deepl.exceptions.DeepLException, OSError) as e:
-            logger.warning("Translation failed for text: %s... Error: %s", text[:50], e)
-            return text, 0
+        # Process results
+        completed_tasks = 0
+        failed_tasks = 0
+        skipped_tasks = 0
+        for i, task_result in enumerate(results):
+            task_type, file_path = task_metadata[i]
+            if isinstance(task_result, dict):
+                status = task_result.get("status", "unknown")
+                if status == "success":
+                    completed_tasks += 1
+                    msg = f"✓ {task_type}: {file_path}"
+                    stats.append(msg)
+                    self.stdout.write(self.style.SUCCESS(msg))
+                elif status == "skipped":
+                    skipped_tasks += 1
+                    reason = task_result.get("reason", "Skipped")
+                    msg = f"⊘ {task_type}: {file_path} - {reason}"
+                    stats.append(msg)
+                    self.stdout.write(self.style.WARNING(msg))
+                elif status == "error":
+                    failed_tasks += 1
+                    error = task_result.get("error", "Unknown error")
+                    msg = f"✗ {task_type}: {file_path} - {error}"
+                    stats.append(msg)
+                    self.stdout.write(self.style.ERROR(msg))
+                else:
+                    failed_tasks += 1
+                    msg = f"✗ {task_type}: {file_path} - Unknown status: {status}"
+                    stats.append(msg)
+                    self.stdout.write(self.style.ERROR(msg))
+            else:
+                # Task raised an exception
+                failed_tasks += 1
+                error_msg = str(task_result) if task_result else "Task failed"
+                msg = f"✗ {task_type}: {file_path} - {error_msg}"
+                stats.append(msg)
+                self.stdout.write(self.style.ERROR(msg))
+        # Print summary
+        self.stdout.write("\n" + "=" * 60)
+        successful_tasks_stats = (
+            f"Total tasks: {total_tasks}\nCompleted: {completed_tasks}"
+        )
+        stats.append(successful_tasks_stats)
+        self.stdout.write(self.style.SUCCESS(successful_tasks_stats))
+        if skipped_tasks > 0:
+            skipped_tasks_stats = f"Skipped: {skipped_tasks}"
+            stats.append(skipped_tasks_stats)
+            self.stdout.write(self.style.WARNING(skipped_tasks_stats))
+        if failed_tasks > 0:
+            failed_tasks_stats = f"Failed: {failed_tasks}"
+            stats.append(failed_tasks_stats)
+            self.stdout.write(self.style.ERROR(failed_tasks_stats))
+        self.stdout.write("=" * 60 + "\n")
+        if failed_tasks > 0:
+            error_msg = f"{failed_tasks} translation tasks failed"
+            raise CommandError(error_msg)
-    def _translate_display_name(
-        self, xml_content: str, source_language: str, target_language: str
-    ) -> str:
-        """Extract and translate the display_name attribute of the root element."""
-        try:
-            root = ElementTree.fromstring(xml_content)
-            display_name = root.attrib.get("display_name")
+        return stats
-            if display_name:
-                translated_name, _ = self._translate_text(
-                    display_name, source_language, target_language
-                )
-                root.set("display_name", translated_name)
-                return ElementTree.tostring(root, encoding="unicode")
-        except ElementTree.ParseError as e:
-            logger.warning("Could not translate display_name: %s", e)
+    def _add_translation_log_entry(
+        self, source_language, target_language, command_stats=None
+    ) -> None:
+        """
+        Add a log entry for the course translation operation.
-        return xml_content
+        Args:
+            source_language: Source language code
+            target_language: Target language code
+            command_stats: List of command statistics/logs
+        """
+        source_course_id = generate_course_key_from_xml(
+            course_dir_path=self.translated_course_dir
+        )
+        command_stats_str = "\n".join(command_stats) if command_stats else ""
+        CourseTranslationLog.objects.create(
+            source_course_id=source_course_id,
+            source_course_language=source_language,
+            target_course_language=target_language,
+            srt_provider_name=self.srt_provider_name,
+            srt_provider_model=self.srt_model or "",
+            content_provider_name=self.content_provider_name,
+            content_provider_model=self.content_model or "",
+            command_stats=command_stats_str,
+        )