ol-openedx-course-translations 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ol-openedx-course-translations might be problematic. Click here for more details.
- ol_openedx_course_translations-0.2.0/.gitignore +14 -0
- ol_openedx_course_translations-0.2.0/LICENSE.txt +28 -0
- ol_openedx_course_translations-0.2.0/PKG-INFO +106 -0
- ol_openedx_course_translations-0.2.0/README.rst +92 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/__init__.py +3 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/apps.py +33 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/management/__init__.py +0 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/management/commands/__init__.py +0 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/management/commands/translate_course.py +687 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/middleware.py +143 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/settings/cms.py +17 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/settings/common.py +35 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/settings/lms.py +17 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/urls.py +16 -0
- ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/views.py +73 -0
- ol_openedx_course_translations-0.2.0/pyproject.toml +39 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Copyright (C) 2022 MIT Open Learning
|
|
2
|
+
|
|
3
|
+
All rights reserved.
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
* Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
* Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ol-openedx-course-translations
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: An Open edX plugin to translate courses
|
|
5
|
+
Author: MIT Office of Digital Learning
|
|
6
|
+
License-Expression: BSD-3-Clause
|
|
7
|
+
License-File: LICENSE.txt
|
|
8
|
+
Keywords: Python,edx
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Requires-Dist: deepl>=1.25.0
|
|
11
|
+
Requires-Dist: django>=4.0
|
|
12
|
+
Requires-Dist: djangorestframework>=3.14.0
|
|
13
|
+
Description-Content-Type: text/x-rst
|
|
14
|
+
|
|
15
|
+
OL Open edX Course Translations
|
|
16
|
+
===============================
|
|
17
|
+
|
|
18
|
+
An Open edX plugin to manage course translations.
|
|
19
|
+
|
|
20
|
+
Purpose
|
|
21
|
+
*******
|
|
22
|
+
|
|
23
|
+
Translate course content into multiple languages to enhance accessibility for a global audience.
|
|
24
|
+
|
|
25
|
+
Setup
|
|
26
|
+
=====
|
|
27
|
+
|
|
28
|
+
For detailed installation instructions, please refer to the `plugin installation guide <../../docs#installation-guide>`_.
|
|
29
|
+
|
|
30
|
+
Installation required in:
|
|
31
|
+
|
|
32
|
+
* Studio (CMS)
|
|
33
|
+
* LMS (for auto language selection feature)
|
|
34
|
+
|
|
35
|
+
Configuration
|
|
36
|
+
=============
|
|
37
|
+
|
|
38
|
+
- Add the following configuration values to the config file in Open edX. For any release after Juniper, that config file is ``/edx/etc/lms.yml``. If you're using ``private.py``, add these values to ``lms/envs/private.py``. These should be added to the top level. **Ask a fellow developer for these values.**
|
|
39
|
+
|
|
40
|
+
.. code-block:: yaml
|
|
41
|
+
|
|
42
|
+
DEEPL_API_KEY: <YOUR_DEEPL_API_KEY_HERE>
|
|
43
|
+
ENABLE_AUTO_LANGUAGE_SELECTION: true # Enable auto language selection based on course language
|
|
44
|
+
|
|
45
|
+
- For Tutor installations, these values can also be managed through a `custom Tutor plugin <https://docs.tutor.edly.io/tutorials/plugin.html#plugin-development-tutorial>`_.
|
|
46
|
+
|
|
47
|
+
Auto Language Selection
|
|
48
|
+
=======================
|
|
49
|
+
|
|
50
|
+
The plugin includes an auto language selection feature that automatically sets the user's language preference based on the course language. When enabled, users will see the static site content in the course's configured language.
|
|
51
|
+
|
|
52
|
+
To enable auto language selection:
|
|
53
|
+
|
|
54
|
+
1. Set ``ENABLE_AUTO_LANGUAGE_SELECTION`` to ``true`` in your settings.
|
|
55
|
+
|
|
56
|
+
2. Set ``SHARED_COOKIE_DOMAIN`` to your domain (e.g., ``.local.openedx.io`` for local tutor setup) to allow cookies to be shared between LMS and CMS.
|
|
57
|
+
|
|
58
|
+
**How it works:**
|
|
59
|
+
|
|
60
|
+
- **LMS**: The ``CourseLanguageCookieMiddleware`` automatically detects course URLs and sets the language preference based on the course's configured language.
|
|
61
|
+
- **CMS**: The ``CourseLanguageCookieResetMiddleware`` ensures Studio always uses English for the authoring interface.
|
|
62
|
+
- **Admin areas**: Admin URLs (``/admin``, ``/sysadmin``, instructor dashboards) are forced to use English regardless of course language.
|
|
63
|
+
|
|
64
|
+
MFE Integration
|
|
65
|
+
===============
|
|
66
|
+
|
|
67
|
+
To make auto language selection work with Micro-Frontends (MFEs), you need to use a custom Footer component that handles language detection and switching.
|
|
68
|
+
|
|
69
|
+
**Setup:**
|
|
70
|
+
|
|
71
|
+
1. Use the Footer component from `src/bridge/settings/openedx/mfe/slot_config/Footer.jsx <https://github.com/mitodl/ol-infrastructure/blob/main/src/bridge/settings/openedx/mfe/slot_config/Footer.jsx>`_ in the `ol-infrastructure <https://github.com/mitodl/ol-infrastructure>`_ repository.
|
|
72
|
+
|
|
73
|
+
2. Enable auto language selection in each MFE by adding the following to their ``.env.development`` file:
|
|
74
|
+
|
|
75
|
+
.. code-block:: bash
|
|
76
|
+
|
|
77
|
+
ENABLE_AUTO_LANGUAGE_SELECTION="true"
|
|
78
|
+
|
|
79
|
+
3. This custom Footer component:
|
|
80
|
+
- Detects the current course context in MFEs
|
|
81
|
+
- Automatically switches the MFE language based on the course's configured language
|
|
82
|
+
- Ensures consistent language experience across the platform
|
|
83
|
+
|
|
84
|
+
4. Configure your MFE slot overrides to use this custom Footer component instead of the default one.
|
|
85
|
+
|
|
86
|
+
**Note:** The custom Footer is required because MFEs run as separate applications and need their own mechanism to detect and respond to course language settings. The environment variable must be set in each MFE's configuration for the feature to work properly.
|
|
87
|
+
|
|
88
|
+
Translating a Course
|
|
89
|
+
====================
|
|
90
|
+
1. Open the course in Studio.
|
|
91
|
+
2. Go to Tools -> Export Course.
|
|
92
|
+
3. Export the course as a .tar.gz file.
|
|
93
|
+
4. Go to the CMS shell.
|
|
94
|
+
5. Run the management command to translate the course:
|
|
95
|
+
|
|
96
|
+
.. code-block:: bash
|
|
97
|
+
|
|
98
|
+
./manage.py cms translate_course --source-language <SOURCE_LANGUAGE_CODE, defaults to `EN`> --translation-language <TRANSLATION_LANGUAGE_CODE e.g. AR> --course-dir <PATH_TO_EXPORTED_COURSE_TAR_GZ>
|
|
99
|
+
|
|
100
|
+
License
|
|
101
|
+
*******
|
|
102
|
+
|
|
103
|
+
The code in this repository is licensed under the BSD 3-Clause License unless
|
|
104
|
+
otherwise noted.
|
|
105
|
+
|
|
106
|
+
Please see `LICENSE.txt <LICENSE.txt>`_ for details.
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
OL Open edX Course Translations
|
|
2
|
+
===============================
|
|
3
|
+
|
|
4
|
+
An Open edX plugin to manage course translations.
|
|
5
|
+
|
|
6
|
+
Purpose
|
|
7
|
+
*******
|
|
8
|
+
|
|
9
|
+
Translate course content into multiple languages to enhance accessibility for a global audience.
|
|
10
|
+
|
|
11
|
+
Setup
|
|
12
|
+
=====
|
|
13
|
+
|
|
14
|
+
For detailed installation instructions, please refer to the `plugin installation guide <../../docs#installation-guide>`_.
|
|
15
|
+
|
|
16
|
+
Installation required in:
|
|
17
|
+
|
|
18
|
+
* Studio (CMS)
|
|
19
|
+
* LMS (for auto language selection feature)
|
|
20
|
+
|
|
21
|
+
Configuration
|
|
22
|
+
=============
|
|
23
|
+
|
|
24
|
+
- Add the following configuration values to the config file in Open edX. For any release after Juniper, that config file is ``/edx/etc/lms.yml``. If you're using ``private.py``, add these values to ``lms/envs/private.py``. These should be added to the top level. **Ask a fellow developer for these values.**
|
|
25
|
+
|
|
26
|
+
.. code-block:: yaml
|
|
27
|
+
|
|
28
|
+
DEEPL_API_KEY: <YOUR_DEEPL_API_KEY_HERE>
|
|
29
|
+
ENABLE_AUTO_LANGUAGE_SELECTION: true # Enable auto language selection based on course language
|
|
30
|
+
|
|
31
|
+
- For Tutor installations, these values can also be managed through a `custom Tutor plugin <https://docs.tutor.edly.io/tutorials/plugin.html#plugin-development-tutorial>`_.
|
|
32
|
+
|
|
33
|
+
Auto Language Selection
|
|
34
|
+
=======================
|
|
35
|
+
|
|
36
|
+
The plugin includes an auto language selection feature that automatically sets the user's language preference based on the course language. When enabled, users will see the static site content in the course's configured language.
|
|
37
|
+
|
|
38
|
+
To enable auto language selection:
|
|
39
|
+
|
|
40
|
+
1. Set ``ENABLE_AUTO_LANGUAGE_SELECTION`` to ``true`` in your settings.
|
|
41
|
+
|
|
42
|
+
2. Set ``SHARED_COOKIE_DOMAIN`` to your domain (e.g., ``.local.openedx.io`` for local tutor setup) to allow cookies to be shared between LMS and CMS.
|
|
43
|
+
|
|
44
|
+
**How it works:**
|
|
45
|
+
|
|
46
|
+
- **LMS**: The ``CourseLanguageCookieMiddleware`` automatically detects course URLs and sets the language preference based on the course's configured language.
|
|
47
|
+
- **CMS**: The ``CourseLanguageCookieResetMiddleware`` ensures Studio always uses English for the authoring interface.
|
|
48
|
+
- **Admin areas**: Admin URLs (``/admin``, ``/sysadmin``, instructor dashboards) are forced to use English regardless of course language.
|
|
49
|
+
|
|
50
|
+
MFE Integration
|
|
51
|
+
===============
|
|
52
|
+
|
|
53
|
+
To make auto language selection work with Micro-Frontends (MFEs), you need to use a custom Footer component that handles language detection and switching.
|
|
54
|
+
|
|
55
|
+
**Setup:**
|
|
56
|
+
|
|
57
|
+
1. Use the Footer component from `src/bridge/settings/openedx/mfe/slot_config/Footer.jsx <https://github.com/mitodl/ol-infrastructure/blob/main/src/bridge/settings/openedx/mfe/slot_config/Footer.jsx>`_ in the `ol-infrastructure <https://github.com/mitodl/ol-infrastructure>`_ repository.
|
|
58
|
+
|
|
59
|
+
2. Enable auto language selection in each MFE by adding the following to their ``.env.development`` file:
|
|
60
|
+
|
|
61
|
+
.. code-block:: bash
|
|
62
|
+
|
|
63
|
+
ENABLE_AUTO_LANGUAGE_SELECTION="true"
|
|
64
|
+
|
|
65
|
+
3. This custom Footer component:
|
|
66
|
+
- Detects the current course context in MFEs
|
|
67
|
+
- Automatically switches the MFE language based on the course's configured language
|
|
68
|
+
- Ensures consistent language experience across the platform
|
|
69
|
+
|
|
70
|
+
4. Configure your MFE slot overrides to use this custom Footer component instead of the default one.
|
|
71
|
+
|
|
72
|
+
**Note:** The custom Footer is required because MFEs run as separate applications and need their own mechanism to detect and respond to course language settings. The environment variable must be set in each MFE's configuration for the feature to work properly.
|
|
73
|
+
|
|
74
|
+
Translating a Course
|
|
75
|
+
====================
|
|
76
|
+
1. Open the course in Studio.
|
|
77
|
+
2. Go to Tools -> Export Course.
|
|
78
|
+
3. Export the course as a .tar.gz file.
|
|
79
|
+
4. Go to the CMS shell.
|
|
80
|
+
5. Run the management command to translate the course:
|
|
81
|
+
|
|
82
|
+
.. code-block:: bash
|
|
83
|
+
|
|
84
|
+
./manage.py cms translate_course --source-language <SOURCE_LANGUAGE_CODE, defaults to `EN`> --translation-language <TRANSLATION_LANGUAGE_CODE e.g. AR> --course-dir <PATH_TO_EXPORTED_COURSE_TAR_GZ>
|
|
85
|
+
|
|
86
|
+
License
|
|
87
|
+
*******
|
|
88
|
+
|
|
89
|
+
The code in this repository is licensed under the BSD 3-Clause License unless
|
|
90
|
+
otherwise noted.
|
|
91
|
+
|
|
92
|
+
Please see `LICENSE.txt <LICENSE.txt>`_ for details.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ol_openedx_course_translations Django application initialization.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from django.apps import AppConfig
|
|
6
|
+
from edx_django_utils.plugins import PluginSettings, PluginURLs
|
|
7
|
+
from openedx.core.djangoapps.plugins.constants import ProjectType, SettingsType
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class OLOpenedXCourseTranslationsConfig(AppConfig):
    """
    Configuration for the ol_openedx_course_translations Django application.

    Besides the app name, this declares ``plugin_app``, the mapping that
    describes which URL and settings modules of this package are exposed
    for the LMS and CMS project types.
    """

    # Name of the Django application this configuration belongs to.
    name = "ol_openedx_course_translations"

    # Plugin declaration, keyed by the PluginURLs / PluginSettings constants.
    plugin_app = {
        # URL hook: declared for the LMS only, under ``course-translations/``,
        # resolved from this package's ``urls`` module.
        PluginURLs.CONFIG: {
            ProjectType.LMS: {
                PluginURLs.NAMESPACE: "",
                PluginURLs.REGEX: "^course-translations/",
                PluginURLs.RELATIVE_PATH: "urls",
            }
        },
        # Settings hook: CMS and LMS each point at their own settings module,
        # both registered under the COMMON settings type.
        PluginSettings.CONFIG: {
            ProjectType.CMS: {
                SettingsType.COMMON: {PluginSettings.RELATIVE_PATH: "settings.cms"},
            },
            ProjectType.LMS: {
                SettingsType.COMMON: {PluginSettings.RELATIVE_PATH: "settings.lms"},
            },
        },
    }
|
|
File without changes
|
ol_openedx_course_translations-0.2.0/ol_openedx_course_translations/management/commands/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,687 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Management command to translate course content to a specified language.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
import shutil
|
|
9
|
+
import tarfile
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
from xml.etree.ElementTree import Element
|
|
13
|
+
|
|
14
|
+
import deepl
|
|
15
|
+
from defusedxml import ElementTree
|
|
16
|
+
from django.conf import settings
|
|
17
|
+
from django.core.management.base import BaseCommand, CommandError
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Command(BaseCommand):
|
|
23
|
+
"""Translate given course content to the specified language."""
|
|
24
|
+
|
|
25
|
+
help = "Translate course content to the specified language."
|
|
26
|
+
|
|
27
|
+
def add_arguments(self, parser) -> None:
    """
    Register the command-line options of the ``translate_course`` command.

    Options:
        --source-language: language of the existing content (default ``EN``).
        --translation-language: required target language code.
        --course-dir: required path of the exported course tar archive.
    """
    source_help = (
        "Specify the source language of the course content "
        "in ISO format, e.g. `EN` for English."
    )
    target_help = (
        "Specify the language code in ISO format "
        "to translate the course content into. e.g `AR` for Arabic"
    )
    archive_help = "Specify the course directory (tar archive)."

    # One (flag, keyword-arguments) pair per option, registered in order.
    option_table = (
        (
            "--source-language",
            {"dest": "source_language", "default": "EN", "help": source_help},
        ),
        (
            "--translation-language",
            {"dest": "translation_language", "required": True, "help": target_help},
        ),
        (
            "--course-dir",
            {"dest": "course_directory", "required": True, "help": archive_help},
        ),
    )
    for flag, flag_kwargs in option_table:
        parser.add_argument(flag, **flag_kwargs)
|
|
53
|
+
|
|
54
|
+
def handle(self, **options) -> None:
    """
    Run the whole translation pipeline for one exported course archive.

    Steps, in order: validate the options, extract the archive, copy the
    extracted tree into a language-prefixed directory, delete the extracted
    tree, translate the copy, and pack the copy into a new archive.

    Raises:
        CommandError: if any step raises; the original exception is chained
            and logged with its traceback.
    """
    try:
        self._validate_inputs(options)

        archive = Path(options["course_directory"])
        src_lang = options["source_language"]
        dest_lang = options["translation_language"]

        unpacked = self._extract_course_archive(archive)
        working_copy = self._create_translated_copy(unpacked, dest_lang)

        # Only the copy is translated, so the extracted tree is removed
        # before any translation work starts.
        if unpacked.exists():
            shutil.rmtree(unpacked)

        billed = self._translate_course_content(working_copy, src_lang, dest_lang)
        result_archive = self._create_translated_archive(
            working_copy, dest_lang, archive.stem
        )

        success_text = f"Translation completed. Archive created: {result_archive}"
        self.stdout.write(self.style.SUCCESS(success_text))
        logger.info("Total billed characters: %s", billed)
    except Exception as exc:
        logger.exception("Translation failed")
        raise CommandError(f"Translation failed: {exc}") from exc
|
|
96
|
+
|
|
97
|
+
def get_supported_archive_extension(self, filename: str) -> str | None:
    """
    Find which configured archive extension ``filename`` ends with.

    Extensions are read from
    ``settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS`` and tried in
    their configured order; the first match is returned, ``None`` if none match.
    """
    matching = (
        suffix
        for suffix in settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS
        if filename.endswith(suffix)
    )
    return next(matching, None)
|
|
105
|
+
|
|
106
|
+
def _validate_inputs(self, options: dict[str, Any]) -> None:
    """
    Check the command options before any work is done.

    Raises:
        CommandError: if the ``course_directory`` path does not exist, if its
            name does not end with a supported archive extension, or if the
            ``DEEPL_API_KEY`` setting is missing or empty.
    """
    archive = Path(options["course_directory"])
    if not archive.exists():
        raise CommandError(f"Course directory not found: {archive}")

    if self.get_supported_archive_extension(archive.name) is None:
        allowed = ", ".join(settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS)
        raise CommandError(f"Course directory must be a tar file: {allowed}")

    # A missing attribute and an empty value are treated the same way.
    if not getattr(settings, "DEEPL_API_KEY", None):
        raise CommandError("DEEPL_API_KEY setting is required")
|
|
124
|
+
|
|
125
|
+
def _extract_course_archive(self, course_dir: Path) -> Path:
    """
    Extract the course archive next to itself and return the extracted path.

    The target directory is the archive's parent directory joined with the
    archive name minus its supported extension. If that directory already
    exists, extraction is skipped and the existing directory is returned.

    Args:
        course_dir: Path of the course tar archive.

    Returns:
        Path of the directory holding the extracted course.

    Raises:
        CommandError: if the archive fails validation or cannot be extracted.
    """
    # Use the parent directory of the source file as the base extraction directory
    extract_base_dir = course_dir.parent

    # Get base name without extension
    ext = self.get_supported_archive_extension(course_dir.name)
    tarball_base = course_dir.name[: -len(ext)] if ext else course_dir.name

    extracted_dir = extract_base_dir / tarball_base

    if not extracted_dir.exists():
        try:
            with tarfile.open(course_dir, "r:*") as tar:
                # Validate tar file before extraction
                self._validate_tar_file(tar)
                tar.extractall(path=extracted_dir, filter="data")
        except (tarfile.TarError, OSError) as e:
            # A failed extraction can leave a partial tree behind. Because an
            # existing directory short-circuits extraction above, a later run
            # would silently reuse that incomplete tree, so remove it here.
            shutil.rmtree(extracted_dir, ignore_errors=True)
            error_msg = f"Failed to extract archive: {e}"
            raise CommandError(error_msg) from e

    logger.info("Extracted course to: %s", extracted_dir)
    return extracted_dir
|
|
148
|
+
|
|
149
|
+
def _validate_tar_file(self, tar: tarfile.TarFile) -> None:
|
|
150
|
+
"""Validate tar file contents for security."""
|
|
151
|
+
for member in tar.getmembers():
|
|
152
|
+
# Check for directory traversal attacks
|
|
153
|
+
if member.name.startswith("/") or ".." in member.name:
|
|
154
|
+
error_msg = f"Unsafe tar member: {member.name}"
|
|
155
|
+
raise CommandError(error_msg)
|
|
156
|
+
# Check for excessively large files
|
|
157
|
+
if (
|
|
158
|
+
member.size > 512 * 1024 * 1024
|
|
159
|
+
): # 0.5GB limit because courses on Production are big
|
|
160
|
+
error_msg = f"File too large: {member.name}"
|
|
161
|
+
raise CommandError(error_msg)
|
|
162
|
+
|
|
163
|
+
def _create_translated_copy(
    self, source_dir: Path, translation_language: str
) -> Path:
    """
    Copy the course tree into a sibling directory reserved for translation.

    The copy sits next to ``source_dir`` and is named
    ``<translation_language>_<source_dir name>``.

    Raises:
        CommandError: if the destination directory already exists.
    """
    destination = source_dir.parent / f"{translation_language}_{source_dir.name}"

    if destination.exists():
        raise CommandError(f"Translation directory already exists: {destination}")

    shutil.copytree(source_dir, destination)
    logger.info("Created translation copy: %s", destination)
    return destination
|
|
178
|
+
|
|
179
|
+
def _translate_course_content(
    self, course_dir: Path, source_language: str, translation_language: str
) -> int:
    """
    Translate every translatable part of the course copy.

    For ``course_dir`` and then its parent directory, the files directly
    inside are translated, followed by each directory named in
    ``settings.COURSE_TRANSLATIONS_TARGET_DIRECTORIES`` (recursively).
    The grading policy and policy JSON files are handled last.

    Returns:
        The sum of billed characters reported by all translation steps.
    """
    # NOTE(review): the parent of ``course_dir`` is scanned as well, which
    # may touch files that sit next to the translated copy - confirm intended.
    billed = 0

    for base_dir in (course_dir, course_dir.parent):
        # Files directly inside the base directory.
        billed += self._translate_files_in_directory(
            base_dir, source_language, translation_language, recursive=False
        )

        # Configured sub-directories, walked recursively.
        for sub_name in settings.COURSE_TRANSLATIONS_TARGET_DIRECTORIES:
            sub_dir = base_dir / sub_name
            if not (sub_dir.exists() and sub_dir.is_dir()):
                continue
            billed += self._translate_files_in_directory(
                sub_dir,
                source_language,
                translation_language,
                recursive=True,
            )

    # JSON policy files need field-level handling.
    billed += self._translate_grading_policy(
        course_dir, source_language, translation_language
    )
    billed += self._translate_policy_json(
        course_dir, source_language, translation_language
    )
    return billed
|
|
211
|
+
|
|
212
|
+
def _translate_files_in_directory(
    self,
    directory: Path,
    source_language: str,
    translation_language: str,
    *,
    recursive: bool = False,
) -> int:
    """
    Translate the files in ``directory`` that have a translatable extension.

    Extensions come from ``settings.COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS``.
    With ``recursive=True`` the whole subtree is globbed once per extension;
    otherwise only regular files directly inside ``directory`` are used.

    Returns:
        The billed characters summed over all files; a file that raises
        ``OSError`` or ``UnicodeDecodeError`` is logged and contributes nothing.
    """
    if recursive:
        candidates: list[Path] = [
            match
            for suffix in settings.COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS
            for match in directory.rglob(f"*{suffix}")
        ]
    else:
        candidates = [
            entry
            for entry in directory.iterdir()
            if entry.is_file()
            and entry.name.endswith(
                tuple(settings.COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS)
            )
        ]

    billed = 0
    for candidate in candidates:
        try:
            billed += self._translate_file(
                candidate, source_language, translation_language
            )
        except (OSError, UnicodeDecodeError) as err:
            logger.warning("Failed to translate %s: %s", candidate, err)
    return billed
|
|
247
|
+
|
|
248
|
+
def _update_video_xml(self, xml_content: str, translation_language: str) -> str: # noqa: C901
|
|
249
|
+
"""Update video XML transcripts and transcript tags for the target language."""
|
|
250
|
+
try:
|
|
251
|
+
root = ElementTree.fromstring(xml_content)
|
|
252
|
+
lang_code = translation_language.lower()
|
|
253
|
+
|
|
254
|
+
# Update transcripts attribute in <video>
|
|
255
|
+
if root.tag == "video" and "transcripts" in root.attrib:
|
|
256
|
+
transcripts_json = root.attrib["transcripts"].replace(""", '"')
|
|
257
|
+
transcripts_dict = json.loads(transcripts_json)
|
|
258
|
+
for k in list(transcripts_dict.keys()):
|
|
259
|
+
value = transcripts_dict[k]
|
|
260
|
+
new_key = lang_code
|
|
261
|
+
new_value = re.sub(r"-[a-zA-Z]{2}\.srt$", f"-{new_key}.srt", value)
|
|
262
|
+
transcripts_dict[new_key] = new_value
|
|
263
|
+
new_transcripts = json.dumps(transcripts_dict, ensure_ascii=False)
|
|
264
|
+
root.set("transcripts", new_transcripts)
|
|
265
|
+
|
|
266
|
+
# Add a new <transcript> tag inside <transcripts> for the
|
|
267
|
+
# target language, inheriting attributes
|
|
268
|
+
for video_asset in root.findall("video_asset"):
|
|
269
|
+
for transcripts in video_asset.findall("transcripts"):
|
|
270
|
+
existing_transcript = transcripts.find("transcript")
|
|
271
|
+
new_transcript = Element("transcript")
|
|
272
|
+
if existing_transcript is not None:
|
|
273
|
+
new_transcript.attrib = existing_transcript.attrib.copy()
|
|
274
|
+
new_transcript.set("language_code", lang_code)
|
|
275
|
+
# Avoid duplicates
|
|
276
|
+
if not any(
|
|
277
|
+
t.attrib == new_transcript.attrib
|
|
278
|
+
for t in transcripts.findall("transcript")
|
|
279
|
+
):
|
|
280
|
+
transcripts.append(new_transcript)
|
|
281
|
+
|
|
282
|
+
# Add a new <transcript> tag for the target language
|
|
283
|
+
for transcript in root.findall("transcript"):
|
|
284
|
+
src = transcript.get("src")
|
|
285
|
+
if src:
|
|
286
|
+
new_src = re.sub(r"-[a-zA-Z]{2}\.srt$", f"-{lang_code}.srt", src)
|
|
287
|
+
new_transcript = Element("transcript")
|
|
288
|
+
new_transcript.set("language", lang_code)
|
|
289
|
+
new_transcript.set("src", new_src)
|
|
290
|
+
# Avoid duplicates
|
|
291
|
+
if not any(
|
|
292
|
+
t.get("language") == lang_code and t.get("src") == new_src
|
|
293
|
+
for t in root.findall("transcript")
|
|
294
|
+
):
|
|
295
|
+
root.append(new_transcript)
|
|
296
|
+
|
|
297
|
+
xml_content = ElementTree.tostring(root, encoding="unicode")
|
|
298
|
+
except Exception as e: # noqa: BLE001
|
|
299
|
+
logger.warning("Failed to update transcripts in video XML: %s", e)
|
|
300
|
+
|
|
301
|
+
return xml_content
|
|
302
|
+
|
|
303
|
+
def _translate_file(
    self, file_path: Path, source_language: str, translation_language: str
) -> int:
    """
    Translate one file in place and report the billed characters.

    ``.srt`` files are delegated to ``translate_srt_file``. Any other file is
    read as UTF-8, translated as text, post-processed when it is XML, and
    written back to the same path.

    Returns:
        The billed character count, or ``0`` when the file could not be
        translated (the failure is logged as a warning).
    """
    # Subtitle files go through the dedicated SRT path.
    if file_path.suffix == ".srt":
        try:
            return self.translate_srt_file(
                file_path, source_language, translation_language
            )
        except Exception as err:  # noqa: BLE001
            logger.warning("Failed to translate SRT %s: %s", file_path, err)
            return 0

    try:
        original_text = file_path.read_text(encoding="utf-8")
        logger.debug("Translating: %s", file_path)

        new_text, billed = self._translate_text(
            original_text, source_language, translation_language, file_path.name
        )

        if file_path.suffix == ".xml":
            # XML files also get their display_name translated.
            new_text = self._translate_display_name(
                new_text, source_language, translation_language
            )

            # NOTE(review): this check is assumed to be nested under the
            # .xml branch; the original indentation was not available.
            if file_path.parent.name == "video":
                new_text = self._update_video_xml(new_text, translation_language)

        file_path.write_text(new_text, encoding="utf-8")
    except (OSError, UnicodeDecodeError) as err:
        logger.warning("Failed to translate %s: %s", file_path, err)
        return 0

    return billed
|
|
345
|
+
|
|
346
|
+
def _translate_grading_policy(
    self, course_dir: Path, source_language: str, translation_language: str
) -> int:
    """
    Translate ``short_label`` values in every ``grading_policy.json``.

    Looks in each sub-directory of ``<course_dir>/course/policies``. A file
    is rewritten (indent 4, non-ASCII preserved) only when at least one
    ``GRADER`` entry had a ``short_label``.

    Returns:
        Total billed characters; ``0`` when the policies directory is absent.
        Files that raise ``OSError`` or fail JSON decoding are logged and skipped.
    """
    policies_root = course_dir / "course" / "policies"
    if not policies_root.exists():
        return 0

    billed_total = 0
    for run_dir in policies_root.iterdir():
        if not run_dir.is_dir():
            continue
        policy_file = run_dir / "grading_policy.json"
        if not policy_file.exists():
            continue

        try:
            policy = json.loads(policy_file.read_text(encoding="utf-8"))
            changed = False

            for grader in policy.get("GRADER", []):
                if "short_label" not in grader:
                    continue
                label, billed = self._translate_text(
                    grader["short_label"], source_language, translation_language
                )
                grader["short_label"] = label
                billed_total += billed
                changed = True

            if changed:
                serialized = json.dumps(policy, ensure_ascii=False, indent=4)
                policy_file.write_text(serialized, encoding="utf-8")
        except (OSError, json.JSONDecodeError) as err:
            logger.warning(
                "Failed to translate grading policy in %s: %s", run_dir, err
            )

    return billed_total
|
|
390
|
+
|
|
391
|
+
def _translate_policy_json(
    self, course_dir: Path, source_language: str, translation_language: str
) -> int:
    """Translate the translatable fields of every policy.json file.

    Scans ``<course_dir>/course/policies/<child>/policy.json``. Every
    top-level value that is a JSON object is handed to
    ``self._translate_policy_fields``; the file is rewritten in place when
    any of those calls reports an update.

    Args:
        course_dir: Directory that contains the ``course`` folder.
        source_language: Source language code passed to the field helpers.
        translation_language: Target language code passed to the field
            helpers.

    Returns:
        Sum of the billed characters reported by the field helpers. Files
        that cannot be read, decoded, parsed or written are logged and
        skipped.
    """
    total_billed_chars = 0
    policies_dir = course_dir / "course" / "policies"

    if not policies_dir.exists():
        return 0

    for child_dir in policies_dir.iterdir():
        if not child_dir.is_dir():
            continue

        policy_path = child_dir / "policy.json"
        if not policy_path.exists():
            continue

        try:
            policy_data = json.loads(policy_path.read_text(encoding="utf-8"))

            # Valid JSON is not necessarily an object; anything else has no
            # ``.values()`` and nothing to translate.
            if not isinstance(policy_data, dict):
                logger.warning(
                    "Failed to translate policy in %s: %s",
                    child_dir,
                    "top-level JSON value is not an object",
                )
                continue

            updated = False
            for course_obj in policy_data.values():
                if not isinstance(course_obj, dict):
                    continue

                # Translate various fields
                billed_chars, field_updated = self._translate_policy_fields(
                    course_obj, source_language, translation_language
                )
                total_billed_chars += billed_chars
                updated = updated or field_updated

            if updated:
                policy_path.write_text(
                    json.dumps(policy_data, ensure_ascii=False, indent=4),
                    encoding="utf-8",
                )
        # UnicodeDecodeError: read_text() raises it for files that are not
        # valid UTF-8; it is a ValueError, so OSError alone does not cover it.
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            logger.warning("Failed to translate policy in %s: %s", child_dir, e)

    return total_billed_chars
|
|
433
|
+
|
|
434
|
+
def _translate_policy_fields(
    self,
    course_obj: dict[str, Any],
    source_language: str,
    translation_language: str,
) -> tuple[int, bool]:
    """Run every policy field translator over one course object.

    The helpers are applied in a fixed order: simple string fields,
    discussion topics, learning info and tabs, then XML attributes. Each
    helper mutates ``course_obj`` in place and returns
    ``(billed_chars, updated)``.

    Returns:
        The summed billed characters and whether any helper reported an
        update.
    """
    translators = (
        self._translate_string_fields,
        self._translate_discussion_topics,
        self._translate_learning_info_and_tabs,
        self._translate_xml_attributes,
    )

    billed_total = 0
    changed = False
    for translate in translators:
        billed, step_changed = translate(
            course_obj, source_language, translation_language
        )
        billed_total += billed
        changed = changed or step_changed

    return billed_total, changed
|
|
473
|
+
|
|
474
|
+
def _translate_string_fields(
    self,
    course_obj: dict[str, Any],
    source_language: str,
    translation_language: str,
) -> tuple[int, bool]:
    """Translate the plain string fields of a policy course object.

    Handles ``advertised_start``, ``display_name`` and
    ``display_organization``. Every field that is present is replaced in
    place by whatever ``self._translate_text`` returns for it.

    Returns:
        The summed billed characters and whether any of the fields was
        present.
    """
    billed_total = 0
    changed = False

    for name in ("advertised_start", "display_name", "display_organization"):
        if name not in course_obj:
            continue
        new_value, billed = self._translate_text(
            course_obj[name], source_language, translation_language
        )
        course_obj[name] = new_value
        billed_total += billed
        changed = True

    return billed_total, changed
|
|
495
|
+
|
|
496
|
+
def _translate_discussion_topics(
    self,
    course_obj: dict[str, Any],
    source_language: str,
    translation_language: str,
) -> tuple[int, bool]:
    """Translate the topic names (keys) of ``discussion_topics``.

    The mapping is rebuilt with translated keys and unchanged values, in
    the original order. If two or more topics would end up with the same
    translated name, those topics keep their original names so that no
    topic is overwritten and lost.

    Args:
        course_obj: Policy course object; mutated in place.
        source_language: Source language code handed to ``_translate_text``.
        translation_language: Target language code handed to
            ``_translate_text``.

    Returns:
        ``(billed_chars, updated)``. ``updated`` is True whenever
        ``discussion_topics`` is present and is a dict.
    """
    topics = course_obj.get("discussion_topics")
    if not isinstance(topics, dict):
        return 0, False

    total_billed_chars = 0
    labels: dict[str, str] = {}
    for topic_key in topics:
        translated_key, billed_chars = self._translate_text(
            topic_key, source_language, translation_language
        )
        labels[topic_key] = translated_key
        total_billed_chars += billed_chars

    # Resolve name clashes: any topic whose translated name is shared with
    # another topic falls back to its original name. Original names are
    # unique, so this loop always terminates with unique labels.
    while True:
        usage: dict[str, int] = {}
        for label in labels.values():
            usage[label] = usage.get(label, 0) + 1
        clashing = [
            key for key, label in labels.items() if usage[label] > 1 and label != key
        ]
        if not clashing:
            break
        for key in clashing:
            labels[key] = key

    course_obj["discussion_topics"] = {
        labels[key]: value for key, value in topics.items()
    }
    return total_billed_chars, True
|
|
520
|
+
|
|
521
|
+
def _translate_learning_info_and_tabs(
    self,
    course_obj: dict[str, Any],
    source_language: str,
    translation_language: str,
) -> tuple[int, bool]:
    """Translate ``learning_info`` entries and the ``name`` of each tab.

    ``learning_info`` is replaced by a new list of translated entries when
    it is a list. Each dict in the ``tabs`` list that has a ``name`` key
    gets that name translated in place.

    Returns:
        The summed billed characters and whether anything was processed
        (a ``learning_info`` list, even an empty one, counts as processed).
    """
    billed_total = 0
    changed = False

    # Learning info
    learning_info = course_obj.get("learning_info")
    if isinstance(learning_info, list):
        results = [
            self._translate_text(entry, source_language, translation_language)
            for entry in learning_info
        ]
        course_obj["learning_info"] = [text for text, _ in results]
        billed_total += sum(billed for _, billed in results)
        changed = True

    # Tabs
    tabs = course_obj.get("tabs")
    if isinstance(tabs, list):
        for tab in tabs:
            if not isinstance(tab, dict) or "name" not in tab:
                continue
            new_name, billed = self._translate_text(
                tab["name"], source_language, translation_language
            )
            tab["name"] = new_name
            billed_total += billed
            changed = True

    return billed_total, changed
|
|
557
|
+
|
|
558
|
+
def _translate_xml_attributes(
    self,
    course_obj: dict[str, Any],
    source_language: str,
    translation_language: str,
) -> tuple[int, bool]:
    """Translate selected entries of the ``xml_attributes`` mapping.

    Only the ``diplay_name`` and ``info_sidebar_name`` keys are looked at;
    values are replaced in place with the result of
    ``self._translate_text``.

    Returns:
        The summed billed characters and whether any of the keys was
        present.
    """
    billed_total = 0
    changed = False

    xml_attrs = course_obj.get("xml_attributes")
    if not isinstance(xml_attrs, dict):
        return billed_total, changed

    # Note: keeping typo as in original
    for attr_name in ("diplay_name", "info_sidebar_name"):
        if attr_name not in xml_attrs:
            continue
        new_value, billed = self._translate_text(
            xml_attrs[attr_name], source_language, translation_language
        )
        xml_attrs[attr_name] = new_value
        billed_total += billed
        changed = True

    return billed_total, changed
|
|
586
|
+
|
|
587
|
+
def _create_translated_archive(
    self, translated_dir: Path, translation_language: str, original_name: str
) -> Path:
    """Pack the translated ``course`` directory into a tar.gz archive.

    The archive is written next to ``translated_dir`` and named
    ``<translation_language>_<original name without archive extension>.tar.gz``.
    An existing archive at that path is removed first, and
    ``translated_dir`` is deleted once the archive has been written.

    Returns:
        Path of the archive that was created.
    """
    # Strip the archive extension reported by the helper, if any.
    suffix = self.get_supported_archive_extension(original_name)
    base_name = original_name
    if suffix:
        base_name = original_name[: -len(suffix)]

    archive_path = (
        translated_dir.parent / f"{translation_language}_{base_name}.tar.gz"
    )

    # Start from a clean slate if an earlier run left an archive behind.
    if archive_path.exists():
        archive_path.unlink()

    # Only the 'course' directory goes into the archive.
    course_root = translated_dir / "course"
    with tarfile.open(archive_path, "w:gz") as archive:
        archive.add(course_root, arcname="course")

    # The extracted tree is no longer needed once it has been archived.
    if translated_dir.exists():
        shutil.rmtree(translated_dir)

    logger.info("Created tar.gz archive: %s", archive_path)
    return archive_path
|
|
613
|
+
|
|
614
|
+
def translate_srt_file(
    self, input_file_path: Path, source_language: str, target_language: str
) -> int:
    """
    Translate an SRT file using DeepL document translation.

    For names of the form ``<base>-<lang>.srt`` the translation is written
    to ``<base>-<target_language lowercased>.srt`` in the same directory.
    For any other name, or when that output name equals the input name,
    the translation replaces the input file.

    Args:
        input_file_path: SRT file to translate.
        source_language: Source language code passed to DeepL.
        target_language: Target language code passed to DeepL.

    Returns:
        The number of billed characters reported by DeepL.

    Exceptions raised by the DeepL client are not caught here.
    """
    input_name = input_file_path.name
    output_name = input_name
    if "-" in input_name and input_name.endswith(".srt"):
        base, _old_language = input_name.rsplit("-", 1)
        output_name = f"{base}-{target_language.lower()}.srt"
    output_file_path = input_file_path.parent / output_name

    # The DeepL client opens the output path for writing while the input is
    # still to be uploaded. If both are the same file the input would be
    # truncated first, so translate into a sibling file and swap it in
    # afterwards. The prefix keeps the file extension unchanged.
    in_place = output_file_path == input_file_path
    write_path = (
        input_file_path.with_name(f"translated-tmp-{input_name}")
        if in_place
        else output_file_path
    )

    deepl_client = deepl.Translator(settings.DEEPL_API_KEY)
    result = deepl_client.translate_document_from_filepath(
        input_file_path,
        write_path,
        source_lang=source_language,
        target_lang=target_language,
    )

    if in_place:
        # Atomically replace the original with the finished translation.
        write_path.replace(input_file_path)

    return result.billed_characters
|
|
637
|
+
|
|
638
|
+
def _translate_text(
|
|
639
|
+
self,
|
|
640
|
+
text: str,
|
|
641
|
+
source_language: str,
|
|
642
|
+
target_language: str,
|
|
643
|
+
filename: str | None = None,
|
|
644
|
+
) -> tuple[str, int]:
|
|
645
|
+
"""Translate text using DeepL API."""
|
|
646
|
+
if not text or not text.strip():
|
|
647
|
+
return text, 0
|
|
648
|
+
|
|
649
|
+
try:
|
|
650
|
+
deepl_client = deepl.Translator(settings.DEEPL_API_KEY)
|
|
651
|
+
|
|
652
|
+
tag_handling = None
|
|
653
|
+
if filename:
|
|
654
|
+
extension = Path(filename).suffix.lstrip(".")
|
|
655
|
+
if extension in ["html", "xml"]:
|
|
656
|
+
tag_handling = extension
|
|
657
|
+
|
|
658
|
+
result = deepl_client.translate_text(
|
|
659
|
+
text,
|
|
660
|
+
source_lang=source_language,
|
|
661
|
+
target_lang=target_language,
|
|
662
|
+
tag_handling=tag_handling,
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
return result.text, result.billed_characters # noqa: TRY300
|
|
666
|
+
except (deepl.exceptions.DeepLException, OSError) as e:
|
|
667
|
+
logger.warning("Translation failed for text: %s... Error: %s", text[:50], e)
|
|
668
|
+
return text, 0
|
|
669
|
+
|
|
670
|
+
def _translate_display_name(
    self, xml_content: str, source_language: str, target_language: str
) -> str:
    """Translate the ``display_name`` attribute of the XML root element.

    Returns the re-serialized document with the translated attribute. When
    the content cannot be parsed (a warning is logged) or the root has no
    non-empty ``display_name``, ``xml_content`` is returned unchanged.
    """
    try:
        root = ElementTree.fromstring(xml_content)
    except ElementTree.ParseError as e:
        logger.warning("Could not translate display_name: %s", e)
        return xml_content

    current_name = root.attrib.get("display_name")
    if not current_name:
        return xml_content

    # Only the translated text is used here; the billed count is dropped.
    new_name, _ = self._translate_text(
        current_name, source_language, target_language
    )
    root.set("display_name", new_name)
    return ElementTree.tostring(root, encoding="unicode")
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Middleware to set/reset language preference cookie and
|
|
3
|
+
user preference based on course language.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
from django.conf import settings
|
|
9
|
+
from django.http import HttpResponseRedirect
|
|
10
|
+
from django.utils.deprecation import MiddlewareMixin
|
|
11
|
+
from opaque_keys.edx.keys import CourseKey
|
|
12
|
+
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
|
|
13
|
+
from openedx.core.djangoapps.lang_pref import LANGUAGE_KEY
|
|
14
|
+
from openedx.core.djangoapps.lang_pref import helpers as lang_pref_helpers
|
|
15
|
+
from openedx.core.djangoapps.user_api.preferences.api import set_user_preference
|
|
16
|
+
|
|
17
|
+
ENGLISH_LANGUAGE_CODE = "en"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def should_process_request(request):
    """
    Tell whether language auto-selection applies to this request.

    Truthy only when ``settings.ENABLE_AUTO_LANGUAGE_SELECTION`` is on and
    the request carries an authenticated ``user``.
    """
    feature_enabled = settings.ENABLE_AUTO_LANGUAGE_SELECTION
    return (
        feature_enabled
        and hasattr(request, "user")
        and request.user.is_authenticated
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def set_language(request, response, language):
    """
    Store ``language`` as the language cookie on ``response`` and as the
    language preference of ``request.user``.
    """
    # Cookie first, then the stored user preference.
    lang_pref_helpers.set_language_cookie(request, response, language)
    user = request.user
    set_user_preference(user, LANGUAGE_KEY, language)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def redirect_current_path(request):
    """
    Build a redirect to the URL of the current request (path plus query
    string) so the request is repeated after a language change.
    """
    target = request.get_full_path()
    return HttpResponseRedirect(target)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CourseLanguageCookieMiddleware(MiddlewareMixin):
    """
    LMS middleware that:
    - Sets language based on course language
    - Forces English for exempt paths and authoring MFEs

    Whenever the language cookie of the request differs from the language
    that should apply, the cookie and the user preference are updated and
    the client is redirected to the same URL so the change takes effect.
    """

    # Matches /courses/<course key> followed by "/" or the end of the path.
    COURSE_URL_REGEX = re.compile(
        rf"^/courses/(?P<course_key>{settings.COURSE_KEY_REGEX})(?:/|$)",
        re.IGNORECASE,
    )

    def process_response(self, request, response):
        """
        Process the response to set/reset language cookie based on course language.

        Returns ``response`` untouched when auto-selection does not apply or
        no course language can be determined; otherwise returns either
        ``response`` or a redirect to the same URL.
        """
        if not should_process_request(request):
            return response

        path = getattr(request, "path_info", request.path)

        if self._should_force_english(request, path):
            return self._force_english_if_needed(request, response)

        course_language = self._get_course_language(path)
        if not course_language:
            return response

        return self._apply_course_language(request, response, course_language)

    def _should_force_english(self, request, path):
        """
        Determine if English should be forced based on request origin or exempt paths.

        True when the ``Origin`` header equals the course authoring MFE URL
        or when any configured exempt path occurs as a substring of ``path``.
        """
        return request.META.get(
            "HTTP_ORIGIN"
        ) == settings.COURSE_AUTHORING_MICROFRONTEND_URL or any(
            exempt_path in path
            for exempt_path in settings.AUTO_LANGUAGE_SELECTION_EXEMPT_PATHS
        )

    def _redirect_with_language(self, request, language):
        """
        Build the redirect to the current URL and record ``language`` on it.

        The cookie has to be written on the response that is actually
        returned to the client; writing it on the view's response would be
        lost because that response is replaced by the redirect.
        """
        # NOTE(review): this redirect also replaces responses to non-GET
        # requests; confirm that is intended.
        redirect = redirect_current_path(request)
        set_language(request, redirect, language)
        return redirect

    def _force_english_if_needed(self, request, response):
        """
        Force language to English if not already set.
        """
        cookie_val = lang_pref_helpers.get_language_cookie(request)

        if cookie_val != ENGLISH_LANGUAGE_CODE:
            return self._redirect_with_language(request, ENGLISH_LANGUAGE_CODE)

        return response

    def _get_course_language(self, path):
        """
        Extract course language from the course URL path.

        Returns None when the path is not a course URL or the course
        overview cannot be loaded for any reason.
        """
        match = self.COURSE_URL_REGEX.match(path)
        if not match:
            return None

        try:
            course_key = CourseKey.from_string(match.group("course_key"))
            overview = CourseOverview.get_from_id(course_key)
        except Exception:  # noqa: BLE001
            # Best effort: a lookup failure must not break the response.
            return None

        return getattr(overview, "language", None)

    def _apply_course_language(self, request, response, language):
        """
        Apply the course language if it differs from the current cookie value.
        """
        cookie_val = lang_pref_helpers.get_language_cookie(request)
        if cookie_val != language:
            return self._redirect_with_language(request, language)

        return response
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class CourseLanguageCookieResetMiddleware(MiddlewareMixin):
    """
    CMS middleware that switches a non-English language cookie back to
    English.
    """

    def process_response(self, request, response):
        """
        Reset the language cookie and user preference to English when the
        request carries a language cookie with any other value.

        The same ``response`` object is always returned.
        """
        if should_process_request(request):
            current_language = lang_pref_helpers.get_language_cookie(request)
            # No cookie at all means there is nothing to reset.
            if current_language and current_language != ENGLISH_LANGUAGE_CODE:
                set_language(request, response, ENGLISH_LANGUAGE_CODE)

        return response
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# noqa: INP001
|
|
2
|
+
|
|
3
|
+
"""Settings to provide to edX"""
|
|
4
|
+
|
|
5
|
+
from ol_openedx_course_translations.settings.common import apply_common_settings
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def plugin_settings(settings):
    """
    Populate cms settings

    Applies the shared defaults, then registers the middleware that resets
    the language cookie at the end of ``settings.MIDDLEWARE``.
    """
    apply_common_settings(settings)
    settings.MIDDLEWARE.append(
        "ol_openedx_course_translations.middleware.CourseLanguageCookieResetMiddleware"
    )
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# noqa: INP001
|
|
2
|
+
|
|
3
|
+
"""Common settings for LMS and CMS to provide to edX"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def apply_common_settings(settings):
    """
    Set the plugin's default values on the given LMS/CMS settings object.

    Every listed setting is assigned unconditionally, in the order shown.
    """
    defaults = {
        "ENABLE_AUTO_LANGUAGE_SELECTION": False,
        "AUTO_LANGUAGE_SELECTION_EXEMPT_PATHS": ["admin", "sysadmin", "instructor"],
        "DEEPL_API_KEY": "",
        "COURSE_TRANSLATIONS_TARGET_DIRECTORIES": [
            "about",
            "course",
            "chapter",
            "html",
            "info",
            "problem",
            "sequential",
            "vertical",
            "video",
            "static",
            "tabs",
        ],
        "COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS": [
            ".tar.gz",
            ".tgz",
            ".tar",
        ],
        "COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS": [
            ".html",
            ".xml",
            ".srt",
        ],
    }
    for setting_name, value in defaults.items():
        setattr(settings, setting_name, value)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# noqa: INP001
|
|
2
|
+
|
|
3
|
+
"""Settings to provide to edX"""
|
|
4
|
+
|
|
5
|
+
from ol_openedx_course_translations.settings.common import apply_common_settings
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def plugin_settings(settings):
    """
    Populate lms settings

    Applies the shared defaults, then registers the course-language cookie
    middleware at the end of ``settings.MIDDLEWARE``.
    """
    apply_common_settings(settings)
    settings.MIDDLEWARE.append(
        "ol_openedx_course_translations.middleware.CourseLanguageCookieMiddleware"
    )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
URL configuration for ol_openedx_course_translations app.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from django.conf import settings
|
|
6
|
+
from django.urls import re_path
|
|
7
|
+
|
|
8
|
+
from ol_openedx_course_translations.views import CourseLanguageView
|
|
9
|
+
|
|
10
|
+
# Single route: api/course-language/<course key> -> CourseLanguageView.
urlpatterns = [
    re_path(
        route=rf"api/course-language/{settings.COURSE_KEY_PATTERN}$",
        view=CourseLanguageView.as_view(),
        name="ol_openedx_course_language",
    ),
]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""
|
|
2
|
+
API Views for ol_openedx_course_translations App
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from opaque_keys import InvalidKeyError
|
|
8
|
+
from opaque_keys.edx.keys import CourseKey
|
|
9
|
+
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
|
|
10
|
+
from rest_framework import status
|
|
11
|
+
from rest_framework.permissions import IsAuthenticated
|
|
12
|
+
from rest_framework.response import Response
|
|
13
|
+
from rest_framework.views import APIView
|
|
14
|
+
|
|
15
|
+
log = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CourseLanguageView(APIView):
    """
    API View to retrieve the language of a specified course.

    Requires an authenticated user.

    Sample Request:
        GET /course-translations/api/course-language/{course_key}

    Sample Response:
        200 OK
        {
            "language": "en"
        }

    Error Responses:
        400 Bad Request
        {
            "error": "Invalid course_key."
        }
        404 Not Found
        {
            "error": "Course not found."
        }
        400 Bad Request
        {
            "error": "An unexpected error occurred."
        }
    """

    permission_classes = [IsAuthenticated]

    def get(self, request, course_key_string):  # noqa: ARG002
        """
        Retrieve the language of the specified course.

        Returns the ``language`` of the matching course overview, or one of
        the error responses listed on the class.
        """
        try:
            overview = CourseOverview.get_from_id(
                CourseKey.from_string(course_key_string)
            )
        except InvalidKeyError:
            log.info("Invalid course key %s", course_key_string)
            payload = {"error": "Invalid course_key."}
            return Response(payload, status=status.HTTP_400_BAD_REQUEST)
        except CourseOverview.DoesNotExist:
            log.info("Course not found for key %s", course_key_string)
            payload = {"error": "Course not found."}
            return Response(payload, status=status.HTTP_404_NOT_FOUND)
        except Exception:
            # Anything else is logged with its traceback and reported to
            # the client as a generic 400.
            log.exception("Unexpected error retrieving course %s", course_key_string)
            payload = {"error": "An unexpected error occurred."}
            return Response(payload, status=status.HTTP_400_BAD_REQUEST)
        else:
            return Response({"language": overview.language})
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "ol-openedx-course-translations"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "An Open edX plugin to translate courses"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "MIT Office of Digital Learning"}
|
|
7
|
+
]
|
|
8
|
+
license = "BSD-3-Clause"
|
|
9
|
+
readme = "README.rst"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
keywords = ["Python", "edx"]
|
|
12
|
+
dependencies = [
|
|
13
|
+
"Django>=4.0",
|
|
14
|
+
"djangorestframework>=3.14.0",
|
|
15
|
+
"deepl>=1.25.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.entry-points."cms.djangoapp"]
|
|
19
|
+
ol_openedx_course_translations = "ol_openedx_course_translations.apps:OLOpenedXCourseTranslationsConfig"
|
|
20
|
+
|
|
21
|
+
[project.entry-points."lms.djangoapp"]
|
|
22
|
+
ol_openedx_course_translations = "ol_openedx_course_translations.apps:OLOpenedXCourseTranslationsConfig"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["hatchling"]
|
|
26
|
+
build-backend = "hatchling.build"
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.wheel]
|
|
29
|
+
packages = ["ol_openedx_course_translations"]
|
|
30
|
+
include = [
|
|
31
|
+
"ol_openedx_course_translations/**/*.py",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.sdist]
|
|
35
|
+
include = [
|
|
36
|
+
"ol_openedx_course_translations/**/*",
|
|
37
|
+
"README.rst",
|
|
38
|
+
"pyproject.toml",
|
|
39
|
+
]
|