ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ol-openedx-course-translations might be problematic. Click here for more details.
- ol_openedx_course_translations/admin.py +29 -0
- ol_openedx_course_translations/apps.py +13 -2
- ol_openedx_course_translations/filters.py +39 -0
- ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
- ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
- ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
- ol_openedx_course_translations/management/commands/translate_course.py +472 -475
- ol_openedx_course_translations/middleware.py +143 -0
- ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
- ol_openedx_course_translations/migrations/__init__.py +0 -0
- ol_openedx_course_translations/models.py +57 -0
- ol_openedx_course_translations/providers/__init__.py +1 -0
- ol_openedx_course_translations/providers/base.py +278 -0
- ol_openedx_course_translations/providers/deepl_provider.py +292 -0
- ol_openedx_course_translations/providers/llm_providers.py +581 -0
- ol_openedx_course_translations/settings/cms.py +17 -0
- ol_openedx_course_translations/settings/common.py +58 -30
- ol_openedx_course_translations/settings/lms.py +38 -0
- ol_openedx_course_translations/tasks.py +222 -0
- ol_openedx_course_translations/urls.py +16 -0
- ol_openedx_course_translations/utils/__init__.py +0 -0
- ol_openedx_course_translations/utils/command_utils.py +197 -0
- ol_openedx_course_translations/utils/constants.py +218 -0
- ol_openedx_course_translations/utils/course_translations.py +608 -0
- ol_openedx_course_translations/utils/translation_sync.py +808 -0
- ol_openedx_course_translations/views.py +73 -0
- ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
- ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
- ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
- ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
- ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
- ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Middleware to set/reset language preference cookie and
|
|
3
|
+
user preference based on course language.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
from django.conf import settings
|
|
9
|
+
from django.http import HttpResponseRedirect
|
|
10
|
+
from django.utils.deprecation import MiddlewareMixin
|
|
11
|
+
from opaque_keys.edx.keys import CourseKey
|
|
12
|
+
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
|
|
13
|
+
from openedx.core.djangoapps.lang_pref import LANGUAGE_KEY
|
|
14
|
+
from openedx.core.djangoapps.lang_pref import helpers as lang_pref_helpers
|
|
15
|
+
from openedx.core.djangoapps.user_api.preferences.api import set_user_preference
|
|
16
|
+
|
|
17
|
+
from ol_openedx_course_translations.utils.constants import ENGLISH_LANGUAGE_CODE
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def should_process_request(request):
    """
    Decide whether automatic language selection applies to this request.

    The result is truthy only when the ``ENABLE_AUTO_LANGUAGE_SELECTION``
    setting is on, the request carries a ``user`` attribute, and that user
    is authenticated.
    """
    auto_selection_enabled = settings.ENABLE_AUTO_LANGUAGE_SELECTION
    return (
        auto_selection_enabled
        and hasattr(request, "user")
        and request.user.is_authenticated
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def set_language(request, response, language):
    """
    Persist ``language`` as the active language for the current user.

    The value is written as the language cookie on ``response`` and stored
    under ``LANGUAGE_KEY`` in the preferences of ``request.user``.
    """
    lang_pref_helpers.set_language_cookie(request, response, language)
    current_user = request.user
    set_user_preference(current_user, LANGUAGE_KEY, language)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def redirect_current_path(request):
    """
    Build a redirect back to the URL that is currently being served.

    The target is the request's full path (including the query string), so
    the browser re-requests the same page and picks up the new language.
    """
    current_url = request.get_full_path()
    return HttpResponseRedirect(current_url)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CourseLanguageCookieMiddleware(MiddlewareMixin):
    """
    LMS middleware that:
    - Sets language based on course language
    - Forces English for exempt paths and authoring MFEs

    Whenever the language has to change, the request is redirected to the
    same URL and the new language cookie is attached to that redirect, so
    the follow-up request already carries the updated cookie.
    """

    # Matches paths that start with /courses/<course key> and captures the key.
    COURSE_URL_REGEX = re.compile(
        rf"^/courses/(?P<course_key>{settings.COURSE_KEY_REGEX})(?:/|$)",
        re.IGNORECASE,
    )

    def process_response(self, request, response):
        """
        Process the response to set/reset language cookie based on course language.

        Returns the untouched ``response`` when auto-selection does not apply
        or no language change is needed; otherwise returns a redirect to the
        current URL that carries the new language cookie.
        """
        if not should_process_request(request):
            return response

        path = getattr(request, "path_info", request.path)

        if self._should_force_english(request, path):
            return self._force_english_if_needed(request, response)

        course_language = self._get_course_language(path)
        if not course_language:
            return response

        return self._apply_course_language(request, response, course_language)

    def _should_force_english(self, request, path):
        """
        Determine if English should be forced based on request origin or exempt paths.

        True when the ``Origin`` header equals the course authoring MFE URL,
        or when any configured exempt path occurs as a substring of ``path``.
        """
        origin = request.META.get("HTTP_ORIGIN")
        if origin == settings.COURSE_AUTHORING_MICROFRONTEND_URL:
            return True
        return any(
            exempt_path in path
            for exempt_path in settings.AUTO_LANGUAGE_SELECTION_EXEMPT_PATHS
        )

    def _force_english_if_needed(self, request, response):
        """
        Force language to English if not already set.

        Returns ``response`` unchanged when the cookie is already English,
        otherwise a redirect carrying the English language cookie.
        """
        return self._apply_course_language(request, response, ENGLISH_LANGUAGE_CODE)

    def _get_course_language(self, path):
        """
        Extract course language from the course URL path.

        Returns None when the path is not a course URL or when the course
        key / course overview cannot be resolved.
        """
        match = self.COURSE_URL_REGEX.match(path)
        if not match:
            return None

        try:
            course_key = CourseKey.from_string(match.group("course_key"))
            overview = CourseOverview.get_from_id(course_key)
        except Exception:  # noqa: BLE001
            # Best effort: any lookup failure just means no language override.
            return None

        return getattr(overview, "language", None)

    def _apply_course_language(self, request, response, language):
        """
        Apply the course language if it differs from the current cookie value.

        Returns ``response`` unchanged when the cookie already matches,
        otherwise a redirect to the current URL with the language applied.
        """
        cookie_val = lang_pref_helpers.get_language_cookie(request)
        if cookie_val == language:
            return response

        # The redirect replaces ``response``, so the cookie must be written
        # on the redirect itself; setting it on ``response`` would discard it
        # together with the original response.
        # NOTE(review): this redirects regardless of HTTP method; confirm
        # that non-GET requests are never expected to reach this branch.
        redirect = redirect_current_path(request)
        set_language(request, redirect, language)
        return redirect
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class CourseLanguageCookieResetMiddleware(MiddlewareMixin):
    """
    CMS middleware that switches a non-English language selection back to English.
    """

    def process_response(self, request, response):
        """
        Reset the language cookie and user preference to English when needed.

        Nothing is changed when auto-selection does not apply to the request,
        when no language cookie is present, or when the cookie is already
        English. The incoming ``response`` is always the one returned.
        """
        if should_process_request(request):
            current_language = lang_pref_helpers.get_language_cookie(request)
            needs_reset = (
                current_language and current_language != ENGLISH_LANGUAGE_CODE
            )
            if needs_reset:
                set_language(request, response, ENGLISH_LANGUAGE_CODE)

        return response
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Generated by Django 5.2.9 on 2026-01-15 11:04
|
|
2
|
+
|
|
3
|
+
import opaque_keys.edx.django.models
|
|
4
|
+
from django.db import migrations, models
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Migration(migrations.Migration):
    # Initial migration: creates the CourseTranslationLog model.
    initial = True

    # No dependencies on other migrations are declared.
    dependencies = []  # type: ignore[var-annotated]

    operations = [
        migrations.CreateModel(
            name="CourseTranslationLog",
            fields=[
                # Auto-created integer primary key.
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Indexed course key of the source course.
                (
                    "source_course_id",
                    opaque_keys.edx.django.models.CourseKeyField(
                        db_index=True, max_length=255
                    ),
                ),
                (
                    "source_course_language",
                    models.CharField(
                        help_text="Source language code (e.g., 'EN')", max_length=10
                    ),
                ),
                (
                    "target_course_language",
                    models.CharField(
                        help_text="Target language code for translation (e.g., 'FR')",
                        max_length=10,
                    ),
                ),
                # Provider name/model pair used for SRT translation.
                (
                    "srt_provider_name",
                    models.CharField(
                        help_text="LLM Provider used for SRT translation",
                        max_length=100,
                    ),
                ),
                (
                    "srt_provider_model",
                    models.CharField(
                        blank=True,
                        help_text="LLM provider model used for SRT translation",
                        max_length=100,
                    ),
                ),
                # Provider name/model pair used for content translation.
                (
                    "content_provider_name",
                    models.CharField(
                        help_text="LLM Provider used for content translation",
                        max_length=100,
                    ),
                ),
                (
                    "content_provider_model",
                    models.CharField(
                        blank=True,
                        help_text="LLM provider model used for content translation",
                        max_length=100,
                    ),
                ),
                (
                    "command_stats",
                    models.TextField(
                        blank=True, help_text="Logs from the translation command"
                    ),
                ),
                # Timestamps maintained automatically on insert / save.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
        ),
    ]
|
|
File without changes
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Models for course translations plugin"""
|
|
2
|
+
|
|
3
|
+
from django.db import models
|
|
4
|
+
from opaque_keys.edx.django.models import CourseKeyField
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CourseTranslationLog(models.Model):
    """Record of one course translation run: course, languages and providers."""

    # Course that was translated.
    source_course_id = CourseKeyField(db_index=True, max_length=255)

    # Language pair of the translation.
    source_course_language = models.CharField(
        help_text="Source language code (e.g., 'EN')",
        max_length=10,
    )
    target_course_language = models.CharField(
        help_text="Target language code for translation (e.g., 'FR')",
        max_length=10,
    )

    # Provider (and optional model) used for subtitle (SRT) translation.
    srt_provider_name = models.CharField(
        help_text="LLM Provider used for SRT translation",
        max_length=100,
    )
    srt_provider_model = models.CharField(
        help_text="LLM provider model used for SRT translation",
        blank=True,
        max_length=100,
    )

    # Provider (and optional model) used for course content translation.
    content_provider_name = models.CharField(
        help_text="LLM Provider used for content translation",
        max_length=100,
    )
    content_provider_model = models.CharField(
        help_text="LLM provider model used for content translation",
        blank=True,
        max_length=100,
    )

    # Free-form output captured from the translation command.
    command_stats = models.TextField(
        help_text="Logs from the translation command",
        blank=True,
    )

    # Set on creation and on every save, respectively.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        """Django model options for CourseTranslationLog."""

        app_label = "ol_openedx_course_translations"

    def __str__(self):
        """Return ``<course id> (<source language> → <target language>)``."""
        language_pair = (
            f"({self.source_course_language} → {self.target_course_language})"
        )
        return f"{self.source_course_id} {language_pair}"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Translation providers for course content."""
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""Base classes for translation providers."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import srt
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)

# Number of extra subtitle translation attempts made after the first one
# fails timestamp validation (total attempts = this value + 1).
MAX_SUBTITLE_TRANSLATION_RETRIES = 1
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_glossary(target_language: str, glossary_directory: str | None = None) -> str:
|
|
15
|
+
"""
|
|
16
|
+
Load a glossary for the given language from the glossary directory.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
target_language: Target language code
|
|
20
|
+
glossary_directory: Path to glossary directory
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
Glossary content as string, empty if not found or directory not provided
|
|
24
|
+
"""
|
|
25
|
+
if not glossary_directory:
|
|
26
|
+
return ""
|
|
27
|
+
|
|
28
|
+
glossary_dir_path = Path(glossary_directory)
|
|
29
|
+
if not glossary_dir_path.exists() or not glossary_dir_path.is_dir():
|
|
30
|
+
logger.warning("Glossary directory not found: %s", glossary_dir_path)
|
|
31
|
+
return ""
|
|
32
|
+
|
|
33
|
+
glossary_file_path = glossary_dir_path / f"{target_language.lower()}.txt"
|
|
34
|
+
if not glossary_file_path.exists():
|
|
35
|
+
logger.warning(
|
|
36
|
+
"Glossary file not found for language %s: %s",
|
|
37
|
+
target_language,
|
|
38
|
+
glossary_file_path,
|
|
39
|
+
)
|
|
40
|
+
return ""
|
|
41
|
+
|
|
42
|
+
return glossary_file_path.read_text(encoding="utf-8-sig").strip()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class TranslationProvider(ABC):
    """
    Shared contract and subtitle workflow for translation backends.

    Subclasses implement ``translate_subtitles``, ``translate_text`` and
    ``translate_document``. This base class supplies the
    translate -> validate -> repair pipeline used for SRT cues.
    """

    def __init__(self, primary_api_key: str, repair_api_key: str | None = None):
        """
        Store the credentials used by the provider.

        Args:
            primary_api_key: API key for primary translation service
            repair_api_key: API key for repair service (DeepL API key)
        """
        self.primary_api_key = primary_api_key
        self.repair_api_key = repair_api_key

    def translate_srt_with_validation(
        self,
        subtitle_list: list[srt.Subtitle],
        target_language: str,
        glossary_file: str | None = None,
    ) -> list[srt.Subtitle]:
        """
        Translate subtitles and make sure cue structure survived.

        The provider's own translation is attempted up to
        ``MAX_SUBTITLE_TRANSLATION_RETRIES + 1`` times; the first result that
        passes validation is returned. If none passes, one DeepL-based repair
        pass is run and validated.

        Args:
            subtitle_list: List of subtitle objects to translate
            target_language: Target language code
            glossary_file: Path to glossary directory (optional)

        Returns:
            List of translated subtitle objects with validated timestamps

        Raises:
            ValueError: If the repaired subtitles still fail validation
        """
        log = logger.getChild("TranslationProvider")
        log.info(" 🌐 Translating subtitles to %s...", target_language)

        total_attempts = MAX_SUBTITLE_TRANSLATION_RETRIES + 1
        for attempt in range(total_attempts):
            is_retry = attempt > 0
            if is_retry:
                log.info(" 🔧 Retrying subtitle translations...")

            candidate = self.translate_subtitles(
                subtitle_list, target_language, glossary_file
            )

            log.info(
                " 🔍 %sValidating translated subtitles...",
                "Re-" if is_retry else "",
            )
            if self._validate_timestamps(subtitle_list, candidate):
                log.info(
                    " ✅ Timestamps validated successfully%s.",
                    " on retry" if is_retry else "",
                )
                return candidate

            log.warning(
                " ❌ Timestamp %svalidation failed.", "re-" if is_retry else ""
            )

        # Every direct attempt failed validation: fall back to the repair pass.
        repaired = self._repair_timestamps_with_deepl(subtitle_list, target_language)

        log.info(" 🔍 Re-validating repaired subtitles...")
        if not self._validate_timestamps(subtitle_list, repaired):
            log.error(" ❌ Timestamp repair failed. Translation cannot proceed.")
            raise ValueError(  # noqa: TRY003
                "Subtitle timestamp repair failed - timestamps could not be validated"  # noqa: EM101
            )

        log.info(" ✅ Timestamps repaired and validated successfully.")
        return repaired

    def _validate_timestamps(
        self, original: list[srt.Subtitle], translated: list[srt.Subtitle]
    ) -> bool:
        """
        Check that the translated cues line up with the original cues.

        Reports cue count mismatches, and for each paired cue: index
        mismatches, start/end mismatches, and translations that are blank
        although the original cue has content. At most ten issues are logged
        individually.

        Args:
            original: Original subtitle list
            translated: Translated subtitle list

        Returns:
            True if validation passes, False otherwise
        """
        problems = []

        original_count, translated_count = len(original), len(translated)
        if original_count != translated_count:
            problems.append(
                f"Cue count mismatch: original {original_count}, "
                f"translated {translated_count}"
            )

        # zip() stops at the shorter list; the count mismatch is already noted.
        for cue_number, (source_cue, translated_cue) in enumerate(
            zip(original, translated), start=1
        ):
            if source_cue.index != translated_cue.index:
                problems.append(
                    f"Cue {cue_number}: index mismatch "
                    f"({source_cue.index} vs {translated_cue.index})"
                )
            same_times = (
                source_cue.start == translated_cue.start
                and source_cue.end == translated_cue.end
            )
            if not same_times:
                problems.append(f"Cue {cue_number}: timestamp mismatch")
            if source_cue.content.strip() and not translated_cue.content.strip():
                problems.append(f"Cue {cue_number}: translation is BLANK")

        if not problems:
            return True

        report_limit = 10
        logger.warning("Translation validation found issues:")
        for problem in problems[:report_limit]:
            logger.warning(" - %s", problem)
        unreported = len(problems) - report_limit
        if unreported > 0:
            logger.warning(" ... and %s more issues", unreported)
        return False

    def _repair_timestamps_with_deepl(
        self,
        original: list[srt.Subtitle],
        target_lang: str,
    ) -> list[srt.Subtitle]:
        """
        Produce a replacement translation of the original cues via DeepL.

        Args:
            original: Original subtitle list with correct timestamps
            target_lang: Target language code

        Returns:
            The DeepL translation of ``original``; ``original`` itself when
            no repair API key is configured; or, if the DeepL call fails,
            copies of the original cues with empty content
        """
        if not self.repair_api_key:
            # NOTE(review): the untranslated originals are returned here, so
            # the caller's re-validation compares the list with itself.
            # Confirm that returning source-language cues is the intended
            # fallback.
            logger.warning(" No repair API key available, skipping repair.")
            return original

        logger.info(" 🔧 Repairing timestamps using DeepL...")

        try:
            # Imported locally, as in the original code, rather than at module level.
            from ol_openedx_course_translations.providers.deepl_provider import (  # noqa: PLC0415
                DeepLProvider,
            )

            repair_provider = DeepLProvider(self.repair_api_key, None)
            repaired = repair_provider.translate_subtitles(original, target_lang, None)

            logger.info(" ✅ DeepL repair completed.")
            return repaired  # noqa: TRY300

        except Exception as e:  # noqa: BLE001
            logger.error(" ❌ DeepL repair failed: %s", e)  # noqa: TRY400
            # Keep cue numbering and timing but drop the text.
            return [
                srt.Subtitle(
                    index=cue.index,
                    start=cue.start,
                    end=cue.end,
                    content="",
                )
                for cue in original
            ]

    @abstractmethod
    def translate_subtitles(
        self,
        subtitle_list: list[srt.Subtitle],
        target_language: str,
        glossary_file: str | None = None,
    ) -> list[srt.Subtitle]:
        """
        Translate a list of SRT cues into the target language.

        Args:
            subtitle_list: List of subtitle objects to translate
            target_language: Target language code
            glossary_file: Path to glossary directory (optional)

        Returns:
            List of translated subtitle objects
        """

    @abstractmethod
    def translate_text(
        self,
        source_text: str,
        target_language: str,
        tag_handling: str | None = None,
        glossary_file: str | None = None,
    ) -> str:
        """
        Translate a piece of plain text or HTML/XML markup.

        Args:
            source_text: Text to translate
            target_language: Target language code
            tag_handling: How to handle XML/HTML tags (optional)
            glossary_file: Path to glossary directory (optional)

        Returns:
            Translated text
        """

    @abstractmethod
    def translate_document(
        self,
        input_file_path: Path,
        output_file_path: Path,
        source_language: str,
        target_language: str,
        glossary_file: str | None = None,
    ) -> None:
        """
        Translate a document file from an input path to an output path.

        Args:
            input_file_path: Path to input file
            output_file_path: Path to output file
            source_language: Source language code
            target_language: Target language code
            glossary_file: Path to glossary directory (optional)
        """
|