ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ol-openedx-course-translations might be problematic. Click here for more details.
- ol_openedx_course_translations/admin.py +29 -0
- ol_openedx_course_translations/apps.py +13 -2
- ol_openedx_course_translations/filters.py +39 -0
- ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
- ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
- ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
- ol_openedx_course_translations/management/commands/translate_course.py +472 -475
- ol_openedx_course_translations/middleware.py +143 -0
- ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
- ol_openedx_course_translations/migrations/__init__.py +0 -0
- ol_openedx_course_translations/models.py +57 -0
- ol_openedx_course_translations/providers/__init__.py +1 -0
- ol_openedx_course_translations/providers/base.py +278 -0
- ol_openedx_course_translations/providers/deepl_provider.py +292 -0
- ol_openedx_course_translations/providers/llm_providers.py +581 -0
- ol_openedx_course_translations/settings/cms.py +17 -0
- ol_openedx_course_translations/settings/common.py +58 -30
- ol_openedx_course_translations/settings/lms.py +38 -0
- ol_openedx_course_translations/tasks.py +222 -0
- ol_openedx_course_translations/urls.py +16 -0
- ol_openedx_course_translations/utils/__init__.py +0 -0
- ol_openedx_course_translations/utils/command_utils.py +197 -0
- ol_openedx_course_translations/utils/constants.py +218 -0
- ol_openedx_course_translations/utils/course_translations.py +608 -0
- ol_openedx_course_translations/utils/translation_sync.py +808 -0
- ol_openedx_course_translations/views.py +73 -0
- ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
- ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
- ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
- ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
- ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
- ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# noqa: INP001
|
|
2
|
+
|
|
3
|
+
"""Settings to provide to edX"""
|
|
4
|
+
|
|
5
|
+
from ol_openedx_course_translations.settings.common import apply_common_settings
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def plugin_settings(settings):
    """
    Populate lms settings

    Applies the common plugin settings, registers the course-language cookie
    middleware, and merges this plugin's filter step into
    ``settings.OPEN_EDX_FILTERS_CONFIG`` without discarding steps that are
    already configured there.

    Args:
        settings: Settings object being populated. Its ``MIDDLEWARE`` list and
            ``OPEN_EDX_FILTERS_CONFIG`` mapping are updated in place.
    """
    apply_common_settings(settings)

    middleware_path = (
        "ol_openedx_course_translations.middleware.CourseLanguageCookieMiddleware"
    )
    # Skip the entry when it is already present so that running this hook more
    # than once does not register the middleware twice.
    if middleware_path not in settings.MIDDLEWARE:
        settings.MIDDLEWARE.extend([middleware_path])

    video_transcript_language_filters = {
        "org.openedx.learning.xblock.render.started.v1": {
            "pipeline": [
                "ol_openedx_course_translations.filters.AddDestLangForVideoBlock"
            ],
            "fail_silently": False,
        }
    }
    existing_filters = getattr(settings, "OPEN_EDX_FILTERS_CONFIG", {})

    # Merge pipeline lists instead of overwriting
    for filter_name, config in video_transcript_language_filters.items():
        if filter_name not in existing_filters:
            existing_filters[filter_name] = config
            continue

        existing_config = existing_filters[filter_name]
        # An existing entry may have been declared without a pipeline;
        # setdefault avoids a KeyError in that case.
        pipeline = existing_config.setdefault("pipeline", [])
        for step in config.get("pipeline", []):
            # Do not add a step that is already configured.
            if step not in pipeline:
                pipeline.append(step)

        # do not override fail_silently
        if "fail_silently" in config:
            existing_config.setdefault("fail_silently", config["fail_silently"])

    settings.OPEN_EDX_FILTERS_CONFIG = existing_filters
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Celery tasks for course content translation."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from celery import shared_task
|
|
8
|
+
|
|
9
|
+
from ol_openedx_course_translations.providers.deepl_provider import DeepLProvider
|
|
10
|
+
from ol_openedx_course_translations.utils.course_translations import (
|
|
11
|
+
get_srt_output_filename,
|
|
12
|
+
get_translation_provider,
|
|
13
|
+
translate_policy_fields,
|
|
14
|
+
translate_xml_display_name,
|
|
15
|
+
update_video_xml_complete,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _translate_srt_file(  # noqa: PLR0913
    file_path: Path,
    file_path_str: str,
    source_language: str,
    target_language: str,
    srt_provider_name: str,
    srt_model: str | None,
    glossary_directory: str | None,
) -> dict:
    """Translate one SRT file into a sibling output file and return the result dict."""
    source_lang_pattern = f"-{source_language.lower()}.srt"
    if not file_path.name.lower().endswith(source_lang_pattern):
        return {
            "status": "skipped",
            "file": file_path_str,
            "reason": "Not source language SRT",
        }

    # Build the provider only after the skip check, so skipped files never
    # trigger provider construction and cannot fail because of it.
    provider = get_translation_provider(srt_provider_name, srt_model)

    output_filename = get_srt_output_filename(file_path.name, target_language)
    output_file_path = file_path.parent / output_filename

    provider.translate_document(
        file_path,
        output_file_path,
        source_language,
        target_language,
        glossary_directory,
    )

    return {
        "status": "success",
        "file": file_path_str,
        "output": str(output_file_path),
    }


def _translate_content_file(
    file_path: Path,
    target_language: str,
    content_provider_name: str,
    content_model: str | None,
    glossary_directory: str | None,
) -> None:
    """Translate a non-SRT file and overwrite it with the translated content."""
    file_content = file_path.read_text(encoding="utf-8")

    suffix = file_path.suffix
    # Only XML and HTML files are translated with a tag-handling mode.
    tag_handling_mode = suffix.lstrip(".") if suffix in (".xml", ".html") else None

    provider = get_translation_provider(content_provider_name, content_model)
    translated_content = provider.translate_text(
        file_content,
        target_language.lower(),
        tag_handling=tag_handling_mode,
        glossary_directory=glossary_directory,
    )

    if suffix == ".xml":
        # Handle XML display_name translation only for DeepL provider
        # LLM providers translate display_name as part of the XML translation
        if isinstance(provider, DeepLProvider):
            translated_content = translate_xml_display_name(
                translated_content, target_language, provider, glossary_directory
            )

        # Update video XML if needed (use complete version)
        if file_path.parent.name == "video":
            translated_content = update_video_xml_complete(
                translated_content, target_language
            )

    file_path.write_text(translated_content, encoding="utf-8")


@shared_task(bind=True, name="translate_file_task")
def translate_file_task(  # noqa: PLR0913
    _self,
    file_path_str: str,
    source_language: str,
    target_language: str,
    content_provider_name: str,
    content_model: str | None,
    srt_provider_name: str,
    srt_model: str | None,
    glossary_directory: str | None = None,
):
    """
    Translate a single file asynchronously.

    ``.srt`` files are translated with the SRT provider into a new file next
    to the source. An SRT whose name does not end in ``-<source_language>.srt``
    is skipped. Every other file is read as UTF-8, translated with the content
    provider and written back in place.

    Args:
        _self: Celery task instance (bound)
        file_path_str: Path to the file to translate
        source_language: Source language code
        target_language: Target language code
        content_provider_name: Provider name for content translation
        content_model: Model name for content provider (optional)
        srt_provider_name: Provider name for SRT translation
        srt_model: Model name for SRT provider (optional)
        glossary_directory: Path to glossary directory (optional)

    Returns:
        Dict with ``status`` ("success", "skipped" or "error") and ``file``,
        plus ``output`` (SRT success), ``reason`` (skipped) or ``error``
        (failure). Exceptions are logged and reported, never raised.
    """
    try:
        file_path = Path(file_path_str)

        # Handle SRT files
        if file_path.suffix == ".srt":
            return _translate_srt_file(
                file_path,
                file_path_str,
                source_language,
                target_language,
                srt_provider_name,
                srt_model,
                glossary_directory,
            )

        # Handle other files
        _translate_content_file(
            file_path,
            target_language,
            content_provider_name,
            content_model,
            glossary_directory,
        )
    except Exception as e:
        logger.exception("Failed to translate file %s", file_path_str)
        return {"status": "error", "file": file_path_str, "error": str(e)}
    else:
        return {"status": "success", "file": file_path_str}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@shared_task(bind=True, name="translate_grading_policy_task")
def translate_grading_policy_task(
    _self,
    policy_file_path_str: str,
    target_language: str,
    content_provider_name: str,
    content_model: str | None,
    glossary_directory: str | None = None,
):
    """
    Translate the ``short_label`` values of a grading_policy.json file.

    Each entry of the top-level ``GRADER`` list that has a ``short_label``
    key gets that value replaced by its translation. The file is rewritten
    (UTF-8, 4-space indent) only when at least one label was translated.

    Args:
        _self: Celery task instance (bound)
        policy_file_path_str: Path to the grading_policy.json file
        target_language: Target language code (lower-cased before use)
        content_provider_name: Provider name for content translation
        content_model: Model name for content provider (optional)
        glossary_directory: Path to glossary directory (optional)

    Returns:
        ``{"status": "success", "file": ...}`` on success, otherwise
        ``{"status": "error", "file": ..., "error": ...}``. Exceptions are
        logged and reported, never raised.
    """
    try:
        grading_policy_path = Path(policy_file_path_str)
        translator = get_translation_provider(content_provider_name, content_model)

        raw_policy = grading_policy_path.read_text(encoding="utf-8")
        policy = json.loads(raw_policy)

        translated_count = 0
        for grader in policy.get("GRADER", []):
            if "short_label" not in grader:
                continue
            grader["short_label"] = translator.translate_text(
                grader["short_label"],
                target_language.lower(),
                glossary_directory=glossary_directory,
            )
            translated_count += 1

        # Leave the file untouched when nothing was translated.
        if translated_count:
            serialized = json.dumps(policy, ensure_ascii=False, indent=4)
            grading_policy_path.write_text(serialized, encoding="utf-8")
    except Exception as exc:
        logger.exception("Failed to translate grading policy %s", policy_file_path_str)
        return {"status": "error", "file": policy_file_path_str, "error": str(exc)}

    return {"status": "success", "file": policy_file_path_str}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@shared_task(bind=True, name="translate_policy_json_task")
def translate_policy_json_task(
    _self,
    policy_file_path_str: str,
    target_language: str,
    content_provider_name: str,
    content_model: str | None,
    glossary_directory: str | None = None,
):
    """
    Translate a policy.json file in place.

    Every dict-valued top-level entry of the JSON document is passed to
    ``translate_policy_fields``; other entries are left alone. The document
    is then written back as UTF-8 with a 4-space indent.

    Args:
        _self: Celery task instance (bound)
        policy_file_path_str: Path to the policy.json file
        target_language: Target language code
        content_provider_name: Provider name for content translation
        content_model: Model name for content provider (optional)
        glossary_directory: Path to glossary directory (optional)

    Returns:
        ``{"status": "success", "file": ...}`` on success, otherwise
        ``{"status": "error", "file": ..., "error": ...}``. Exceptions are
        logged and reported, never raised.
    """
    try:
        policy_path = Path(policy_file_path_str)
        translator = get_translation_provider(content_provider_name, content_model)

        raw_policy = policy_path.read_text(encoding="utf-8")
        policy_document = json.loads(raw_policy)

        for policy_entry in policy_document.values():
            if isinstance(policy_entry, dict):
                translate_policy_fields(
                    policy_entry, target_language, translator, glossary_directory
                )

        serialized = json.dumps(policy_document, ensure_ascii=False, indent=4)
        policy_path.write_text(serialized, encoding="utf-8")
    except Exception as exc:
        logger.exception("Failed to translate policy.json %s", policy_file_path_str)
        return {"status": "error", "file": policy_file_path_str, "error": str(exc)}

    return {"status": "success", "file": policy_file_path_str}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
URL configuration for ol_openedx_course_translations app.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from django.conf import settings
|
|
6
|
+
from django.urls import re_path
|
|
7
|
+
|
|
8
|
+
from ol_openedx_course_translations.views import CourseLanguageView
|
|
9
|
+
|
|
10
|
+
urlpatterns = [
|
|
11
|
+
re_path(
|
|
12
|
+
rf"api/course-language/{settings.COURSE_KEY_PATTERN}$",
|
|
13
|
+
CourseLanguageView.as_view(),
|
|
14
|
+
name="ol_openedx_course_language",
|
|
15
|
+
),
|
|
16
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for management commands.
|
|
3
|
+
|
|
4
|
+
This module provides reusable utilities for Django management commands,
|
|
5
|
+
including validation, error handling, git operations, and configuration helpers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from django.conf import settings
|
|
14
|
+
from django.core.management.base import CommandError
|
|
15
|
+
|
|
16
|
+
from ol_openedx_course_translations.utils.constants import (
|
|
17
|
+
PROVIDER_GEMINI,
|
|
18
|
+
PROVIDER_MISTRAL,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# ============================================================================
|
|
22
|
+
# Validation Utilities
|
|
23
|
+
# ============================================================================
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def validate_language_code(code: str, field_name: str = "language code") -> None:
    """
    Validate language code format (xx or xx_XX).

    Args:
        code: Language code to check, e.g. ``"el"`` or ``"es_ES"``.
        field_name: Label used for the value in the error message.

    Raises:
        CommandError: If ``code`` is not exactly two lowercase letters,
            optionally followed by ``_`` and two uppercase letters.
    """
    # fullmatch is required: with re.match, a trailing "$" also matches just
    # before a final newline, so "el\n" would be accepted.
    if not re.fullmatch(r"[a-z]{2}(_[A-Z]{2})?", code):
        msg = (
            f"Invalid {field_name} format: {code}. "
            f"Expected format: 'xx' or 'xx_XX' (e.g., 'el', 'es_ES')"
        )
        raise CommandError(msg)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def validate_branch_name(branch_name: str) -> None:
    """
    Validate branch name format to prevent injection.

    Accepts only lowercase letters, digits, ``/``, ``_`` and ``-``, and
    rejects names that start with ``-``.

    Args:
        branch_name: Branch name to check.

    Raises:
        CommandError: If the name is empty, contains any other character,
            or starts with ``-``.
    """
    # A leading "-" could be parsed as a command-line option when the name is
    # passed to git. fullmatch closes the trailing-newline hole that
    # re.match with "$" leaves open.
    if branch_name.startswith("-") or not re.fullmatch(
        r"[a-z0-9/_-]+", branch_name
    ):
        msg = f"Invalid branch name format: {branch_name}"
        raise CommandError(msg)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ============================================================================
|
|
44
|
+
# Git Utilities
|
|
45
|
+
# ============================================================================
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def sanitize_for_git(text: str) -> str:
    """Return ``text`` with every character removed except word characters, whitespace and ``-``."""
    disallowed = re.compile(r"[^\w\s-]")
    return disallowed.sub("", text)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def create_branch_name(lang_code: str) -> str:
|
|
54
|
+
"""Create a safe branch name from language code."""
|
|
55
|
+
safe_lang = re.sub(r"[^a-z0-9_-]", "", lang_code.lower())
|
|
56
|
+
timestamp = datetime.now(tz=UTC).strftime("%Y%m%d-%H%M%S")
|
|
57
|
+
return f"feature/add-{safe_lang}-translations-{timestamp}"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ============================================================================
|
|
61
|
+
# Configuration Helpers
|
|
62
|
+
# ============================================================================
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_config_value(key: str, options: dict, default: Any = None) -> Any:
    """
    Resolve a configuration value from options, settings, or environment.

    Sources are tried in this order and the first truthy value wins:

    1. ``options[key]``, then ``options`` under ``key`` with ``_`` replaced
       by ``-``.
    2. The Django setting ``TRANSLATIONS_<KEY>`` (key upper-cased, ``-``
       replaced by ``_``).
    3. The environment variable of the same name.

    Falsy values (empty string, ``0``, ``None``) count as "not set". If no
    source yields a value, ``default`` is returned.
    """
    # Command-line options take precedence.
    dashed_key = key.replace("_", "-")
    cli_value = options.get(key) or options.get(dashed_key)
    if cli_value:
        return cli_value

    # Settings and environment share the same TRANSLATIONS_-prefixed name.
    prefixed_name = f"TRANSLATIONS_{key.upper().replace('-', '_')}"

    configured = getattr(settings, prefixed_name, None)
    if configured:
        return configured

    return os.environ.get(prefixed_name) or default
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_default_provider() -> str | None:
    """
    Return ``TRANSLATIONS_PROVIDERS["default_provider"]`` from settings.

    Returns ``None`` when the setting is not a dict or has no such key.
    A missing setting is treated as an empty dict.
    """
    configured = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
    return configured.get("default_provider") if isinstance(configured, dict) else None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_default_model_for_provider(provider: str) -> str | None:
    """
    Return ``TRANSLATIONS_PROVIDERS[provider]["default_model"]`` from settings.

    Returns ``None`` when the setting or the provider entry is not a dict,
    or when the provider or its ``default_model`` key is absent.
    """
    configured = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
    if isinstance(configured, dict):
        entry = configured.get(provider, {})
        if isinstance(entry, dict):
            return entry.get("default_model")
    return None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def configure_litellm_for_provider(
    provider: str, model: str, api_key: str | None, **base_kwargs
) -> dict[str, Any]:
    """
    Build the keyword arguments for a LiteLLM completion call.

    Starts from a copy of ``base_kwargs``, sets ``model``, and adds
    ``api_key`` when one is given. For Gemini and Mistral the model name gets
    the provider prefix unless it already has one.

    Returns:
        A new dict; ``base_kwargs`` itself is not modified.
    """
    kwargs: dict[str, Any] = {**base_kwargs, "model": model}

    if api_key:
        kwargs["api_key"] = api_key

    if provider == PROVIDER_GEMINI:
        # If no prefix, add gemini/ to force Gemini API usage (not Vertex AI)
        # If vertex_ai/ or gemini/ prefix already exists, respect it
        already_prefixed = model.startswith(("gemini/", "vertex_ai/"))
        if not already_prefixed:
            kwargs["model"] = f"gemini/{model}"
        # Gemini 3 models require temperature = 1.0 to avoid issues:
        # - Infinite loops in response generation
        # - Degraded reasoning performance
        # - Failure on complex tasks
        # See: https://docs.litellm.ai/docs/providers/gemini
        if "gemini-3" in model.lower():
            kwargs["temperature"] = 1.0
    elif provider == PROVIDER_MISTRAL:
        if not model.startswith("mistral/"):
            kwargs["model"] = f"mistral/{model}"

    return kwargs
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ============================================================================
|
|
137
|
+
# Error Handling Utilities
|
|
138
|
+
# ============================================================================
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def is_retryable_error(error: Exception) -> bool:
    """
    Check if an error is retryable (network issues, rate limits, timeouts).

    The decision is a case-insensitive substring search over the exception's
    type name and message. Non-retryable patterns are checked first and win
    over retryable ones. Errors matching neither list are treated as
    retryable.

    Args:
        error: The exception to check

    Returns:
        True if the error is retryable, False otherwise

    Examples:
        >>> is_retryable_error(ConnectionError("Connection timeout"))
        True
        >>> is_retryable_error(ValueError("Invalid API key"))
        False
    """
    # Include the type name: str(error) holds only the message, so the
    # "commanderror" pattern could otherwise never match a CommandError.
    error_str = f"{type(error).__name__}: {error}".lower()

    # Retryable errors
    retryable_patterns = (
        "timeout",
        "connection",
        "rate limit",
        "429",
        "503",
        "502",
        "500",
        "temporarily unavailable",
        "service unavailable",
        "too many requests",
    )

    # Non-retryable errors (don't retry these)
    non_retryable_patterns = (
        "invalid api key",
        "authentication",
        "401",
        "403",
        "not found",
        "404",
        "bad request",
        "400",
        "commanderror",  # Our custom errors that are usually non-retryable
    )

    # Check for non-retryable first
    if any(pattern in error_str for pattern in non_retryable_patterns):
        return False

    # Check for retryable patterns
    if any(pattern in error_str for pattern in retryable_patterns):
        return True

    # Default: retry unknown errors (could be transient)
    return True
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""Constants for translation synchronization."""
|
|
2
|
+
|
|
3
|
+
# LLM Provider names
|
|
4
|
+
PROVIDER_DEEPL = "deepl"
|
|
5
|
+
PROVIDER_GEMINI = "gemini"
|
|
6
|
+
PROVIDER_MISTRAL = "mistral"
|
|
7
|
+
PROVIDER_OPENAI = "openai"
|
|
8
|
+
|
|
9
|
+
# Learner-facing frontend applications that require translation
|
|
10
|
+
LEARNER_FACING_APPS = [
|
|
11
|
+
"frontend-app-learning",
|
|
12
|
+
"frontend-app-learner-dashboard",
|
|
13
|
+
"frontend-app-learner-record",
|
|
14
|
+
"frontend-app-account",
|
|
15
|
+
"frontend-app-profile",
|
|
16
|
+
"frontend-app-authn",
|
|
17
|
+
"frontend-app-catalog",
|
|
18
|
+
"frontend-app-discussions",
|
|
19
|
+
"frontend-component-header",
|
|
20
|
+
"frontend-component-footer",
|
|
21
|
+
"frontend-app-ora",
|
|
22
|
+
"frontend-platform",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
# Plural forms configuration for different languages
|
|
26
|
+
# Based on GNU gettext plural forms specification
|
|
27
|
+
# See: https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
|
|
28
|
+
PLURAL_FORMS = {
|
|
29
|
+
# Languages with no plural forms (nplurals=1)
|
|
30
|
+
"ja": "nplurals=1; plural=0;", # Japanese
|
|
31
|
+
"ko": "nplurals=1; plural=0;", # Korean
|
|
32
|
+
"zh": "nplurals=1; plural=0;", # Chinese (all variants)
|
|
33
|
+
"th": "nplurals=1; plural=0;", # Thai
|
|
34
|
+
"vi": "nplurals=1; plural=0;", # Vietnamese
|
|
35
|
+
"id": "nplurals=1; plural=0;", # Indonesian
|
|
36
|
+
"ms": "nplurals=1; plural=0;", # Malay
|
|
37
|
+
"km": "nplurals=1; plural=0;", # Khmer
|
|
38
|
+
"bo": "nplurals=1; plural=0;", # Tibetan
|
|
39
|
+
# Languages with 2 plural forms: plural=(n != 1)
|
|
40
|
+
"en": "nplurals=2; plural=(n != 1);", # English
|
|
41
|
+
"es": "nplurals=2; plural=(n != 1);", # Spanish (all variants)
|
|
42
|
+
"de": "nplurals=2; plural=(n != 1);", # German
|
|
43
|
+
"el": "nplurals=2; plural=(n != 1);", # Greek
|
|
44
|
+
"it": "nplurals=2; plural=(n != 1);", # Italian
|
|
45
|
+
"pt": "nplurals=2; plural=(n != 1);", # Portuguese (all variants)
|
|
46
|
+
"nl": "nplurals=2; plural=(n != 1);", # Dutch
|
|
47
|
+
"sv": "nplurals=2; plural=(n != 1);", # Swedish
|
|
48
|
+
"da": "nplurals=2; plural=(n != 1);", # Danish
|
|
49
|
+
"no": "nplurals=2; plural=(n != 1);", # Norwegian
|
|
50
|
+
"nb": "nplurals=2; plural=(n != 1);", # Norwegian Bokmål
|
|
51
|
+
"nn": "nplurals=2; plural=(n != 1);", # Norwegian Nynorsk
|
|
52
|
+
"fi": "nplurals=2; plural=(n != 1);", # Finnish
|
|
53
|
+
"is": "nplurals=2; plural=(n != 1);", # Icelandic
|
|
54
|
+
"et": "nplurals=2; plural=(n != 1);", # Estonian
|
|
55
|
+
"lv": "nplurals=2; plural=(n != 1);", # Latvian
|
|
56
|
+
"he": "nplurals=2; plural=(n != 1);", # Hebrew
|
|
57
|
+
"hi": "nplurals=2; plural=(n != 1);", # Hindi
|
|
58
|
+
"bn": "nplurals=2; plural=(n != 1);", # Bengali
|
|
59
|
+
"gu": "nplurals=2; plural=(n != 1);", # Gujarati
|
|
60
|
+
"kn": "nplurals=2; plural=(n != 1);", # Kannada
|
|
61
|
+
"ml": "nplurals=2; plural=(n != 1);", # Malayalam
|
|
62
|
+
"ta": "nplurals=2; plural=(n != 1);", # Tamil
|
|
63
|
+
"te": "nplurals=2; plural=(n != 1);", # Telugu
|
|
64
|
+
"or": "nplurals=2; plural=(n != 1);", # Oriya
|
|
65
|
+
"si": "nplurals=2; plural=(n != 1);", # Sinhala
|
|
66
|
+
"ne": "nplurals=2; plural=(n != 1);", # Nepali
|
|
67
|
+
"mr": "nplurals=2; plural=(n != 1);", # Marathi
|
|
68
|
+
"ur": "nplurals=2; plural=(n != 1);", # Urdu
|
|
69
|
+
"az": "nplurals=2; plural=(n != 1);", # Azerbaijani
|
|
70
|
+
"uz": "nplurals=2; plural=(n != 1);", # Uzbek
|
|
71
|
+
"kk": "nplurals=2; plural=(n != 1);", # Kazakh
|
|
72
|
+
"mn": "nplurals=2; plural=(n != 1);", # Mongolian
|
|
73
|
+
"sq": "nplurals=2; plural=(n != 1);", # Albanian
|
|
74
|
+
"eu": "nplurals=2; plural=(n != 1);", # Basque
|
|
75
|
+
"ca": "nplurals=2; plural=(n != 1);", # Catalan
|
|
76
|
+
"gl": "nplurals=2; plural=(n != 1);", # Galician
|
|
77
|
+
"tr": "nplurals=2; plural=(n != 1);", # Turkish
|
|
78
|
+
"af": "nplurals=2; plural=(n != 1);", # Afrikaans
|
|
79
|
+
"fil": "nplurals=2; plural=(n != 1);", # Filipino
|
|
80
|
+
# Languages with 2 plural forms: plural=(n > 1)
|
|
81
|
+
"fr": "nplurals=2; plural=(n > 1);", # French
|
|
82
|
+
"br": "nplurals=2; plural=(n > 1);", # Breton
|
|
83
|
+
# Languages with 3 plural forms
|
|
84
|
+
"pl": (
|
|
85
|
+
"nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && "
|
|
86
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
87
|
+
), # Polish
|
|
88
|
+
"ru": (
|
|
89
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && "
|
|
90
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
91
|
+
), # Russian
|
|
92
|
+
"uk": (
|
|
93
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && "
|
|
94
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
95
|
+
), # Ukrainian
|
|
96
|
+
"be": (
|
|
97
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && "
|
|
98
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
99
|
+
), # Belarusian
|
|
100
|
+
"sr": (
|
|
101
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && "
|
|
102
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
103
|
+
), # Serbian
|
|
104
|
+
"hr": (
|
|
105
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && "
|
|
106
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
107
|
+
), # Croatian
|
|
108
|
+
"bs": (
|
|
109
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && "
|
|
110
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
111
|
+
), # Bosnian
|
|
112
|
+
"cs": "nplurals=3; plural=(n==1 ? 0 : (n>=2 && n<=4) ? 1 : 2);", # Czech
|
|
113
|
+
"sk": "nplurals=3; plural=(n==1 ? 0 : (n>=2 && n<=4) ? 1 : 2);", # Slovak
|
|
114
|
+
"lt": (
|
|
115
|
+
"nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && "
|
|
116
|
+
"(n%100<10 || n%100>=20) ? 1 : 2);"
|
|
117
|
+
), # Lithuanian
|
|
118
|
+
"hy": "nplurals=3; plural=(n==1 ? 0 : n>=2 && n<=4 ? 1 : 2);", # Armenian
|
|
119
|
+
"ro": (
|
|
120
|
+
"nplurals=3; plural=(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);"
|
|
121
|
+
), # Romanian
|
|
122
|
+
# Languages with 4 plural forms
|
|
123
|
+
"cy": (
|
|
124
|
+
"nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : (n==8 || n==11) ? 2 : 3);"
|
|
125
|
+
), # Welsh
|
|
126
|
+
"ga": "nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : (n>2 && n<7) ? 2 : 3);", # Irish
|
|
127
|
+
"gd": (
|
|
128
|
+
"nplurals=4; plural=(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : "
|
|
129
|
+
"(n>2 && n<20) ? 2 : 3);"
|
|
130
|
+
), # Scottish Gaelic
|
|
131
|
+
"mt": (
|
|
132
|
+
"nplurals=4; plural=(n==1 ? 0 : n==0 || (n%100>=2 && n%100<=10) ? 1 : "
|
|
133
|
+
"(n%100>=11 && n%100<=19) ? 2 : 3);"
|
|
134
|
+
), # Maltese
|
|
135
|
+
# Languages with 6 plural forms
|
|
136
|
+
"ar": (
|
|
137
|
+
"nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && "
|
|
138
|
+
"n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);"
|
|
139
|
+
), # Arabic
|
|
140
|
+
# Other languages
|
|
141
|
+
"fa": "nplurals=2; plural=(n==0 || n==1 ? 0 : 1);", # Persian/Farsi
|
|
142
|
+
"hu": "nplurals=2; plural=(n != 1);", # Hungarian
|
|
143
|
+
"bg": "nplurals=2; plural=(n != 1);", # Bulgarian
|
|
144
|
+
"am": "nplurals=2; plural=(n > 1);", # Amharic
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
# Default plural form fallback (English-style)
|
|
148
|
+
# Used when a language code is not found in PLURAL_FORMS
|
|
149
|
+
DEFAULT_PLURAL_FORM = "nplurals=2; plural=(n != 1);"
|
|
150
|
+
|
|
151
|
+
# Typo patterns to fix in translation files
|
|
152
|
+
TYPO_PATTERNS = [
|
|
153
|
+
("Serch", "Search"),
|
|
154
|
+
]
|
|
155
|
+
|
|
156
|
+
# Backend PO file names
|
|
157
|
+
BACKEND_PO_FILES = ["django.po", "djangojs.po"]
|
|
158
|
+
|
|
159
|
+
# PO file header metadata
|
|
160
|
+
PO_HEADER_PROJECT_VERSION = "0.1a"
|
|
161
|
+
PO_HEADER_BUGS_EMAIL = "openedx-translation@googlegroups.com"
|
|
162
|
+
PO_HEADER_POT_CREATION_DATE = "2023-06-13 08:00+0000"
|
|
163
|
+
PO_HEADER_MIME_VERSION = "1.0"
|
|
164
|
+
PO_HEADER_CONTENT_TYPE = "text/plain; charset=UTF-8"
|
|
165
|
+
PO_HEADER_CONTENT_TRANSFER_ENCODING = "8bit"
|
|
166
|
+
PO_HEADER_TRANSIFEX_TEAM_BASE_URL = "https://app.transifex.com/open-edx/teams/6205"
|
|
167
|
+
|
|
168
|
+
# File and directory names
|
|
169
|
+
TRANSLATION_FILE_NAMES = {
|
|
170
|
+
"transifex_input": "transifex_input.json",
|
|
171
|
+
"english": "en.json",
|
|
172
|
+
"messages_dir": "messages",
|
|
173
|
+
"i18n_dir": "i18n",
|
|
174
|
+
"locale_dir": "locale",
|
|
175
|
+
"lc_messages": "LC_MESSAGES",
|
|
176
|
+
"conf_dir": "conf",
|
|
177
|
+
"edx_platform": "edx-platform",
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# JSON file formatting
|
|
181
|
+
DEFAULT_JSON_INDENT = 2
|
|
182
|
+
|
|
183
|
+
# Language code to human-readable name mapping
|
|
184
|
+
# Used in PO file headers for Language-Team field
|
|
185
|
+
# Keys are language codes; values are the English names of the languages.
LANGUAGE_MAPPING = {
    "ar": "Arabic",
    "de": "German",
    "el": "Greek",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "id": "Indonesian",
    "ja": "Japanese",
    # "ko" is the ISO 639-1 code for Korean and the key used in PLURAL_FORMS.
    "ko": "Korean",
    # "kr" is not the ISO 639-1 code for Korean; it is kept only so existing
    # lookups that use it keep working.
    "kr": "Korean",
    "pt": "Portuguese",
    "ru": "Russian",
    "sq": "Albanian",
    "tr": "Turkish",
    "zh": "Chinese",
}
|
|
201
|
+
|
|
202
|
+
# Maximum number of retries for failed translation batches
|
|
203
|
+
MAX_RETRIES = 3
|
|
204
|
+
|
|
205
|
+
# Glossary parsing constants
|
|
206
|
+
EXPECTED_GLOSSARY_PARTS = 2 # English term and translation separated by "->"
|
|
207
|
+
|
|
208
|
+
# HTTP Status Codes
|
|
209
|
+
HTTP_OK = 200
|
|
210
|
+
HTTP_CREATED = 201
|
|
211
|
+
HTTP_NOT_FOUND = 404
|
|
212
|
+
HTTP_TOO_MANY_REQUESTS = 429
|
|
213
|
+
HTTP_UNPROCESSABLE_ENTITY = 422
|
|
214
|
+
|
|
215
|
+
# Error message length limit
|
|
216
|
+
MAX_ERROR_MESSAGE_LENGTH = 200
|
|
217
|
+
|
|
218
|
+
ENGLISH_LANGUAGE_CODE = "en"
|