ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ol-openedx-course-translations might be problematic.
- ol_openedx_course_translations/apps.py +12 -2
- ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
- ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
- ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
- ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
- ol_openedx_course_translations/management/commands/translate_course.py +419 -470
- ol_openedx_course_translations/middleware.py +143 -0
- ol_openedx_course_translations/providers/__init__.py +1 -0
- ol_openedx_course_translations/providers/base.py +278 -0
- ol_openedx_course_translations/providers/deepl_provider.py +292 -0
- ol_openedx_course_translations/providers/llm_providers.py +565 -0
- ol_openedx_course_translations/settings/cms.py +17 -0
- ol_openedx_course_translations/settings/common.py +57 -30
- ol_openedx_course_translations/settings/lms.py +15 -0
- ol_openedx_course_translations/tasks.py +222 -0
- ol_openedx_course_translations/urls.py +16 -0
- ol_openedx_course_translations/utils/__init__.py +0 -0
- ol_openedx_course_translations/utils/command_utils.py +197 -0
- ol_openedx_course_translations/utils/constants.py +216 -0
- ol_openedx_course_translations/utils/course_translations.py +581 -0
- ol_openedx_course_translations/utils/translation_sync.py +808 -0
- ol_openedx_course_translations/views.py +73 -0
- ol_openedx_course_translations-0.3.0.dist-info/METADATA +407 -0
- ol_openedx_course_translations-0.3.0.dist-info/RECORD +35 -0
- ol_openedx_course_translations-0.3.0.dist-info/entry_points.txt +5 -0
- ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
- ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
- ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/WHEEL +0 -0
- {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.0.dist-info}/licenses/LICENSE.txt +0 -0
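The new sync_and_translate_language management command shown in the diff below reads its configuration from Django settings or matching environment variables (TRANSLATIONS_REPO_PATH, TRANSLATIONS_REPO_URL, TRANSLATIONS_GITHUB_TOKEN, and TRANSLATIONS_PROVIDERS). A minimal sketch of what that configuration might look like follows; the nested per-provider layout and the sample values are assumptions inferred from the command's error messages, not taken from the package, so verify against ol_openedx_course_translations/settings/common.py.

# Illustrative sketch only -- key layout inferred from the command's error messages.
TRANSLATIONS_REPO_PATH = "/var/tmp/mitxonline-translations"
TRANSLATIONS_REPO_URL = "https://github.com/mitodl/mitxonline-translations.git"
TRANSLATIONS_GITHUB_TOKEN = "<token used to push branches and open pull requests>"
TRANSLATIONS_PROVIDERS = {
    "default_provider": "openai",
    "openai": {
        "default_model": "gpt-4-turbo",
        "api_key": "<openai api key>",  # or set OPENAI_API_KEY in the environment
    },
}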
|
@@ -0,0 +1,1866 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Django management command to sync translation keys, translate using LLM, and create PRs.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
./manage.py cms sync_and_translate_language el
|
|
6
|
+
./manage.py cms sync_and_translate_language el \\
|
|
7
|
+
--provider openai --model gpt-4-turbo --glossary
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import shutil
|
|
15
|
+
import subprocess
|
|
16
|
+
import textwrap
|
|
17
|
+
import time
|
|
18
|
+
import urllib.parse
|
|
19
|
+
from configparser import NoSectionError
|
|
20
|
+
from contextlib import contextmanager, suppress
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, TypedDict, cast
|
|
23
|
+
|
|
24
|
+
import git
|
|
25
|
+
import requests
|
|
26
|
+
from django.conf import settings
|
|
27
|
+
from django.core.management.base import BaseCommand, CommandError
|
|
28
|
+
from litellm import completion
|
|
29
|
+
|
|
30
|
+
import ol_openedx_course_translations.utils.translation_sync as utils_module
|
|
31
|
+
from ol_openedx_course_translations.utils.command_utils import (
|
|
32
|
+
configure_litellm_for_provider,
|
|
33
|
+
create_branch_name,
|
|
34
|
+
get_config_value,
|
|
35
|
+
get_default_model_for_provider,
|
|
36
|
+
get_default_provider,
|
|
37
|
+
is_retryable_error,
|
|
38
|
+
sanitize_for_git,
|
|
39
|
+
validate_branch_name,
|
|
40
|
+
validate_language_code,
|
|
41
|
+
)
|
|
42
|
+
from ol_openedx_course_translations.utils.constants import (
|
|
43
|
+
HTTP_CREATED,
|
|
44
|
+
HTTP_NOT_FOUND,
|
|
45
|
+
HTTP_OK,
|
|
46
|
+
HTTP_TOO_MANY_REQUESTS,
|
|
47
|
+
HTTP_UNPROCESSABLE_ENTITY,
|
|
48
|
+
LANGUAGE_MAPPING,
|
|
49
|
+
MAX_ERROR_MESSAGE_LENGTH,
|
|
50
|
+
MAX_RETRIES,
|
|
51
|
+
PROVIDER_GEMINI,
|
|
52
|
+
PROVIDER_MISTRAL,
|
|
53
|
+
)
|
|
54
|
+
from ol_openedx_course_translations.utils.translation_sync import (
|
|
55
|
+
apply_json_translations,
|
|
56
|
+
apply_po_translations,
|
|
57
|
+
extract_empty_keys,
|
|
58
|
+
load_glossary,
|
|
59
|
+
match_glossary_term,
|
|
60
|
+
sync_all_translations,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
logger = logging.getLogger(__name__)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class GitRepository:
|
|
67
|
+
"""Helper class for git operations with consistent error handling."""
|
|
68
|
+
|
|
69
|
+
def __init__(self, repo_path: str):
|
|
70
|
+
self.repo_path = Path(repo_path)
|
|
71
|
+
try:
|
|
72
|
+
self.repo = git.Repo(repo_path)
|
|
73
|
+
except git.exc.InvalidGitRepositoryError as e:
|
|
74
|
+
msg = (
|
|
75
|
+
f"Invalid git repository at {repo_path}. "
|
|
76
|
+
f"Please remove it or specify a different path."
|
|
77
|
+
)
|
|
78
|
+
raise CommandError(msg) from e
|
|
79
|
+
except git.exc.GitCommandError as e:
|
|
80
|
+
msg = f"Git error accessing repository: {e!s}"
|
|
81
|
+
raise CommandError(msg) from e
|
|
82
|
+
|
|
83
|
+
def _handle_git_error(self, operation: str, error: Exception) -> None:
|
|
84
|
+
"""Convert git errors to CommandError with context."""
|
|
85
|
+
msg = f"Git error {operation}: {error!s}"
|
|
86
|
+
raise CommandError(msg) from error
|
|
87
|
+
|
|
88
|
+
def _get_main_branch_name(self) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Determine the main branch name.
|
|
91
|
+
Checks local branches first, then remote branches.
|
|
92
|
+
Fetches from remote if needed to check remote branches.
|
|
93
|
+
"""
|
|
94
|
+
# Check if 'main' exists locally
|
|
95
|
+
if "main" in [ref.name for ref in self.repo.heads]:
|
|
96
|
+
return "main"
|
|
97
|
+
|
|
98
|
+
# If not found locally, fetch from remote and check remote branches
|
|
99
|
+
with suppress(git.exc.GitCommandError):
|
|
100
|
+
# If fetch fails, we'll try to check existing remote refs anyway
|
|
101
|
+
self.repo.remotes.origin.fetch()
|
|
102
|
+
|
|
103
|
+
# Check remote branches
|
|
104
|
+
if "origin/main" in [ref.name for ref in self.repo.remotes.origin.refs]:
|
|
105
|
+
return "main"
|
|
106
|
+
|
|
107
|
+
msg = "Main branch not found locally or on remote"
|
|
108
|
+
raise CommandError(msg)
|
|
109
|
+
|
|
110
|
+
def ensure_clean(self) -> bool:
|
|
111
|
+
"""
|
|
112
|
+
Clean uncommitted changes in tracked files.
|
|
113
|
+
Returns True if cleaned, False if already clean.
|
|
114
|
+
|
|
115
|
+
This ensures any leftover staged/uncommitted changes from a previous
|
|
116
|
+
interrupted run are removed before starting a new translation sync.
|
|
117
|
+
"""
|
|
118
|
+
try:
|
|
119
|
+
if self.repo.is_dirty(untracked_files=False):
|
|
120
|
+
self.repo.head.reset(index=True, working_tree=True)
|
|
121
|
+
return True
|
|
122
|
+
else:
|
|
123
|
+
return False
|
|
124
|
+
except git.exc.GitCommandError as e:
|
|
125
|
+
self._handle_git_error("cleaning repository", e)
|
|
126
|
+
return False # Never reached, but satisfies type checker
|
|
127
|
+
|
|
128
|
+
def switch_to_main(self) -> None:
|
|
129
|
+
"""Switch to main branch, deleting current branch if it's not main."""
|
|
130
|
+
try:
|
|
131
|
+
# Get current branch name (might be in detached HEAD state)
|
|
132
|
+
try:
|
|
133
|
+
current_branch = self.repo.active_branch.name
|
|
134
|
+
except TypeError:
|
|
135
|
+
# Detached HEAD state - we'll checkout main anyway
|
|
136
|
+
current_branch = None
|
|
137
|
+
|
|
138
|
+
# Get the main branch name
|
|
139
|
+
main_branch = self._get_main_branch_name()
|
|
140
|
+
|
|
141
|
+
# Only switch if we're not already on the main branch
|
|
142
|
+
if current_branch != main_branch:
|
|
143
|
+
# Try to checkout the branch (will work if it exists locally)
|
|
144
|
+
try:
|
|
145
|
+
self.repo.git.checkout(main_branch)
|
|
146
|
+
except git.exc.GitCommandError:
|
|
147
|
+
# Branch doesn't exist locally, checkout from remote
|
|
148
|
+
self.repo.git.checkout("-b", main_branch, f"origin/{main_branch}")
|
|
149
|
+
|
|
150
|
+
# Delete the previous branch if it exists and is not the main branch
|
|
151
|
+
if current_branch and current_branch != main_branch:
|
|
152
|
+
with suppress(git.exc.GitCommandError):
|
|
153
|
+
self.repo.git.branch("-D", current_branch)
|
|
154
|
+
except (git.exc.GitCommandError, TypeError) as e:
|
|
155
|
+
self._handle_git_error("switching branches", e)
|
|
156
|
+
|
|
157
|
+
def update_from_remote(self) -> None:
|
|
158
|
+
"""Fetch and pull latest changes from origin/main."""
|
|
159
|
+
try:
|
|
160
|
+
self.repo.remotes.origin.fetch()
|
|
161
|
+
main_branch = self._get_main_branch_name()
|
|
162
|
+
self.repo.git.pull("origin", main_branch)
|
|
163
|
+
except git.exc.GitCommandError as e:
|
|
164
|
+
self._handle_git_error("updating repository", e)
|
|
165
|
+
|
|
166
|
+
def get_remote_url(self) -> str | None:
|
|
167
|
+
"""Get the current remote URL."""
|
|
168
|
+
try:
|
|
169
|
+
return self.repo.remotes.origin.url
|
|
170
|
+
except (git.exc.GitCommandError, AttributeError):
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
def configure_user(
|
|
174
|
+
self,
|
|
175
|
+
email: str = "translations@mitodl.org",
|
|
176
|
+
name: str = "MIT Open Learning Translations Bot",
|
|
177
|
+
) -> None:
|
|
178
|
+
"""Configure git user for this repository."""
|
|
179
|
+
try:
|
|
180
|
+
with self.repo.config_writer() as config:
|
|
181
|
+
# Check if user section exists and get existing values
|
|
182
|
+
try:
|
|
183
|
+
existing_email = config.get_value("user", "email", default=None)
|
|
184
|
+
existing_name = config.get_value("user", "name", default=None)
|
|
185
|
+
except NoSectionError:
|
|
186
|
+
# Section doesn't exist, set both values
|
|
187
|
+
existing_email = None
|
|
188
|
+
existing_name = None
|
|
189
|
+
# Set values only if they don't exist
|
|
190
|
+
if not existing_email:
|
|
191
|
+
config.set_value("user", "email", email)
|
|
192
|
+
if not existing_name:
|
|
193
|
+
config.set_value("user", "name", name)
|
|
194
|
+
except git.exc.GitCommandError as e:
|
|
195
|
+
self._handle_git_error("configuring user", e)
|
|
196
|
+
|
|
197
|
+
def branch_exists(self, branch_name: str) -> bool:
|
|
198
|
+
"""Check if branch exists locally or remotely."""
|
|
199
|
+
validate_branch_name(branch_name)
|
|
200
|
+
try:
|
|
201
|
+
# Check local branches
|
|
202
|
+
if branch_name in [ref.name for ref in self.repo.heads]:
|
|
203
|
+
return True
|
|
204
|
+
# Check remote branches
|
|
205
|
+
remote_branch = f"origin/{branch_name}"
|
|
206
|
+
try:
|
|
207
|
+
self.repo.remotes.origin.fetch()
|
|
208
|
+
except git.exc.GitCommandError:
|
|
209
|
+
# If fetch fails, try to check existing remote refs anyway
|
|
210
|
+
# Check remote refs with existing data
|
|
211
|
+
return remote_branch in [
|
|
212
|
+
ref.name for ref in self.repo.remotes.origin.refs
|
|
213
|
+
]
|
|
214
|
+
else:
|
|
215
|
+
# Fetch succeeded, check remote refs
|
|
216
|
+
return remote_branch in [
|
|
217
|
+
ref.name for ref in self.repo.remotes.origin.refs
|
|
218
|
+
]
|
|
219
|
+
except git.exc.GitCommandError as e:
|
|
220
|
+
self._handle_git_error("checking branch existence", e)
|
|
221
|
+
return False # Never reached, but satisfies type checker
|
|
222
|
+
|
|
223
|
+
def create_branch(self, branch_name: str) -> None:
|
|
224
|
+
"""Create and checkout a new branch."""
|
|
225
|
+
validate_branch_name(branch_name)
|
|
226
|
+
try:
|
|
227
|
+
self.repo.git.checkout("-b", branch_name)
|
|
228
|
+
except git.exc.GitCommandError as e:
|
|
229
|
+
self._handle_git_error("creating branch", e)
|
|
230
|
+
|
|
231
|
+
def stage_all(self) -> None:
|
|
232
|
+
"""Stage all changes."""
|
|
233
|
+
try:
|
|
234
|
+
self.repo.git.add(".")
|
|
235
|
+
except git.exc.GitCommandError as e:
|
|
236
|
+
self._handle_git_error("staging changes", e)
|
|
237
|
+
|
|
238
|
+
def has_changes(self) -> bool:
|
|
239
|
+
"""Check if there are uncommitted changes."""
|
|
240
|
+
try:
|
|
241
|
+
return self.repo.is_dirty(untracked_files=True)
|
|
242
|
+
except git.exc.GitCommandError as e:
|
|
243
|
+
self._handle_git_error("checking changes", e)
|
|
244
|
+
return False # Never reached, but satisfies type checker
|
|
245
|
+
|
|
246
|
+
def commit(self, message: str) -> None:
|
|
247
|
+
"""Commit staged changes."""
|
|
248
|
+
try:
|
|
249
|
+
self.repo.index.commit(message)
|
|
250
|
+
except git.exc.GitCommandError as e:
|
|
251
|
+
self._handle_git_error("committing changes", e)
|
|
252
|
+
|
|
253
|
+
@contextmanager
|
|
254
|
+
def authenticated_push_url(self, github_token: str):
|
|
255
|
+
"""Context manager for authenticated push with automatic cleanup."""
|
|
256
|
+
origin = self.repo.remotes.origin
|
|
257
|
+
original_url = origin.url
|
|
258
|
+
|
|
259
|
+
# Build authenticated URL
|
|
260
|
+
match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", original_url)
|
|
261
|
+
if match:
|
|
262
|
+
owner, repo_name = match.groups()
|
|
263
|
+
encoded_token = urllib.parse.quote(github_token, safe="")
|
|
264
|
+
push_url = f"https://{encoded_token}@github.com/{owner}/{repo_name}.git"
|
|
265
|
+
else:
|
|
266
|
+
encoded_token = urllib.parse.quote(github_token, safe="")
|
|
267
|
+
push_url = original_url.replace("https://", f"https://{encoded_token}@")
|
|
268
|
+
|
|
269
|
+
try:
|
|
270
|
+
origin.set_url(push_url)
|
|
271
|
+
yield
|
|
272
|
+
finally:
|
|
273
|
+
# Always restore original URL
|
|
274
|
+
try:
|
|
275
|
+
origin.set_url(original_url)
|
|
276
|
+
except (git.exc.GitCommandError, ValueError) as e:
|
|
277
|
+
# Best effort cleanup - log but don't fail
|
|
278
|
+
logger.warning("Failed to restore original git remote URL: %s", e)
|
|
279
|
+
|
|
280
|
+
def push_branch(self, branch_name: str, github_token: str | None = None) -> None:
|
|
281
|
+
"""Push branch to remote with optional authentication."""
|
|
282
|
+
validate_branch_name(branch_name)
|
|
283
|
+
try:
|
|
284
|
+
if github_token:
|
|
285
|
+
with self.authenticated_push_url(github_token):
|
|
286
|
+
self.repo.git.push("-u", "origin", branch_name)
|
|
287
|
+
else:
|
|
288
|
+
self.repo.git.push("-u", "origin", branch_name)
|
|
289
|
+
except git.exc.GitCommandError as e:
|
|
290
|
+
self._handle_git_error("pushing branch", e)
|
|
291
|
+
|
|
292
|
+
@staticmethod
|
|
293
|
+
def clone(repo_url: str, repo_path: str) -> "GitRepository":
|
|
294
|
+
"""Clone a repository and return GitRepository instance."""
|
|
295
|
+
repo_path_obj = Path(repo_path)
|
|
296
|
+
try:
|
|
297
|
+
repo_path_obj.parent.mkdir(parents=True, exist_ok=True)
|
|
298
|
+
git.Repo.clone_from(repo_url, str(repo_path))
|
|
299
|
+
return GitRepository(repo_path)
|
|
300
|
+
except git.exc.GitCommandError as e:
|
|
301
|
+
msg = f"Git error cloning repository: {e!s}"
|
|
302
|
+
raise CommandError(msg) from e
|
|
303
|
+
except OSError as e:
|
|
304
|
+
msg = f"Error creating directory: {e!s}"
|
|
305
|
+
raise CommandError(msg) from e
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class GitHubAPIClient:
|
|
309
|
+
"""Helper class for GitHub API operations."""
|
|
310
|
+
|
|
311
|
+
def __init__(self, token: str | None = None):
|
|
312
|
+
"""Initialize with optional token."""
|
|
313
|
+
self.token = (
|
|
314
|
+
token
|
|
315
|
+
or getattr(settings, "TRANSLATIONS_GITHUB_TOKEN", None)
|
|
316
|
+
or os.environ.get("TRANSLATIONS_GITHUB_TOKEN")
|
|
317
|
+
)
|
|
318
|
+
if not self.token:
|
|
319
|
+
msg = "TRANSLATIONS_GITHUB_TOKEN not set in settings or environment"
|
|
320
|
+
raise CommandError(msg)
|
|
321
|
+
|
|
322
|
+
def _get_headers(self) -> dict:
|
|
323
|
+
"""Get API request headers."""
|
|
324
|
+
return {
|
|
325
|
+
"Authorization": f"Bearer {self.token}",
|
|
326
|
+
"Accept": "application/vnd.github.v3+json",
|
|
327
|
+
"Content-Type": "application/json",
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
@staticmethod
|
|
331
|
+
def parse_repo_url(repo_url: str) -> tuple[str, str]:
|
|
332
|
+
"""Extract owner and repo from GitHub URL."""
|
|
333
|
+
match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", repo_url)
|
|
334
|
+
if not match:
|
|
335
|
+
msg = f"Could not parse owner/repo from repo URL: {repo_url}"
|
|
336
|
+
raise CommandError(msg)
|
|
337
|
+
owner, repo = match.groups()
|
|
338
|
+
return (owner, repo)
|
|
339
|
+
|
|
340
|
+
def _handle_rate_limit(
|
|
341
|
+
self, response: requests.Response, attempt: int, max_retries: int, stdout
|
|
342
|
+
) -> bool:
|
|
343
|
+
"""Handle rate limit response. Returns True if should retry."""
|
|
344
|
+
if response.status_code == HTTP_TOO_MANY_REQUESTS:
|
|
345
|
+
retry_after = int(response.headers.get("Retry-After", 2 * (2**attempt)))
|
|
346
|
+
if attempt < max_retries - 1:
|
|
347
|
+
stdout.write(
|
|
348
|
+
f" Rate limit exceeded (attempt {attempt + 1}/{max_retries}). "
|
|
349
|
+
f"Retrying in {retry_after} seconds..."
|
|
350
|
+
)
|
|
351
|
+
time.sleep(retry_after)
|
|
352
|
+
return True
|
|
353
|
+
else:
|
|
354
|
+
msg = "GitHub API rate limit exceeded. Please try again later."
|
|
355
|
+
raise CommandError(msg)
|
|
356
|
+
return False
|
|
357
|
+
|
|
358
|
+
def _extract_error_message(self, response: requests.Response) -> str:
|
|
359
|
+
"""Extract safe error message from response, including validation errors."""
|
|
360
|
+
try:
|
|
361
|
+
error_data = response.json()
|
|
362
|
+
message = error_data.get("message", f"HTTP {response.status_code}")
|
|
363
|
+
|
|
364
|
+
# GitHub API validation errors include detailed error info in 'errors' array
|
|
365
|
+
if error_data.get("errors"):
|
|
366
|
+
error_details = []
|
|
367
|
+
for err in error_data["errors"]:
|
|
368
|
+
if isinstance(err, dict):
|
|
369
|
+
field = err.get("field", "unknown")
|
|
370
|
+
code = err.get("code", "unknown")
|
|
371
|
+
resource = err.get("resource", "unknown")
|
|
372
|
+
error_details.append(f"{resource}.{field}: {code}")
|
|
373
|
+
else:
|
|
374
|
+
error_details.append(str(err))
|
|
375
|
+
|
|
376
|
+
if error_details:
|
|
377
|
+
message = f"{message} ({', '.join(error_details)})"
|
|
378
|
+
return message
|
|
379
|
+
else:
|
|
380
|
+
return message
|
|
381
|
+
except (ValueError, requests.exceptions.JSONDecodeError):
|
|
382
|
+
return f"HTTP {response.status_code}"
|
|
383
|
+
|
|
384
|
+
def verify_branch(
|
|
385
|
+
self,
|
|
386
|
+
owner: str,
|
|
387
|
+
repo: str,
|
|
388
|
+
branch_name: str,
|
|
389
|
+
stdout, # noqa: ARG002
|
|
390
|
+
) -> None:
|
|
391
|
+
"""Verify branch exists on remote."""
|
|
392
|
+
url = f"https://api.github.com/repos/{owner}/{repo}/branches/{branch_name}"
|
|
393
|
+
response = requests.get(url, headers=self._get_headers(), timeout=10)
|
|
394
|
+
|
|
395
|
+
if response.status_code == HTTP_NOT_FOUND:
|
|
396
|
+
msg = (
|
|
397
|
+
f"Branch '{branch_name}' not found on remote. "
|
|
398
|
+
f"Ensure the branch was pushed successfully."
|
|
399
|
+
)
|
|
400
|
+
raise CommandError(msg)
|
|
401
|
+
elif response.status_code != HTTP_OK:
|
|
402
|
+
error_msg = self._extract_error_message(response)
|
|
403
|
+
msg = f"Failed to verify branch: {error_msg}"
|
|
404
|
+
raise CommandError(msg)
|
|
405
|
+
# If status_code is HTTP_OK, function returns None implicitly
|
|
406
|
+
|
|
407
|
+
def create_pull_request( # noqa: PLR0913
|
|
408
|
+
self,
|
|
409
|
+
owner: str,
|
|
410
|
+
repo: str,
|
|
411
|
+
branch_name: str,
|
|
412
|
+
title: str,
|
|
413
|
+
body: str,
|
|
414
|
+
base: str = "main",
|
|
415
|
+
stdout=None,
|
|
416
|
+
) -> str:
|
|
417
|
+
"""Create a pull request with retry logic."""
|
|
418
|
+
url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
|
|
419
|
+
payload = {"title": title, "body": body, "head": branch_name, "base": base}
|
|
420
|
+
headers = self._get_headers()
|
|
421
|
+
|
|
422
|
+
max_retries = 3
|
|
423
|
+
base_retry_delay = 2
|
|
424
|
+
|
|
425
|
+
for attempt in range(max_retries):
|
|
426
|
+
retry_delay = base_retry_delay * (2**attempt)
|
|
427
|
+
|
|
428
|
+
try:
|
|
429
|
+
response = requests.post(url, json=payload, headers=headers, timeout=30)
|
|
430
|
+
|
|
431
|
+
if response.status_code == HTTP_CREATED:
|
|
432
|
+
return response.json()["html_url"]
|
|
433
|
+
|
|
434
|
+
if self._handle_rate_limit(
|
|
435
|
+
response, attempt, max_retries, stdout or self
|
|
436
|
+
):
|
|
437
|
+
continue
|
|
438
|
+
|
|
439
|
+
if response.status_code == HTTP_UNPROCESSABLE_ENTITY:
|
|
440
|
+
error_msg = self._extract_error_message(response)
|
|
441
|
+
safe_error = (
|
|
442
|
+
error_msg[:MAX_ERROR_MESSAGE_LENGTH]
|
|
443
|
+
if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH
|
|
444
|
+
else error_msg
|
|
445
|
+
)
|
|
446
|
+
msg = (
|
|
447
|
+
f"GitHub API validation error: {safe_error}\n"
|
|
448
|
+
f"This usually means the branch doesn't exist on remote "
|
|
449
|
+
f"or there's already a PR for this branch."
|
|
450
|
+
)
|
|
451
|
+
raise CommandError(msg)
|
|
452
|
+
|
|
453
|
+
error_msg = self._extract_error_message(response)
|
|
454
|
+
safe_error = (
|
|
455
|
+
error_msg[:MAX_ERROR_MESSAGE_LENGTH]
|
|
456
|
+
if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH
|
|
457
|
+
else error_msg
|
|
458
|
+
)
|
|
459
|
+
msg = f"GitHub API error: {safe_error}"
|
|
460
|
+
raise CommandError(msg)
|
|
461
|
+
|
|
462
|
+
except requests.exceptions.RequestException as e:
|
|
463
|
+
is_connection_error = isinstance(
|
|
464
|
+
e,
|
|
465
|
+
(requests.exceptions.ConnectionError, requests.exceptions.Timeout),
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
if is_connection_error and attempt < max_retries - 1:
|
|
469
|
+
if stdout:
|
|
470
|
+
error_msg = (
|
|
471
|
+
f" Connection error "
|
|
472
|
+
f"(attempt {attempt + 1}/{max_retries}): {e!s}"
|
|
473
|
+
)
|
|
474
|
+
stdout.write(error_msg)
|
|
475
|
+
stdout.write(f" Retrying in {retry_delay} seconds...")
|
|
476
|
+
time.sleep(retry_delay)
|
|
477
|
+
continue
|
|
478
|
+
else:
|
|
479
|
+
if is_connection_error:
|
|
480
|
+
msg = (
|
|
481
|
+
f"Failed to connect to GitHub API after "
|
|
482
|
+
f"{max_retries} attempts: {e!s}\n"
|
|
483
|
+
f"Please check your network connection and try again later."
|
|
484
|
+
)
|
|
485
|
+
raise CommandError(msg) from e
|
|
486
|
+
msg = f"GitHub API error: {e!s}"
|
|
487
|
+
raise CommandError(msg) from e
|
|
488
|
+
|
|
489
|
+
msg = "Failed to create pull request after all retries"
|
|
490
|
+
raise CommandError(msg)
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
class PullRequestData(TypedDict):
|
|
494
|
+
"""Data structure for pull request creation."""
|
|
495
|
+
|
|
496
|
+
lang_code: str
|
|
497
|
+
iso_code: str
|
|
498
|
+
sync_stats: dict
|
|
499
|
+
applied_count: int
|
|
500
|
+
translation_stats: dict[str, Any]
|
|
501
|
+
applied_by_app: dict[str, Any]
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
class TranslationParams(TypedDict):
|
|
505
|
+
"""Parameters for translation operations."""
|
|
506
|
+
|
|
507
|
+
lang_code: str
|
|
508
|
+
provider: str
|
|
509
|
+
model: str
|
|
510
|
+
glossary: dict[str, Any] | None
|
|
511
|
+
batch_size: int
|
|
512
|
+
max_retries: int
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
class Command(BaseCommand):
|
|
516
|
+
help = (
|
|
517
|
+
"Sync translation keys, translate using LLM, "
|
|
518
|
+
"and create PR in mitxonline-translations"
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
def add_arguments(self, parser):
|
|
522
|
+
parser.add_argument(
|
|
523
|
+
"lang", type=str, help="Language code (e.g., el, fr, es_ES)"
|
|
524
|
+
)
|
|
525
|
+
parser.add_argument(
|
|
526
|
+
"--iso-code",
|
|
527
|
+
type=str,
|
|
528
|
+
help="ISO code for JSON files (default: same as lang)",
|
|
529
|
+
)
|
|
530
|
+
parser.add_argument(
|
|
531
|
+
"--repo-path",
|
|
532
|
+
type=str,
|
|
533
|
+
help=(
|
|
534
|
+
"Path to mitxonline-translations repository. "
|
|
535
|
+
"Can also be set via TRANSLATIONS_REPO_PATH setting "
|
|
536
|
+
"or environment variable."
|
|
537
|
+
),
|
|
538
|
+
)
|
|
539
|
+
default_provider = get_default_provider()
|
|
540
|
+
parser.add_argument(
|
|
541
|
+
"--provider",
|
|
542
|
+
type=str,
|
|
543
|
+
default=default_provider,
|
|
544
|
+
choices=["openai", "gemini", "mistral"],
|
|
545
|
+
help=(
|
|
546
|
+
"Translation provider (openai, gemini, mistral). "
|
|
547
|
+
"Default is taken from TRANSLATIONS_PROVIDERS['default_provider']"
|
|
548
|
+
+ (
|
|
549
|
+
f" (currently: {default_provider})"
|
|
550
|
+
if default_provider
|
|
551
|
+
else " (not configured)"
|
|
552
|
+
)
|
|
553
|
+
),
|
|
554
|
+
)
|
|
555
|
+
parser.add_argument(
|
|
556
|
+
"--model",
|
|
557
|
+
type=str,
|
|
558
|
+
default=None,
|
|
559
|
+
help=(
|
|
560
|
+
"Model name (e.g., gpt-4, gemini-pro, mistral-large-latest). "
|
|
561
|
+
"If not specified, uses the default_model for the selected provider "
|
|
562
|
+
"from TRANSLATIONS_PROVIDERS. "
|
|
563
|
+
"LiteLLM automatically detects provider from model name."
|
|
564
|
+
),
|
|
565
|
+
)
|
|
566
|
+
parser.add_argument(
|
|
567
|
+
"--dry-run",
|
|
568
|
+
action="store_true",
|
|
569
|
+
help="Run without committing or creating PR",
|
|
570
|
+
)
|
|
571
|
+
parser.add_argument(
|
|
572
|
+
"--glossary",
|
|
573
|
+
action="store_true",
|
|
574
|
+
default=False,
|
|
575
|
+
help="Use glossary from plugin glossaries folder. "
|
|
576
|
+
"Looks for {plugin_dir}/glossaries/machine_learning/{lang_code}.txt",
|
|
577
|
+
)
|
|
578
|
+
parser.add_argument(
|
|
579
|
+
"--batch-size",
|
|
580
|
+
type=int,
|
|
581
|
+
default=200,
|
|
582
|
+
help=(
|
|
583
|
+
"Number of keys to translate per API request (default: 200). "
|
|
584
|
+
"Larger batches are faster but may hit rate limits. "
|
|
585
|
+
"Recommended: 200-300 for most models, "
|
|
586
|
+
"up to 400-500 for large models like mistral-large."
|
|
587
|
+
),
|
|
588
|
+
)
|
|
589
|
+
parser.add_argument(
|
|
590
|
+
"--mfe",
|
|
591
|
+
type=str,
|
|
592
|
+
nargs="+",
|
|
593
|
+
help=(
|
|
594
|
+
"Filter by specific MFE(s). "
|
|
595
|
+
"Use 'edx-platform' for backend translations."
|
|
596
|
+
),
|
|
597
|
+
)
|
|
598
|
+
parser.add_argument(
|
|
599
|
+
"--repo-url",
|
|
600
|
+
type=str,
|
|
601
|
+
help=(
|
|
602
|
+
"GitHub repository URL. "
|
|
603
|
+
"Can also be set via TRANSLATIONS_REPO_URL setting "
|
|
604
|
+
"or environment variable."
|
|
605
|
+
),
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
def handle(self, *args, **options): # noqa: ARG002, PLR0915
|
|
609
|
+
"""Handle the command execution."""
|
|
610
|
+
lang_code = options["lang"]
|
|
611
|
+
iso_code = options.get("iso_code") or lang_code
|
|
612
|
+
|
|
613
|
+
validate_language_code(lang_code)
|
|
614
|
+
validate_language_code(iso_code, "ISO code")
|
|
615
|
+
|
|
616
|
+
repo_path = get_config_value(
|
|
617
|
+
"repo_path",
|
|
618
|
+
options,
|
|
619
|
+
str(Path.home() / ".mitxonline-translations"),
|
|
620
|
+
)
|
|
621
|
+
repo_url = get_config_value(
|
|
622
|
+
"repo_url",
|
|
623
|
+
options,
|
|
624
|
+
"https://github.com/mitodl/mitxonline-translations.git",
|
|
625
|
+
)
|
|
626
|
+
|
|
627
|
+
# Validate repository path is not empty
|
|
628
|
+
if not repo_path or not repo_path.strip():
|
|
629
|
+
msg = (
|
|
630
|
+
"Repository path is not set. Please specify --repo-path, "
|
|
631
|
+
"set TRANSLATIONS_REPO_PATH in Django settings, or set "
|
|
632
|
+
"TRANSLATIONS_REPO_PATH environment variable."
|
|
633
|
+
)
|
|
634
|
+
raise CommandError(msg)
|
|
635
|
+
|
|
636
|
+
self.stdout.write(self.style.SUCCESS(f"Processing language: {lang_code}"))
|
|
637
|
+
self.stdout.write(f" ISO code: {iso_code}")
|
|
638
|
+
self.stdout.write(f" Repository: {repo_path}")
|
|
639
|
+
|
|
640
|
+
repo = self._ensure_repo(repo_path, repo_url)
|
|
641
|
+
|
|
642
|
+
self.stdout.write("\nSyncing translation keys...")
|
|
643
|
+
base_dir = Path(repo_path) / "translations"
|
|
644
|
+
sync_stats = sync_all_translations(
|
|
645
|
+
base_dir, lang_code, iso_code, skip_backend=False
|
|
646
|
+
)
|
|
647
|
+
self._log_sync_stats(sync_stats)
|
|
648
|
+
|
|
649
|
+
# Extract and filter empty keys
|
|
650
|
+
self.stdout.write("\nExtracting empty keys for translation...")
|
|
651
|
+
empty_keys = extract_empty_keys(
|
|
652
|
+
base_dir, lang_code, iso_code, skip_backend=False
|
|
653
|
+
)
|
|
654
|
+
empty_keys = self._filter_by_mfe(empty_keys, options.get("mfe"))
|
|
655
|
+
|
|
656
|
+
if not empty_keys:
|
|
657
|
+
self.stdout.write(self.style.SUCCESS("\nNo empty keys to translate!"))
|
|
658
|
+
return
|
|
659
|
+
|
|
660
|
+
glossary = self._load_glossary(options, lang_code)
|
|
661
|
+
|
|
662
|
+
provider = options.get("provider") or get_default_provider()
|
|
663
|
+
if not provider:
|
|
664
|
+
msg = (
|
|
665
|
+
"Provider not specified and "
|
|
666
|
+
"TRANSLATIONS_PROVIDERS['default_provider'] is not set"
|
|
667
|
+
)
|
|
668
|
+
raise CommandError(msg)
|
|
669
|
+
|
|
670
|
+
model = options.get("model") or get_default_model_for_provider(provider)
|
|
671
|
+
if not model:
|
|
672
|
+
msg = (
|
|
673
|
+
f"Model not specified and provider '{provider}' "
|
|
674
|
+
"does not have default_model in TRANSLATIONS_PROVIDERS"
|
|
675
|
+
)
|
|
676
|
+
raise CommandError(msg)
|
|
677
|
+
|
|
678
|
+
self.stdout.write(f"\nTranslating using {provider}/{model}...")
|
|
679
|
+
params = TranslationParams(
|
|
680
|
+
lang_code=lang_code,
|
|
681
|
+
provider=provider,
|
|
682
|
+
model=model,
|
|
683
|
+
glossary=glossary,
|
|
684
|
+
batch_size=options.get("batch_size", 200),
|
|
685
|
+
max_retries=MAX_RETRIES,
|
|
686
|
+
)
|
|
687
|
+
translations, translation_stats = self._translate_keys(empty_keys, params)
|
|
688
|
+
self.stdout.write(f" Translated {len(translations)} keys")
|
|
689
|
+
|
|
690
|
+
self.stdout.write("\nApplying translations...")
|
|
691
|
+
applied_count, applied_by_app = self._apply_translations(
|
|
692
|
+
translations, empty_keys, self.stdout
|
|
693
|
+
)
|
|
694
|
+
self.stdout.write(f" Applied {applied_count} translations")
|
|
695
|
+
|
|
696
|
+
if options.get("dry_run"):
|
|
697
|
+
self.stdout.write(self.style.WARNING("\nDry run - no changes committed"))
|
|
698
|
+
return
|
|
699
|
+
|
|
700
|
+
branch_name = create_branch_name(lang_code)
|
|
701
|
+
self.stdout.write(f"\nCommitting changes to branch: {branch_name}")
|
|
702
|
+
|
|
703
|
+
if not self._commit_changes(repo, branch_name, lang_code):
|
|
704
|
+
return
|
|
705
|
+
|
|
706
|
+
self.stdout.write("\nCreating pull request...")
|
|
707
|
+
try:
|
|
708
|
+
pr_data = PullRequestData(
|
|
709
|
+
lang_code=lang_code,
|
|
710
|
+
iso_code=iso_code,
|
|
711
|
+
sync_stats=sync_stats,
|
|
712
|
+
applied_count=applied_count,
|
|
713
|
+
translation_stats=translation_stats,
|
|
714
|
+
applied_by_app=applied_by_app,
|
|
715
|
+
)
|
|
716
|
+
pr_url = self._create_pull_request(
|
|
717
|
+
repo_path,
|
|
718
|
+
branch_name,
|
|
719
|
+
pr_data,
|
|
720
|
+
repo_url,
|
|
721
|
+
)
|
|
722
|
+
self.stdout.write(self.style.SUCCESS(f"\nPull request created: {pr_url}"))
|
|
723
|
+
except CommandError as e:
|
|
724
|
+
# Clean up branch if PR creation fails
|
|
725
|
+
self.stdout.write(
|
|
726
|
+
self.style.ERROR(f"\nFailed to create pull request: {e!s}")
|
|
727
|
+
)
|
|
728
|
+
self._cleanup_failed_branch(repo, branch_name)
|
|
729
|
+
raise
|
|
730
|
+
|
|
731
|
+
def _ensure_repo(self, repo_path: str, repo_url: str) -> GitRepository:
|
|
732
|
+
"""Ensure repository exists and is ready. Returns GitRepository instance."""
|
|
733
|
+
repo_path_obj = Path(repo_path)
|
|
734
|
+
is_git_repo = repo_path_obj.exists() and (repo_path_obj / ".git").exists()
|
|
735
|
+
|
|
736
|
+
if is_git_repo:
|
|
737
|
+
repo = GitRepository(repo_path)
|
|
738
|
+
current_url = repo.get_remote_url()
|
|
739
|
+
|
|
740
|
+
# Normalize URLs for comparison (remove .git suffix, trailing slashes)
|
|
741
|
+
normalized_current = (current_url or "").rstrip(".git").rstrip("/")
|
|
742
|
+
normalized_new = repo_url.rstrip(".git").rstrip("/")
|
|
743
|
+
|
|
744
|
+
# If URL changed, delete and re-clone
|
|
745
|
+
if normalized_current != normalized_new:
|
|
746
|
+
self.stdout.write(
|
|
747
|
+
self.style.WARNING(
|
|
748
|
+
f" Repository URL changed from {current_url} to {repo_url}"
|
|
749
|
+
)
|
|
750
|
+
)
|
|
751
|
+
self.stdout.write(" Removing old repository and cloning new one...")
|
|
752
|
+
shutil.rmtree(repo_path)
|
|
753
|
+
self.stdout.write(f" Cloning repository to {repo_path}...")
|
|
754
|
+
repo = GitRepository.clone(repo_url, repo_path)
|
|
755
|
+
self.stdout.write(
|
|
756
|
+
self.style.SUCCESS(" Repository cloned successfully")
|
|
757
|
+
)
|
|
758
|
+
return repo
|
|
759
|
+
|
|
760
|
+
# URL matches, use existing repo
|
|
761
|
+
self.stdout.write(f" Repository found at {repo_path}")
|
|
762
|
+
if repo.ensure_clean():
|
|
763
|
+
self.stdout.write(
|
|
764
|
+
self.style.WARNING(
|
|
765
|
+
" WARNING: Found uncommitted changes (cleaned up)"
|
|
766
|
+
)
|
|
767
|
+
)
|
|
768
|
+
self.stdout.write(
|
|
769
|
+
self.style.SUCCESS(" Cleaned up uncommitted changes")
|
|
770
|
+
)
|
|
771
|
+
|
|
772
|
+
repo.switch_to_main()
|
|
773
|
+
self.stdout.write(" Updating repository...")
|
|
774
|
+
repo.update_from_remote()
|
|
775
|
+
self.stdout.write(self.style.SUCCESS(" Repository up to date"))
|
|
776
|
+
return repo
|
|
777
|
+
|
|
778
|
+
elif repo_path_obj.exists():
|
|
779
|
+
msg = (
|
|
780
|
+
f"Path {repo_path} exists but is not a git repository. "
|
|
781
|
+
f"Please remove it or specify a different path."
|
|
782
|
+
)
|
|
783
|
+
raise CommandError(msg)
|
|
784
|
+
else:
|
|
785
|
+
self.stdout.write(f" Cloning repository to {repo_path}...")
|
|
786
|
+
repo = GitRepository.clone(repo_url, repo_path)
|
|
787
|
+
self.stdout.write(self.style.SUCCESS(" Repository cloned successfully"))
|
|
788
|
+
return repo
|
|
789
|
+
|
|
790
|
+
def _log_sync_stats(self, sync_stats: dict) -> None:
|
|
791
|
+
"""Log synchronization statistics."""
|
|
792
|
+
self.stdout.write(
|
|
793
|
+
f" Frontend: {sync_stats['frontend']['added']} keys added, "
|
|
794
|
+
f"{sync_stats['frontend']['fixed']} typos fixed"
|
|
795
|
+
)
|
|
796
|
+
self.stdout.write(f" Backend: {sync_stats['backend']['added']} entries added")
|
|
797
|
+
|
|
798
|
+
def _filter_by_mfe(
|
|
799
|
+
self, empty_keys: list[dict], mfe_filter: list[str] | None
|
|
800
|
+
) -> list[dict]:
|
|
801
|
+
"""Filter empty keys by MFE if specified."""
|
|
802
|
+
if not mfe_filter:
|
|
803
|
+
self.stdout.write(f" Found {len(empty_keys)} empty keys")
|
|
804
|
+
return empty_keys
|
|
805
|
+
|
|
806
|
+
mfe_set = set(mfe_filter)
|
|
807
|
+
original_count = len(empty_keys)
|
|
808
|
+
available_apps = {key.get("app", "unknown") for key in empty_keys}
|
|
809
|
+
filtered = [key for key in empty_keys if key.get("app") in mfe_set]
|
|
810
|
+
|
|
811
|
+
if not filtered:
|
|
812
|
+
mfe_list = ", ".join(mfe_filter)
|
|
813
|
+
apps_list = ", ".join(sorted(available_apps))
|
|
814
|
+
self.stdout.write(
|
|
815
|
+
self.style.WARNING(
|
|
816
|
+
f"\nWARNING: No empty keys found for specified MFE(s): "
|
|
817
|
+
f"{mfe_list}\n"
|
|
818
|
+
f" Available apps: {apps_list}"
|
|
819
|
+
)
|
|
820
|
+
)
|
|
821
|
+
return []
|
|
822
|
+
|
|
823
|
+
mfe_list = ", ".join(mfe_filter)
|
|
824
|
+
self.stdout.write(
|
|
825
|
+
f" Filtered to {len(filtered)} keys from {len(mfe_set)} MFE(s): "
|
|
826
|
+
f"{mfe_list} (was {original_count} total)"
|
|
827
|
+
)
|
|
828
|
+
return filtered
|
|
829
|
+
|
|
830
|
+
def _load_glossary(self, options: dict, lang_code: str) -> dict[str, Any]:
|
|
831
|
+
"""Load glossary if enabled."""
|
|
832
|
+
if not options.get("glossary", False):
|
|
833
|
+
return {}
|
|
834
|
+
|
|
835
|
+
utils_file = Path(utils_module.__file__)
|
|
836
|
+
glossary_path = (
|
|
837
|
+
utils_file.parent.parent
|
|
838
|
+
/ "glossaries"
|
|
839
|
+
/ "machine_learning"
|
|
840
|
+
/ f"{lang_code}.txt"
|
|
841
|
+
)
|
|
842
|
+
|
|
843
|
+
if glossary_path.exists():
|
|
844
|
+
self.stdout.write(f"\nLoading glossary from {glossary_path}...")
|
|
845
|
+
glossary = load_glossary(glossary_path, lang_code)
|
|
846
|
+
self.stdout.write(f" Loaded {len(glossary)} glossary terms")
|
|
847
|
+
return glossary
|
|
848
|
+
|
|
849
|
+
self.stdout.write(
|
|
850
|
+
self.style.WARNING(
|
|
851
|
+
f"\nWARNING: Glossary file not found: {glossary_path}\n"
|
|
852
|
+
f" Continuing without glossary."
|
|
853
|
+
)
|
|
854
|
+
)
|
|
855
|
+
return {}
|
|
856
|
+
|
|
857
|
+
def _check_glossary_for_keys(
|
|
858
|
+
self,
|
|
859
|
+
empty_keys: list[dict],
|
|
860
|
+
glossary: dict[str, Any] | None,
|
|
861
|
+
) -> tuple[dict[str, Any], int, list[dict]]:
|
|
862
|
+
"""Check glossary matches for keys.
|
|
863
|
+
|
|
864
|
+
Returns (translations, matches_count, remaining_keys).
|
|
865
|
+
"""
|
|
866
|
+
translations = {}
|
|
867
|
+
glossary_matches = 0
|
|
868
|
+
keys_needing_llm = []
|
|
869
|
+
|
|
870
|
+
for key_info in empty_keys:
|
|
871
|
+
# Normalize file path for consistent comparison
|
|
872
|
+
file_path_str = str(Path(key_info["file_path"]).resolve())
|
|
873
|
+
translation_key = f"{file_path_str}:{key_info['key']}"
|
|
874
|
+
|
|
875
|
+
if glossary:
|
|
876
|
+
match_result = self._check_glossary_match(key_info, glossary)
|
|
877
|
+
if match_result:
|
|
878
|
+
translations[translation_key] = match_result
|
|
879
|
+
glossary_matches += 1
|
|
880
|
+
continue
|
|
881
|
+
|
|
882
|
+
keys_needing_llm.append(key_info)
|
|
883
|
+
|
|
884
|
+
return translations, glossary_matches, keys_needing_llm
|
|
885
|
+
|
|
886
|
+
def _process_batch_results(
|
|
887
|
+
self,
|
|
888
|
+
batch: list[dict],
|
|
889
|
+
batch_translations: list[Any],
|
|
890
|
+
translations: dict[str, Any],
|
|
891
|
+
) -> tuple[int, int, dict[str, int]]:
|
|
892
|
+
"""Process batch translation results.
|
|
893
|
+
|
|
894
|
+
Returns (successes, errors, errors_by_app).
|
|
895
|
+
"""
|
|
896
|
+
batch_successes = 0
|
|
897
|
+
batch_errors = 0
|
|
898
|
+
batch_errors_by_app: dict[str, int] = {}
|
|
899
|
+
|
|
900
|
+
for i, key_info in enumerate(batch):
|
|
901
|
+
# Normalize file path for consistent comparison
|
|
902
|
+
file_path_str = str(Path(key_info["file_path"]).resolve())
|
|
903
|
+
translation_key = f"{file_path_str}:{key_info['key']}"
|
|
904
|
+
app = key_info.get("app", "unknown")
|
|
905
|
+
if i < len(batch_translations) and batch_translations[i]:
|
|
906
|
+
translations[translation_key] = batch_translations[i]
|
|
907
|
+
batch_successes += 1
|
|
908
|
+
else:
|
|
909
|
+
batch_errors += 1
|
|
910
|
+
batch_errors_by_app[app] = batch_errors_by_app.get(app, 0) + 1
|
|
911
|
+
|
|
912
|
+
return batch_successes, batch_errors, batch_errors_by_app
|
|
913
|
+
|
|
914
|
+
def _translate_with_llm( # noqa: PLR0913
|
|
915
|
+
self,
|
|
916
|
+
keys_needing_llm: list[dict],
|
|
917
|
+
translations: dict[str, Any],
|
|
918
|
+
lang_code: str,
|
|
919
|
+
provider: str,
|
|
920
|
+
model: str,
|
|
921
|
+
glossary: dict[str, Any] | None,
|
|
922
|
+
batch_size: int,
|
|
923
|
+
max_retries: int,
|
|
924
|
+
) -> tuple[int, int, dict[str, int]]:
|
|
925
|
+
"""Translate keys using LLM with batch processing.
|
|
926
|
+
|
|
927
|
+
Returns (llm_translations, llm_errors, errors_by_app).
|
|
928
|
+
"""
|
|
929
|
+
llm_translations = 0
|
|
930
|
+
llm_errors = 0
|
|
931
|
+
errors_by_app: dict[str, int] = {}
|
|
932
|
+
|
|
933
|
+
total_keys = len(keys_needing_llm)
|
|
934
|
+
num_batches = (total_keys + batch_size - 1) // batch_size
|
|
935
|
+
self.stdout.write(
|
|
936
|
+
f" Translating {total_keys} keys using LLM "
|
|
937
|
+
f"({num_batches} batches of up to {batch_size} keys each)..."
|
|
938
|
+
)
|
|
939
|
+
|
|
940
|
+
for batch_idx, batch in enumerate(
|
|
941
|
+
[
|
|
942
|
+
keys_needing_llm[i : i + batch_size]
|
|
943
|
+
for i in range(0, total_keys, batch_size)
|
|
944
|
+
],
|
|
945
|
+
1,
|
|
946
|
+
):
|
|
947
|
+
batch_succeeded = False
|
|
948
|
+
batch_apps = {key_info.get("app", "unknown") for key_info in batch}
|
|
949
|
+
|
|
950
|
+
# Retry loop for this batch
|
|
951
|
+
for attempt in range(max_retries + 1): # +1 for initial attempt
|
|
952
|
+
try:
|
|
953
|
+
batch_translations = self._call_llm_batch(
|
|
954
|
+
batch, lang_code, provider, model, glossary
|
|
955
|
+
)
|
|
956
|
+
batch_successes, batch_errors, batch_errors_by_app = (
|
|
957
|
+
self._process_batch_results(
|
|
958
|
+
batch,
|
|
959
|
+
batch_translations,
|
|
960
|
+
translations,
|
|
961
|
+
)
|
|
962
|
+
)
|
|
963
|
+
|
|
964
|
+
llm_translations += batch_successes
|
|
965
|
+
llm_errors += batch_errors
|
|
966
|
+
for app, count in batch_errors_by_app.items():
|
|
967
|
+
errors_by_app[app] = errors_by_app.get(app, 0) + count
|
|
968
|
+
|
|
969
|
+
completed = min(batch_idx * batch_size, total_keys)
|
|
970
|
+
progress_pct = min((completed / total_keys) * 100, 100)
|
|
971
|
+
remaining_keys = total_keys - llm_translations
|
|
972
|
+
|
|
973
|
+
self._log_batch_progress(
|
|
974
|
+
batch_idx,
|
|
975
|
+
num_batches,
|
|
976
|
+
batch_successes,
|
|
977
|
+
batch_errors,
|
|
978
|
+
completed,
|
|
979
|
+
total_keys,
|
|
980
|
+
progress_pct,
|
|
981
|
+
remaining_keys,
|
|
982
|
+
batch_apps,
|
|
983
|
+
batch_errors_by_app,
|
|
984
|
+
attempt,
|
|
985
|
+
)
|
|
986
|
+
|
|
987
|
+
batch_succeeded = True
|
|
988
|
+
break # Success - exit retry loop
|
|
989
|
+
|
|
990
|
+
except (
|
|
991
|
+
requests.RequestException,
|
|
992
|
+
ValueError,
|
|
993
|
+
KeyError,
|
|
994
|
+
AttributeError,
|
|
995
|
+
) as e:
|
|
996
|
+
if not self._handle_batch_error(
|
|
997
|
+
e, batch_idx, num_batches, batch_apps, attempt, max_retries
|
|
998
|
+
):
|
|
999
|
+
break # Non-retryable error
|
|
1000
|
+
|
|
1001
|
+
# If batch failed after all retries, mark all keys as errors
|
|
1002
|
+
if not batch_succeeded:
|
|
1003
|
+
batch_errors = len(batch)
|
|
1004
|
+
llm_errors += batch_errors
|
|
1005
|
+
for key_info in batch:
|
|
1006
|
+
app = key_info.get("app", "unknown")
|
|
1007
|
+
errors_by_app[app] = errors_by_app.get(app, 0) + 1
|
|
1008
|
+
apps_str = ", ".join(sorted(batch_apps))
|
|
1009
|
+
self.stdout.write(
|
|
1010
|
+
self.style.ERROR(
|
|
1011
|
+
f" Marked {batch_errors} keys as errors, "
|
|
1012
|
+
f"continuing with next batch...\n"
|
|
1013
|
+
f" Affected apps: {apps_str}"
|
|
1014
|
+
)
|
|
1015
|
+
)
|
|
1016
|
+
|
|
1017
|
+
return llm_translations, llm_errors, errors_by_app
|
|
1018
|
+
|
|
1019
|
+
def _log_batch_progress( # noqa: PLR0913
|
|
1020
|
+
self,
|
|
1021
|
+
batch_idx: int,
|
|
1022
|
+
num_batches: int,
|
|
1023
|
+
batch_successes: int,
|
|
1024
|
+
batch_errors: int,
|
|
1025
|
+
completed: int,
|
|
1026
|
+
total_keys: int,
|
|
1027
|
+
progress_pct: float,
|
|
1028
|
+
remaining_keys: int,
|
|
1029
|
+
batch_apps: set[str],
|
|
1030
|
+
batch_errors_by_app: dict[str, int],
|
|
1031
|
+
attempt: int,
|
|
1032
|
+
) -> None:
|
|
1033
|
+
"""Log batch processing progress."""
|
|
1034
|
+
retry_msg = f" (after {attempt + 1} attempt(s))" if attempt > 0 else ""
|
|
1035
|
+
if batch_errors > 0:
|
|
1036
|
+
apps_str = ", ".join(sorted(batch_apps))
|
|
1037
|
+
errors_by_app_str = ", ".join(
|
|
1038
|
+
f"{app}: {count}" for app, count in sorted(batch_errors_by_app.items())
|
|
1039
|
+
)
|
|
1040
|
+
self.stdout.write(
|
|
1041
|
+
f" Batch {batch_idx}/{num_batches} completed "
|
|
1042
|
+
f"with partial success "
|
|
1043
|
+
f"({batch_successes} succeeded, "
|
|
1044
|
+
f"{batch_errors} failed){retry_msg} "
|
|
1045
|
+
f"({completed}/{total_keys} keys, "
|
|
1046
|
+
f"{progress_pct:.1f}% complete, "
|
|
1047
|
+
f"{remaining_keys} remaining)\n"
|
|
1048
|
+
f" Affected apps: {apps_str}\n"
|
|
1049
|
+
f" Errors by app: {errors_by_app_str}"
|
|
1050
|
+
)
|
|
1051
|
+
else:
|
|
1052
|
+
self.stdout.write(
|
|
1053
|
+
f" Batch {batch_idx}/{num_batches} completed"
|
|
1054
|
+
f"{retry_msg} "
|
|
1055
|
+
f"({completed}/{total_keys} keys, "
|
|
1056
|
+
f"{progress_pct:.1f}% complete, "
|
|
1057
|
+
f"{remaining_keys} remaining)"
|
|
1058
|
+
)
|
|
1059
|
+
|
|
1060
|
+
def _handle_batch_error( # noqa: PLR0913
|
|
1061
|
+
self,
|
|
1062
|
+
error: Exception,
|
|
1063
|
+
batch_idx: int,
|
|
1064
|
+
num_batches: int,
|
|
1065
|
+
batch_apps: set[str],
|
|
1066
|
+
attempt: int,
|
|
1067
|
+
max_retries: int,
|
|
1068
|
+
) -> bool:
|
|
1069
|
+
"""Handle batch error. Returns True if should retry, False otherwise."""
|
|
1070
|
+
apps_str = ", ".join(sorted(batch_apps))
|
|
1071
|
+
if not is_retryable_error(error):
|
|
1072
|
+
# Non-retryable error - fail immediately
|
|
1073
|
+
self.stdout.write(
|
|
1074
|
+
self.style.ERROR(
|
|
1075
|
+
f" ERROR: Batch {batch_idx}/{num_batches} "
|
|
1076
|
+
f"failed with non-retryable error: {error!s}\n"
|
|
1077
|
+
f" Affected apps: {apps_str}"
|
|
1078
|
+
)
|
|
1079
|
+
)
|
|
1080
|
+
return False
|
|
1081
|
+
|
|
1082
|
+
# Retryable error - check if we have retries left
|
|
1083
|
+
if attempt < max_retries:
|
|
1084
|
+
# Exponential backoff: 2^attempt seconds (1s, 2s, 4s, 8s...)
|
|
1085
|
+
wait_time = 2**attempt
|
|
1086
|
+
self.stdout.write(
|
|
1087
|
+
self.style.WARNING(
|
|
1088
|
+
f" WARNING: Batch {batch_idx}/{num_batches} "
|
|
1089
|
+
f"failed (attempt {attempt + 1}/"
|
|
1090
|
+
f"{max_retries + 1}): {error!s}\n"
|
|
1091
|
+
f" Affected apps: {apps_str}\n"
|
|
1092
|
+
f" Retrying in {wait_time} second(s)..."
|
|
1093
|
+
)
|
|
1094
|
+
)
|
|
1095
|
+
time.sleep(wait_time)
|
|
1096
|
+
return True
|
|
1097
|
+
else:
|
|
1098
|
+
# Out of retries
|
|
1099
|
+
self.stdout.write(
|
|
1100
|
+
self.style.ERROR(
|
|
1101
|
+
f" ERROR: Batch {batch_idx}/{num_batches} "
|
|
1102
|
+
f"failed after {max_retries + 1} attempts: "
|
|
1103
|
+
f"{error!s}\n"
|
|
1104
|
+
f" Affected apps: {apps_str}"
|
|
1105
|
+
)
|
|
1106
|
+
)
|
|
1107
|
+
return False
|
|
1108
|
+
|
|
1109
|
+
def _translate_keys(
|
|
1110
|
+
self,
|
|
1111
|
+
empty_keys: list[dict],
|
|
1112
|
+
params: TranslationParams,
|
|
1113
|
+
) -> tuple[dict[str, Any], dict[str, Any]]:
|
|
1114
|
+
"""Translate empty keys using LLM with batch processing."""
|
|
1115
|
+
lang_code = params["lang_code"]
|
|
1116
|
+
provider = params["provider"]
|
|
1117
|
+
model = params["model"]
|
|
1118
|
+
glossary = params["glossary"]
|
|
1119
|
+
batch_size = params["batch_size"]
|
|
1120
|
+
max_retries = params["max_retries"]
|
|
1121
|
+
|
|
1122
|
+
# First pass: check glossary matches
|
|
1123
|
+
translations, glossary_matches, keys_needing_llm = (
|
|
1124
|
+
self._check_glossary_for_keys(empty_keys, glossary)
|
|
1125
|
+
)
|
|
1126
|
+
|
|
1127
|
+
if not keys_needing_llm:
|
|
1128
|
+
return translations, {
|
|
1129
|
+
"glossary_matches": glossary_matches,
|
|
1130
|
+
"llm_translations": 0,
|
|
1131
|
+
"errors": 0,
|
|
1132
|
+
"errors_by_app": cast("dict[str, int]", {}),
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
# Translate remaining keys with LLM
|
|
1136
|
+
llm_translations, llm_errors, errors_by_app = self._translate_with_llm(
|
|
1137
|
+
keys_needing_llm,
|
|
1138
|
+
translations,
|
|
1139
|
+
lang_code,
|
|
1140
|
+
provider,
|
|
1141
|
+
model,
|
|
1142
|
+
glossary,
|
|
1143
|
+
batch_size,
|
|
1144
|
+
max_retries,
|
|
1145
|
+
)
|
|
1146
|
+
|
|
1147
|
+
summary = (
|
|
1148
|
+
f" Summary - LLM translations: {llm_translations}, Errors: {llm_errors}"
|
|
1149
|
+
)
|
|
1150
|
+
if glossary:
|
|
1151
|
+
summary = (
|
|
1152
|
+
f" Summary - Glossary matches: {glossary_matches}, {summary[12:]}"
|
|
1153
|
+
)
|
|
1154
|
+
self.stdout.write(summary)
|
|
1155
|
+
|
|
1156
|
+
return translations, {
|
|
1157
|
+
"glossary_matches": glossary_matches,
|
|
1158
|
+
"llm_translations": llm_translations,
|
|
1159
|
+
"errors": llm_errors,
|
|
1160
|
+
"errors_by_app": errors_by_app,
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
def _check_glossary_match(
|
|
1164
|
+
self, key_info: dict, glossary: dict[str, Any] | None
|
|
1165
|
+
) -> Any | None:
|
|
1166
|
+
"""
|
|
1167
|
+
Check if key matches glossary. Returns translation or None.
|
|
1168
|
+
|
|
1169
|
+
Args:
|
|
1170
|
+
key_info: Dictionary containing key information with 'english',
|
|
1171
|
+
'is_plural', etc.
|
|
1172
|
+
glossary: Dictionary mapping English terms to translations, or None.
|
|
1173
|
+
|
|
1174
|
+
Returns:
|
|
1175
|
+
Translation string/dict if match found, None otherwise.
|
|
1176
|
+
"""
|
|
1177
|
+
if not glossary:
|
|
1178
|
+
return None
|
|
1179
|
+
|
|
1180
|
+
is_plural = key_info.get("is_plural", False)
|
|
1181
|
+
msgid_plural = key_info.get("msgid_plural")
|
|
1182
|
+
|
|
1183
|
+
if is_plural and msgid_plural:
|
|
1184
|
+
return self._check_plural_glossary_match(key_info, glossary, msgid_plural)
|
|
1185
|
+
|
|
1186
|
+
# Singular match
|
|
1187
|
+
match = match_glossary_term(key_info["english"], glossary, exact_match=True)
|
|
1188
|
+
if not match:
|
|
1189
|
+
return None
|
|
1190
|
+
|
|
1191
|
+
if isinstance(match, dict):
|
|
1192
|
+
return match.get("translation", match.get("singular", ""))
|
|
1193
|
+
return match
|
|
1194
|
+
|
|
1195
|
+
def _check_plural_glossary_match(
|
|
1196
|
+
self, key_info: dict, glossary: dict[str, Any], msgid_plural: str
|
|
1197
|
+
) -> Any | None:
|
|
1198
|
+
"""Check glossary match for plural keys. Returns translation or None."""
|
|
1199
|
+
singular_match = match_glossary_term(
|
|
1200
|
+
key_info["english"], glossary, exact_match=True
|
|
1201
|
+
)
|
|
1202
|
+
plural_match = match_glossary_term(msgid_plural, glossary, exact_match=True)
|
|
1203
|
+
|
|
1204
|
+
if singular_match and plural_match:
|
|
1205
|
+
if isinstance(singular_match, dict) and "singular" in singular_match:
|
|
1206
|
+
return singular_match
|
|
1207
|
+
if isinstance(plural_match, dict) and "singular" in plural_match:
|
|
1208
|
+
return plural_match
|
|
1209
|
+
return {
|
|
1210
|
+
"singular": str(singular_match),
|
|
1211
|
+
"plural": str(plural_match),
|
|
1212
|
+
}
|
|
1213
|
+
|
|
1214
|
+
if singular_match:
|
|
1215
|
+
key_info["_glossary_singular"] = (
|
|
1216
|
+
str(singular_match)
|
|
1217
|
+
if isinstance(singular_match, str)
|
|
1218
|
+
else singular_match.get("singular", "")
|
|
1219
|
+
)
|
|
1220
|
+
|
|
1221
|
+
return None # Need LLM for plural or no match
|
|
1222
|
+
|
|
1223
|
+
def _format_glossary_for_prompt(self, glossary: dict[str, Any] | None) -> str:
|
|
1224
|
+
"""Format glossary as a prompt section for LLM translation requests.
|
|
1225
|
+
|
|
1226
|
+
Args:
|
|
1227
|
+
glossary: Dictionary mapping English terms to translations, or
|
|
1228
|
+
None/empty dict.
|
|
1229
|
+
|
|
1230
|
+
Returns:
|
|
1231
|
+
Empty string if glossary is None or empty, otherwise returns a
|
|
1232
|
+
formatted string with glossary terms and instructions for consistent
|
|
1233
|
+
translation.
|
|
1234
|
+
"""
|
|
1235
|
+
if not glossary:
|
|
1236
|
+
return ""
|
|
1237
|
+
|
|
1238
|
+
# Format glossary as JSON for the prompt
|
|
1239
|
+
# Handle potential serialization errors gracefully
|
|
1240
|
+
try:
|
|
1241
|
+
glossary_json = json.dumps(glossary, indent=2, ensure_ascii=False)
|
|
1242
|
+
except (TypeError, ValueError) as e:
|
|
1243
|
+
# If glossary contains non-serializable values, log warning and skip
|
|
1244
|
+
self.stdout.write(
|
|
1245
|
+
self.style.WARNING(
|
|
1246
|
+
f" WARNING: Could not serialize glossary for prompt: {e!s}. "
|
|
1247
|
+
f"Continuing without glossary in LLM prompt."
|
|
1248
|
+
)
|
|
1249
|
+
)
|
|
1250
|
+
return ""
|
|
1251
|
+
glossary_template = f"""
|
|
1252
|
+
IMPORTANT - Use these glossary terms when translating. If any English terms
|
|
1253
|
+
from the glossary appear in the texts to translate, use the corresponding
|
|
1254
|
+
translation from the glossary:
|
|
1255
|
+
|
|
1256
|
+
{glossary_json}
|
|
1257
|
+
|
|
1258
|
+
When translating sentences, ensure that glossary terms are translated
|
|
1259
|
+
consistently according to the glossary above, even if they appear
|
|
1260
|
+
within longer sentences. For example, if the glossary specifies
|
|
1261
|
+
"certificate" -> "Πιστοποιητικό", then translate "certificate" as
|
|
1262
|
+
"Πιστοποιητικό" even when it appears in longer sentences like
|
|
1263
|
+
"The course completion certificate is available".
|
|
1264
|
+
"""
|
|
1265
|
+
return textwrap.dedent(glossary_template)
|
|
1266
|
+
|
|
1267
|
+
def _call_llm_batch( # noqa: PLR0913
|
|
1268
|
+
self,
|
|
1269
|
+
key_batch: list[dict],
|
|
1270
|
+
lang_code: str,
|
|
1271
|
+
provider: str,
|
|
1272
|
+
model: str,
|
|
1273
|
+
glossary: dict[str, Any] | None = None,
|
|
1274
|
+
timeout: int = 120,
|
|
1275
|
+
) -> list[str | dict]:
|
|
1276
|
+
"""Call LLM API to translate multiple texts in a single request.
|
|
1277
|
+
|
|
1278
        Args:
            key_batch: List of key information dictionaries to translate
            lang_code: Target language code
            provider: Translation provider name (openai, gemini, mistral)
            model: LLM model name
            glossary: Optional glossary dictionary
            timeout: Request timeout in seconds (default: 120)
        """
        api_key = self._get_llm_api_key(provider)

        texts_dict = {}
        plural_entries = {}
        for i, key_info in enumerate(key_batch, 1):
            key_str = str(i)
            if key_info.get("is_plural") and key_info.get("msgid_plural"):
                texts_dict[key_str] = {
                    "singular": key_info["english"],
                    "plural": key_info.get("msgid_plural", ""),
                }
                plural_entries[key_str] = True
            else:
                texts_dict[key_str] = key_info["english"]

        texts_block = json.dumps(texts_dict, indent=2, ensure_ascii=False)
        plural_count = len(plural_entries)

        lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code)

        # Build glossary section if glossary is provided
        glossary_section = self._format_glossary_for_prompt(glossary)

        prompt_template = (
            f"""Translate the following {len(key_batch)} text(s) to {lang_name} """
            f"""(language code: {lang_code}).
Context: These are from an educational platform.
Preserve any placeholders like {{variable}}, {{0}}, %s, etc.
Preserve HTML tags and formatting.
{glossary_section}
{
                (
                    "IMPORTANT: "
                    + str(plural_count)
                    + " entry/entries have plural forms. "
                    + "For these, return BOTH singular and "
                    + 'plural translations as an object with "singular" '
                    + 'and "plural" keys.'
                )
                if plural_count > 0
                else ""
            }

Return a JSON object where each key is the number (1, 2, 3, etc.).
- For singular entries: value is the translation string.
- For plural entries: value is an object with "singular" and "plural" keys,
  each containing the translation.

Input texts (numbered):
{texts_block}

Return ONLY valid JSON in this format:
{{
  "1": "translation of first text",
  "2": {{"singular": "singular translation",
  "plural": "plural translation"}},
  "3": "translation of third text",
  ...
}}"""
        )
        prompt = textwrap.dedent(prompt_template)

        try:
            completion_kwargs = configure_litellm_for_provider(
                provider=provider,
                model=model,
                api_key=api_key,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                timeout=timeout,
            )

            response = completion(**completion_kwargs)
            response_text = response.choices[0].message.content.strip()

            translations = self._parse_json_response(response_text, key_batch)
            if translations:
                return translations

            return self._parse_order_based_response(response_text, key_batch)

        except TimeoutError:
            msg = (
                f"LLM batch API call timed out after {timeout} seconds.\n"
                f"Model: {model}\n"
                f"Batch size: {len(key_batch)}\n"
                f"Try reducing --batch-size or check your network connection."
            )
            raise CommandError(msg) from None
        except (requests.RequestException, ValueError, KeyError, AttributeError) as e:
            msg = (
                f"LLM batch API call failed: {e!s}\n"
                f"Model: {model}\n"
                f"Batch size: {len(key_batch)}\n"
                f"Make sure TRANSLATIONS_PROVIDERS is configured in settings "
                f"with the appropriate api_key, or set the environment variable "
                f"(OPENAI_API_KEY, GEMINI_API_KEY, or MISTRAL_API_KEY)"
            )
            raise CommandError(msg) from e

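    # Illustration (hypothetical data): a batch such as
    #   key_batch = [
    #       {"english": "Course"},
    #       {"english": "{count} learner", "is_plural": True,
    #        "msgid_plural": "{count} learners"},
    #   ]
    # is sent to the model as the numbered payload
    #   {"1": "Course",
    #    "2": {"singular": "{count} learner", "plural": "{count} learners"}}
    # and the reply is expected back under the same numeric keys.
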
    def _parse_json_response(
        self, response_text: str, key_batch: list[dict]
    ) -> list[str | dict] | None:
        """Parse JSON response from LLM."""
        json_text = response_text
        if "```json" in response_text:
            start = response_text.find("```json") + 7
            end = response_text.find("```", start)
            if end > start:
                json_text = response_text[start:end].strip()
        elif "```" in response_text:
            start = response_text.find("```") + 3
            end = response_text.find("```", start)
            if end > start:
                json_text = response_text[start:end].strip()

        try:
            data = json.loads(json_text)
            translations: list[str | dict[str, str]] = []
            for i in range(len(key_batch)):
                key = str(i + 1)
                if key in data:
                    value = data[key]
                    if (
                        isinstance(value, dict)
                        and "singular" in value
                        and "plural" in value
                    ):
                        translations.append(
                            {
                                "singular": str(value["singular"]).strip(),
                                "plural": str(value["plural"]).strip(),
                            }
                        )
                    else:
                        translations.append(str(value).strip())
                else:
                    translations.append("")
        except (json.JSONDecodeError, KeyError, ValueError):
            return None
        else:
            return translations

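    # Example (hypothetical reply): output wrapped in a fenced block such as
    #   ```json
    #   {"1": "first translation", "2": {"singular": "one", "plural": "many"}}
    #   ```
    # is unwrapped before json.loads; any missing numeric key becomes an empty
    # string, so the returned list always has len(key_batch) entries.
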
    def _parse_order_based_response(
        self, response_text: str, key_batch: list[dict]
    ) -> list[str | dict[str, str]]:
        """Fallback: Parse response assuming translations are in order."""
        lines = [line.strip() for line in response_text.split("\n") if line.strip()]
        cleaned_lines = [
            line.lstrip("0123456789.-) ").strip()
            for line in lines
            if line.lstrip("0123456789.-) ").strip()
        ]
        if len(cleaned_lines) < len(key_batch):
            cleaned_lines.extend([""] * (len(key_batch) - len(cleaned_lines)))
        # Return as list[str | dict[str, str]] - all strings in this fallback
        return cast("list[str | dict[str, str]]", cleaned_lines[: len(key_batch)])

    def _get_llm_api_key(self, provider: str) -> str | None:
        """Get API key from TRANSLATIONS_PROVIDERS or environment variables.

        Args:
            provider: Translation provider name (openai, gemini, mistral)
        """
        try:
            if hasattr(settings, "TRANSLATIONS_PROVIDERS"):
                providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {})
                if isinstance(providers, dict) and provider in providers:
                    provider_config = providers[provider]
                    if isinstance(provider_config, dict):
                        api_key = provider_config.get("api_key")
                        if api_key:
                            return api_key
        except (AttributeError, TypeError) as e:
            logger.debug("Error accessing TRANSLATIONS_PROVIDERS: %s", e)

        env_key_name = (
            "GEMINI_API_KEY"
            if provider == PROVIDER_GEMINI
            else "MISTRAL_API_KEY"
            if provider == PROVIDER_MISTRAL
            else "OPENAI_API_KEY"
        )
        return os.environ.get(env_key_name)

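    # Example settings shape this lookup expects (placeholder values):
    #   TRANSLATIONS_PROVIDERS = {
    #       "openai": {"api_key": "sk-..."},
    #       "gemini": {"api_key": "..."},
    #       "mistral": {"api_key": "..."},
    #   }
    # When no entry is configured, the provider's environment variable
    # (OPENAI_API_KEY, GEMINI_API_KEY, or MISTRAL_API_KEY) is used instead.
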
    def _group_translations_by_file(
        self, translations: dict[str, Any], empty_keys: list[dict]
    ) -> dict[str, dict[str, Any]]:
        """Group translations by file path."""
        translations_by_file: dict[str, dict[str, Any]] = {}

        for key_info in empty_keys:
            # Normalize file path for consistent comparison
            file_path_str = str(Path(key_info["file_path"]).resolve())
            translation_key = f"{file_path_str}:{key_info['key']}"
            if translation_key in translations:
                trans_value = translations[translation_key]

                if key_info["file_type"] == "json" and isinstance(trans_value, dict):
                    trans_value = trans_value.get("singular", str(trans_value))

                translations_by_file.setdefault(file_path_str, {})[key_info["key"]] = (
                    trans_value
                )

        return translations_by_file

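    # Example (hypothetical path): a flat translations dict keyed as
    #   {"/abs/path/to/el.json:course.title": "..."}
    # is grouped here into
    #   {"/abs/path/to/el.json": {"course.title": "..."}}
    # so each file can be rewritten in a single pass.
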
    def _apply_file_translations(
        self,
        file_path: Path,
        file_translations: dict[str, Any],
        empty_keys: list[dict],
        stdout,
    ) -> tuple[int, str]:
        """Apply translations to a single file. Returns (count, app)."""
        if not file_path.exists():
            stdout.write(self.style.WARNING(f" WARNING: File not found: {file_path}"))
            return 0, "unknown"

        # Normalize paths for comparison
        normalized_file_path = str(file_path.resolve())
        key_info = next(
            k
            for k in empty_keys
            if str(Path(k["file_path"]).resolve()) == normalized_file_path
        )
        app = key_info.get("app", "unknown")

        if key_info["file_type"] == "json":
            count = apply_json_translations(file_path, file_translations)
        elif key_info["file_type"] == "po":
            count = apply_po_translations(file_path, file_translations)
        else:
            return 0, app

        return count, app

    def _apply_translations(
        self,
        translations: dict[str, Any],
        empty_keys: list[dict],
        stdout,
    ) -> tuple[int, dict[str, Any]]:
        """Apply translations to files."""
        translations_by_file = self._group_translations_by_file(
            translations, empty_keys
        )

        if not translations_by_file:
            stdout.write(self.style.WARNING(" WARNING: No translations to apply"))
            return 0, {"by_app": {}, "details": []}

        applied = 0
        applied_by_app: dict[str, int] = {}
        applied_details: list[dict[str, Any]] = []

        for file_path_str, file_translations in translations_by_file.items():
            full_path = Path(file_path_str)
            count, app = self._apply_file_translations(
                full_path, file_translations, empty_keys, stdout
            )

            applied += count
            if count > 0:
                applied_by_app[app] = applied_by_app.get(app, 0) + count
                applied_details.append(
                    {"app": app, "file": full_path.name, "count": count}
                )
                stdout.write(
                    f" Applied {count} translations to {app} ({full_path.name})"
                )

        if applied_by_app:
            app_summary = ", ".join(
                f"{app}: {count}" for app, count in applied_by_app.items()
            )
            stdout.write(f" Summary by app: {app_summary}")

        return applied, {"by_app": applied_by_app, "details": applied_details}

    def _cleanup_failed_branch(self, repo: GitRepository, branch_name: str) -> None:
        """Clean up branch if PR creation fails."""
        try:
            repo.switch_to_main()
            # Only try to delete if branch exists locally
            if branch_name in [ref.name for ref in repo.repo.heads]:
                with suppress(git.exc.GitCommandError):
                    repo.repo.git.branch("-D", branch_name)
                self.stdout.write(
                    self.style.WARNING(
                        f" Cleaned up failed branch: {branch_name}"
                    )
                )
        except (git.exc.GitCommandError, AttributeError) as e:
            self.stdout.write(
                self.style.WARNING(f" Could not clean up branch {branch_name}: {e!s}")
            )

    def _commit_changes(
        self, repo: GitRepository, branch_name: str, lang_code: str
    ) -> bool:
        """Commit changes to git repository. Returns True if committed."""
        # Check if branch already exists
        if repo.branch_exists(branch_name):
            self.stdout.write(
                self.style.WARNING(
                    f" Branch '{branch_name}' already exists. "
                    f"Switching to it and continuing..."
                )
            )
            try:
                repo.repo.git.checkout(branch_name)
            except git.exc.GitCommandError:
                # If local branch doesn't exist but remote does, create tracking branch
                repo.repo.git.checkout("-b", branch_name, f"origin/{branch_name}")
        else:
            repo.configure_user()
            repo.create_branch(branch_name)
        repo.stage_all()

        if not repo.has_changes():
            self.stdout.write(
                self.style.WARNING(
                    " No changes to commit. Skipping commit and PR creation."
                )
            )
            repo.switch_to_main()
            with suppress(git.exc.GitCommandError):
                repo.repo.git.branch("-D", branch_name)
            return False

        safe_lang_code = sanitize_for_git(lang_code)
        commit_message = (
            f"feat: Add {safe_lang_code} translations via LLM\n\n"
            f"Automated translation of empty keys for {safe_lang_code} language."
        )

        repo.commit(commit_message)

        github_token = getattr(
            settings, "TRANSLATIONS_GITHUB_TOKEN", None
        ) or os.environ.get("TRANSLATIONS_GITHUB_TOKEN")
        repo.push_branch(branch_name, github_token)
        self.stdout.write(" Pushed branch to remote")

        return True

    def _create_pull_request(
        self,
        repo_path: str,
        branch_name: str,
        pr_data: PullRequestData,
        repo_url: str,
    ) -> str:
        """Create pull request using GitHub CLI or API."""
        lang_code = pr_data["lang_code"]
        try:
            # Using GitHub CLI (gh) - trusted system command
            gh_path = shutil.which("gh")
            if gh_path:
                result = subprocess.run(  # noqa: S603
                    [
                        gh_path,
                        "pr",
                        "create",
                        "--title",
                        f"feat: Add {lang_code} translations via LLM",
                        "--body",
                        self._generate_pr_body(pr_data),
                    ],
                    cwd=repo_path,
                    capture_output=True,
                    text=True,
                    check=True,
                )
                return result.stdout.strip()
        except (subprocess.CalledProcessError, FileNotFoundError):
            pass
        # Fall back to API if gh CLI is not available or fails
        return self._create_pr_via_api(
            repo_path,
            branch_name,
            pr_data,
            repo_url,
        )

    def _generate_error_section(
        self, errors: int, errors_by_app: dict[str, int] | None = None
    ) -> str:
        """Generate error warning section for PR body if there are errors.

        Args:
            errors: Number of translation errors.
            errors_by_app: Dictionary mapping app/MFE names to error counts.

        Returns:
            Error section markdown string, or empty string if no errors.
        """
        if errors == 0:
            return ""

        error_details = ""
        if errors_by_app:
            error_lines = [
                f"- **{app}**: {count} key(s) failed"
                for app, count in sorted(
                    errors_by_app.items(), key=lambda x: x[1], reverse=True
                )
            ]
            error_details = (
                "\n**Errors by app/MFE:**\n\n" + "\n".join(error_lines) + "\n"
            )

        error_template = f"""
### Translation Errors

**{errors} translation key(s) failed to translate** due to API errors, rate
limits, or parsing issues.
{error_details}
**Impact:**
- These keys remain untranslated in the target language files
- They will need to be translated manually or re-run the command
- The translation process continued and completed successfully
  for the remaining keys

**Recommendation:**
- Review the command output logs for specific error details
- Consider re-running the command to retry failed batches
- Check API key permissions and rate limits if errors persist

"""
        return textwrap.dedent(error_template)

    def _generate_translation_summary(
        self, glossary_matches: int, llm_translations: int, errors: int
    ) -> str:
        """Generate translation statistics summary line.

        Args:
            glossary_matches: Number of glossary matches.
            llm_translations: Number of LLM translations.
            errors: Number of translation errors.

        Returns:
            Summary string.
        """
        if glossary_matches > 0:
            return (
                f"Summary - Glossary matches: {glossary_matches}, "
                f"LLM translations: {llm_translations}, Errors: {errors}"
            )
        return f"Summary - LLM translations: {llm_translations}, Errors: {errors}"

    def _generate_pr_body(self, pr_data: PullRequestData) -> str:
        """Generate PR description."""
        lang_code = pr_data["lang_code"]
        iso_code = pr_data["iso_code"]
        sync_stats = pr_data["sync_stats"]
        applied_count = pr_data["applied_count"]
        translation_stats = pr_data["translation_stats"]
        applied_by_app = pr_data["applied_by_app"]

        glossary_matches = translation_stats.get("glossary_matches", 0)
        llm_translations = translation_stats.get("llm_translations", 0)
        errors = translation_stats.get("errors", 0)
        errors_by_app: dict[str, int] = cast(
            "dict[str, int]", translation_stats.get("errors_by_app", {})
        )

        translation_summary = self._generate_translation_summary(
            glossary_matches, llm_translations, errors
        )
        error_section = self._generate_error_section(errors, errors_by_app)

        applied_details = applied_by_app.get("details", [])
        breakdown_lines = [
            f" Applied {detail['count']} translations to "
            f"{detail['app']} ({detail['file']})"
            for detail in applied_details
        ]

        # Build changes section with conditional error line
        changes_lines = [
            f"- **Language**: {lang_code} ({iso_code})",
            f"- **Keys synced**: {sync_stats['frontend']['added']} frontend keys, "
            f"{sync_stats['backend']['added']} backend entries",
            f"- **Translations applied**: {applied_count} keys translated",
            f"- **Typos fixed**: {sync_stats['frontend']['fixed']}",
        ]
        if errors > 0:
            changes_lines.append(
                f"- **Translation errors**: {errors} keys failed to translate"
            )

        # Build statistics section with conditional error line
        statistics_lines = [
            translation_summary,
            f" Translated {applied_count} keys",
        ]
        if errors > 0:
            statistics_lines.append(f" Failed: {errors} keys")

        # Build next steps section with conditional error line
        next_steps_lines = [
            "- Review translations for accuracy",
        ]
        if errors > 0:
            next_steps_lines.append(
                "- Address failed translations (see error section above)"
            )
        next_steps_lines.extend(
            [
                "- Test in staging environment",
                "- Merge when ready",
            ]
        )

        pr_template = (
            f"""## Summary

This PR adds {lang_code} translations via LLM automation.
{error_section}
### Changes

{chr(10).join(changes_lines)}

### Translation Statistics

{chr(10).join(statistics_lines)}

### Applied Translations

{
                chr(10).join(breakdown_lines)
                if breakdown_lines
                else " No translations applied"
            }

Applied {applied_count} translations

### Files Modified

- Frontend apps: {sync_stats["frontend"]["created"]} created, """
            f"""{sync_stats["frontend"]["synced"]} synced
- Backend: PO files updated

### Next Steps

{chr(10).join(next_steps_lines)}

---
*This PR was automatically generated by the sync_and_translate_language """
            f"""management command.*
"""
        )
        return textwrap.dedent(pr_template)

    def _create_pr_via_api(
        self,
        repo_path: str,
        branch_name: str,
        pr_data: PullRequestData,
        repo_url: str,
    ) -> str:
        """Create PR using GitHub API."""
        client = GitHubAPIClient()
        owner, repo = GitHubAPIClient.parse_repo_url(repo_url)

        git_repo = GitRepository(repo_path)
        main_branch = git_repo._get_main_branch_name()  # noqa: SLF001

        lang_code = pr_data["lang_code"]
        return client.create_pull_request(
            owner=owner,
            repo=repo,
            branch_name=branch_name,
            title=f"feat: Add {lang_code} translations via LLM",
            body=self._generate_pr_body(pr_data),
            base=main_branch,
            stdout=self.stdout,
        )