ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (40)
  1. ol_openedx_course_translations/admin.py +29 -0
  2. ol_openedx_course_translations/apps.py +13 -2
  3. ol_openedx_course_translations/filters.py +39 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  10. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  11. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  12. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  13. ol_openedx_course_translations/management/commands/translate_course.py +472 -475
  14. ol_openedx_course_translations/middleware.py +143 -0
  15. ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
  16. ol_openedx_course_translations/migrations/__init__.py +0 -0
  17. ol_openedx_course_translations/models.py +57 -0
  18. ol_openedx_course_translations/providers/__init__.py +1 -0
  19. ol_openedx_course_translations/providers/base.py +278 -0
  20. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  21. ol_openedx_course_translations/providers/llm_providers.py +581 -0
  22. ol_openedx_course_translations/settings/cms.py +17 -0
  23. ol_openedx_course_translations/settings/common.py +58 -30
  24. ol_openedx_course_translations/settings/lms.py +38 -0
  25. ol_openedx_course_translations/tasks.py +222 -0
  26. ol_openedx_course_translations/urls.py +16 -0
  27. ol_openedx_course_translations/utils/__init__.py +0 -0
  28. ol_openedx_course_translations/utils/command_utils.py +197 -0
  29. ol_openedx_course_translations/utils/constants.py +218 -0
  30. ol_openedx_course_translations/utils/course_translations.py +608 -0
  31. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  32. ol_openedx_course_translations/views.py +73 -0
  33. ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
  34. ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
  35. ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
  36. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  37. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  38. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  39. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
  40. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,1866 @@
1
+ """
2
+ Django management command to sync translation keys, translate using LLM, and create PRs.
3
+
4
+ Usage:
5
+ ./manage.py cms sync_and_translate_language el
6
+ ./manage.py cms sync_and_translate_language el \\
7
+ --provider openai --model gpt-4-turbo --glossary
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ import os
13
+ import re
14
+ import shutil
15
+ import subprocess
16
+ import textwrap
17
+ import time
18
+ import urllib.parse
19
+ from configparser import NoSectionError
20
+ from contextlib import contextmanager, suppress
21
+ from pathlib import Path
22
+ from typing import Any, TypedDict, cast
23
+
24
+ import git
25
+ import requests
26
+ from django.conf import settings
27
+ from django.core.management.base import BaseCommand, CommandError
28
+ from litellm import completion
29
+
30
+ import ol_openedx_course_translations.utils.translation_sync as utils_module
31
+ from ol_openedx_course_translations.utils.command_utils import (
32
+ configure_litellm_for_provider,
33
+ create_branch_name,
34
+ get_config_value,
35
+ get_default_model_for_provider,
36
+ get_default_provider,
37
+ is_retryable_error,
38
+ sanitize_for_git,
39
+ validate_branch_name,
40
+ validate_language_code,
41
+ )
42
+ from ol_openedx_course_translations.utils.constants import (
43
+ HTTP_CREATED,
44
+ HTTP_NOT_FOUND,
45
+ HTTP_OK,
46
+ HTTP_TOO_MANY_REQUESTS,
47
+ HTTP_UNPROCESSABLE_ENTITY,
48
+ LANGUAGE_MAPPING,
49
+ MAX_ERROR_MESSAGE_LENGTH,
50
+ MAX_RETRIES,
51
+ PROVIDER_GEMINI,
52
+ PROVIDER_MISTRAL,
53
+ )
54
+ from ol_openedx_course_translations.utils.translation_sync import (
55
+ apply_json_translations,
56
+ apply_po_translations,
57
+ extract_empty_keys,
58
+ load_glossary,
59
+ match_glossary_term,
60
+ sync_all_translations,
61
+ )
62
+
63
+ logger = logging.getLogger(__name__)
64
+
65
+
66
+ class GitRepository:
67
+ """Helper class for git operations with consistent error handling."""
68
+
69
+ def __init__(self, repo_path: str):
70
+ self.repo_path = Path(repo_path)
71
+ try:
72
+ self.repo = git.Repo(repo_path)
73
+ except git.exc.InvalidGitRepositoryError as e:
74
+ msg = (
75
+ f"Invalid git repository at {repo_path}. "
76
+ f"Please remove it or specify a different path."
77
+ )
78
+ raise CommandError(msg) from e
79
+ except git.exc.GitCommandError as e:
80
+ msg = f"Git error accessing repository: {e!s}"
81
+ raise CommandError(msg) from e
82
+
83
+ def _handle_git_error(self, operation: str, error: Exception) -> None:
84
+ """Convert git errors to CommandError with context."""
85
+ msg = f"Git error {operation}: {error!s}"
86
+ raise CommandError(msg) from error
87
+
88
+ def _get_main_branch_name(self) -> str:
89
+ """
90
+ Determine the main branch name.
91
+ Checks local branches first, then remote branches.
92
+ Fetches from remote if needed to check remote branches.
93
+ """
94
+ # Check if 'main' exists locally
95
+ if "main" in [ref.name for ref in self.repo.heads]:
96
+ return "main"
97
+
98
+ # If not found locally, fetch from remote and check remote branches
99
+ with suppress(git.exc.GitCommandError):
100
+ # If fetch fails, we'll try to check existing remote refs anyway
101
+ self.repo.remotes.origin.fetch()
102
+
103
+ # Check remote branches
104
+ if "origin/main" in [ref.name for ref in self.repo.remotes.origin.refs]:
105
+ return "main"
106
+
107
+ msg = "Main branch not found locally or on remote"
108
+ raise CommandError(msg)
109
+
110
+ def ensure_clean(self) -> bool:
111
+ """
112
+ Clean uncommitted changes in tracked files.
113
+ Returns True if cleaned, False if already clean.
114
+
115
+ This ensures any leftover staged/uncommitted changes from a previous
116
+ interrupted run are removed before starting a new translation sync.
117
+ """
118
+ try:
119
+ if self.repo.is_dirty(untracked_files=False):
120
+ self.repo.head.reset(index=True, working_tree=True)
121
+ return True
122
+ else:
123
+ return False
124
+ except git.exc.GitCommandError as e:
125
+ self._handle_git_error("cleaning repository", e)
126
+ return False # Never reached, but satisfies type checker
127
+
128
+ def switch_to_main(self) -> None:
129
+ """Switch to main branch, deleting current branch if it's not main."""
130
+ try:
131
+ # Get current branch name (might be in detached HEAD state)
132
+ try:
133
+ current_branch = self.repo.active_branch.name
134
+ except TypeError:
135
+ # Detached HEAD state - we'll checkout main anyway
136
+ current_branch = None
137
+
138
+ # Get the main branch name
139
+ main_branch = self._get_main_branch_name()
140
+
141
+ # Only switch if we're not already on the main branch
142
+ if current_branch != main_branch:
143
+ # Try to checkout the branch (will work if it exists locally)
144
+ try:
145
+ self.repo.git.checkout(main_branch)
146
+ except git.exc.GitCommandError:
147
+ # Branch doesn't exist locally, checkout from remote
148
+ self.repo.git.checkout("-b", main_branch, f"origin/{main_branch}")
149
+
150
+ # Delete the previous branch if it exists and is not the main branch
151
+ if current_branch and current_branch != main_branch:
152
+ with suppress(git.exc.GitCommandError):
153
+ self.repo.git.branch("-D", current_branch)
154
+ except (git.exc.GitCommandError, TypeError) as e:
155
+ self._handle_git_error("switching branches", e)
156
+
157
+ def update_from_remote(self) -> None:
158
+ """Fetch and pull latest changes from origin/main."""
159
+ try:
160
+ self.repo.remotes.origin.fetch()
161
+ main_branch = self._get_main_branch_name()
162
+ self.repo.git.pull("origin", main_branch)
163
+ except git.exc.GitCommandError as e:
164
+ self._handle_git_error("updating repository", e)
165
+
166
+ def get_remote_url(self) -> str | None:
167
+ """Get the current remote URL."""
168
+ try:
169
+ return self.repo.remotes.origin.url
170
+ except (git.exc.GitCommandError, AttributeError):
171
+ return None
172
+
173
+ def configure_user(
174
+ self,
175
+ email: str = "translations@mitodl.org",
176
+ name: str = "MIT Open Learning Translations Bot",
177
+ ) -> None:
178
+ """Configure git user for this repository."""
179
+ try:
180
+ with self.repo.config_writer() as config:
181
+ # Check if user section exists and get existing values
182
+ try:
183
+ existing_email = config.get_value("user", "email", default=None)
184
+ existing_name = config.get_value("user", "name", default=None)
185
+ except NoSectionError:
186
+ # Section doesn't exist, set both values
187
+ existing_email = None
188
+ existing_name = None
189
+ # Set values only if they don't exist
190
+ if not existing_email:
191
+ config.set_value("user", "email", email)
192
+ if not existing_name:
193
+ config.set_value("user", "name", name)
194
+ except git.exc.GitCommandError as e:
195
+ self._handle_git_error("configuring user", e)
196
+
197
+ def branch_exists(self, branch_name: str) -> bool:
198
+ """Check if branch exists locally or remotely."""
199
+ validate_branch_name(branch_name)
200
+ try:
201
+ # Check local branches
202
+ if branch_name in [ref.name for ref in self.repo.heads]:
203
+ return True
204
+ # Check remote branches
205
+ remote_branch = f"origin/{branch_name}"
206
+ try:
207
+ self.repo.remotes.origin.fetch()
208
+ except git.exc.GitCommandError:
209
+ # If fetch fails, try to check existing remote refs anyway
210
+ # Check remote refs with existing data
211
+ return remote_branch in [
212
+ ref.name for ref in self.repo.remotes.origin.refs
213
+ ]
214
+ else:
215
+ # Fetch succeeded, check remote refs
216
+ return remote_branch in [
217
+ ref.name for ref in self.repo.remotes.origin.refs
218
+ ]
219
+ except git.exc.GitCommandError as e:
220
+ self._handle_git_error("checking branch existence", e)
221
+ return False # Never reached, but satisfies type checker
222
+
223
+ def create_branch(self, branch_name: str) -> None:
224
+ """Create and checkout a new branch."""
225
+ validate_branch_name(branch_name)
226
+ try:
227
+ self.repo.git.checkout("-b", branch_name)
228
+ except git.exc.GitCommandError as e:
229
+ self._handle_git_error("creating branch", e)
230
+
231
+ def stage_all(self) -> None:
232
+ """Stage all changes."""
233
+ try:
234
+ self.repo.git.add(".")
235
+ except git.exc.GitCommandError as e:
236
+ self._handle_git_error("staging changes", e)
237
+
238
+ def has_changes(self) -> bool:
239
+ """Check if there are uncommitted changes."""
240
+ try:
241
+ return self.repo.is_dirty(untracked_files=True)
242
+ except git.exc.GitCommandError as e:
243
+ self._handle_git_error("checking changes", e)
244
+ return False # Never reached, but satisfies type checker
245
+
246
+ def commit(self, message: str) -> None:
247
+ """Commit staged changes."""
248
+ try:
249
+ self.repo.index.commit(message)
250
+ except git.exc.GitCommandError as e:
251
+ self._handle_git_error("committing changes", e)
252
+
253
+ @contextmanager
254
+ def authenticated_push_url(self, github_token: str):
255
+ """Context manager for authenticated push with automatic cleanup."""
256
+ origin = self.repo.remotes.origin
257
+ original_url = origin.url
258
+
259
+ # Build authenticated URL
260
+ match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", original_url)
261
+ if match:
262
+ owner, repo_name = match.groups()
263
+ encoded_token = urllib.parse.quote(github_token, safe="")
264
+ push_url = f"https://{encoded_token}@github.com/{owner}/{repo_name}.git"
265
+ else:
266
+ encoded_token = urllib.parse.quote(github_token, safe="")
267
+ push_url = original_url.replace("https://", f"https://{encoded_token}@")
268
+
269
+ try:
270
+ origin.set_url(push_url)
271
+ yield
272
+ finally:
273
+ # Always restore original URL
274
+ try:
275
+ origin.set_url(original_url)
276
+ except (git.exc.GitCommandError, ValueError) as e:
277
+ # Best effort cleanup - log but don't fail
278
+ logger.warning("Failed to restore original git remote URL: %s", e)
279
+
280
+ def push_branch(self, branch_name: str, github_token: str | None = None) -> None:
281
+ """Push branch to remote with optional authentication."""
282
+ validate_branch_name(branch_name)
283
+ try:
284
+ if github_token:
285
+ with self.authenticated_push_url(github_token):
286
+ self.repo.git.push("-u", "origin", branch_name)
287
+ else:
288
+ self.repo.git.push("-u", "origin", branch_name)
289
+ except git.exc.GitCommandError as e:
290
+ self._handle_git_error("pushing branch", e)
291
+
292
+ @staticmethod
293
+ def clone(repo_url: str, repo_path: str) -> "GitRepository":
294
+ """Clone a repository and return GitRepository instance."""
295
+ repo_path_obj = Path(repo_path)
296
+ try:
297
+ repo_path_obj.parent.mkdir(parents=True, exist_ok=True)
298
+ git.Repo.clone_from(repo_url, str(repo_path))
299
+ return GitRepository(repo_path)
300
+ except git.exc.GitCommandError as e:
301
+ msg = f"Git error cloning repository: {e!s}"
302
+ raise CommandError(msg) from e
303
+ except OSError as e:
304
+ msg = f"Error creating directory: {e!s}"
305
+ raise CommandError(msg) from e
306
+
307
+
308
+ class GitHubAPIClient:
309
+ """Helper class for GitHub API operations."""
310
+
311
+ def __init__(self, token: str | None = None):
312
+ """Initialize with optional token."""
313
+ self.token = (
314
+ token
315
+ or getattr(settings, "TRANSLATIONS_GITHUB_TOKEN", None)
316
+ or os.environ.get("TRANSLATIONS_GITHUB_TOKEN")
317
+ )
318
+ if not self.token:
319
+ msg = "TRANSLATIONS_GITHUB_TOKEN not set in settings or environment"
320
+ raise CommandError(msg)
321
+
322
+ def _get_headers(self) -> dict:
323
+ """Get API request headers."""
324
+ return {
325
+ "Authorization": f"Bearer {self.token}",
326
+ "Accept": "application/vnd.github.v3+json",
327
+ "Content-Type": "application/json",
328
+ }
329
+
330
+ @staticmethod
331
+ def parse_repo_url(repo_url: str) -> tuple[str, str]:
332
+ """Extract owner and repo from GitHub URL."""
333
+ match = re.search(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$", repo_url)
334
+ if not match:
335
+ msg = f"Could not parse owner/repo from repo URL: {repo_url}"
336
+ raise CommandError(msg)
337
+ owner, repo = match.groups()
338
+ return (owner, repo)
339
+
340
+ def _handle_rate_limit(
341
+ self, response: requests.Response, attempt: int, max_retries: int, stdout
342
+ ) -> bool:
343
+ """Handle rate limit response. Returns True if should retry."""
344
+ if response.status_code == HTTP_TOO_MANY_REQUESTS:
345
+ retry_after = int(response.headers.get("Retry-After", 2 * (2**attempt)))
346
+ if attempt < max_retries - 1:
347
+ stdout.write(
348
+ f" Rate limit exceeded (attempt {attempt + 1}/{max_retries}). "
349
+ f"Retrying in {retry_after} seconds..."
350
+ )
351
+ time.sleep(retry_after)
352
+ return True
353
+ else:
354
+ msg = "GitHub API rate limit exceeded. Please try again later."
355
+ raise CommandError(msg)
356
+ return False
357
+
358
+ def _extract_error_message(self, response: requests.Response) -> str:
359
+ """Extract safe error message from response, including validation errors."""
360
+ try:
361
+ error_data = response.json()
362
+ message = error_data.get("message", f"HTTP {response.status_code}")
363
+
364
+ # GitHub API validation errors include detailed error info in 'errors' array
365
+ if error_data.get("errors"):
366
+ error_details = []
367
+ for err in error_data["errors"]:
368
+ if isinstance(err, dict):
369
+ field = err.get("field", "unknown")
370
+ code = err.get("code", "unknown")
371
+ resource = err.get("resource", "unknown")
372
+ error_details.append(f"{resource}.{field}: {code}")
373
+ else:
374
+ error_details.append(str(err))
375
+
376
+ if error_details:
377
+ message = f"{message} ({', '.join(error_details)})"
378
+ return message
379
+ else:
380
+ return message
381
+ except (ValueError, requests.exceptions.JSONDecodeError):
382
+ return f"HTTP {response.status_code}"
383
+
384
+ def verify_branch(
385
+ self,
386
+ owner: str,
387
+ repo: str,
388
+ branch_name: str,
389
+ stdout, # noqa: ARG002
390
+ ) -> None:
391
+ """Verify branch exists on remote."""
392
+ url = f"https://api.github.com/repos/{owner}/{repo}/branches/{branch_name}"
393
+ response = requests.get(url, headers=self._get_headers(), timeout=10)
394
+
395
+ if response.status_code == HTTP_NOT_FOUND:
396
+ msg = (
397
+ f"Branch '{branch_name}' not found on remote. "
398
+ f"Ensure the branch was pushed successfully."
399
+ )
400
+ raise CommandError(msg)
401
+ elif response.status_code != HTTP_OK:
402
+ error_msg = self._extract_error_message(response)
403
+ msg = f"Failed to verify branch: {error_msg}"
404
+ raise CommandError(msg)
405
+ # If status_code is HTTP_OK, function returns None implicitly
406
+
407
+ def create_pull_request( # noqa: PLR0913
408
+ self,
409
+ owner: str,
410
+ repo: str,
411
+ branch_name: str,
412
+ title: str,
413
+ body: str,
414
+ base: str = "main",
415
+ stdout=None,
416
+ ) -> str:
417
+ """Create a pull request with retry logic."""
418
+ url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
419
+ payload = {"title": title, "body": body, "head": branch_name, "base": base}
420
+ headers = self._get_headers()
421
+
422
+ max_retries = 3
423
+ base_retry_delay = 2
424
+
425
+ for attempt in range(max_retries):
426
+ retry_delay = base_retry_delay * (2**attempt)
427
+
428
+ try:
429
+ response = requests.post(url, json=payload, headers=headers, timeout=30)
430
+
431
+ if response.status_code == HTTP_CREATED:
432
+ return response.json()["html_url"]
433
+
434
+ if self._handle_rate_limit(
435
+ response, attempt, max_retries, stdout or self
436
+ ):
437
+ continue
438
+
439
+ if response.status_code == HTTP_UNPROCESSABLE_ENTITY:
440
+ error_msg = self._extract_error_message(response)
441
+ safe_error = (
442
+ error_msg[:MAX_ERROR_MESSAGE_LENGTH]
443
+ if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH
444
+ else error_msg
445
+ )
446
+ msg = (
447
+ f"GitHub API validation error: {safe_error}\n"
448
+ f"This usually means the branch doesn't exist on remote "
449
+ f"or there's already a PR for this branch."
450
+ )
451
+ raise CommandError(msg)
452
+
453
+ error_msg = self._extract_error_message(response)
454
+ safe_error = (
455
+ error_msg[:MAX_ERROR_MESSAGE_LENGTH]
456
+ if len(error_msg) > MAX_ERROR_MESSAGE_LENGTH
457
+ else error_msg
458
+ )
459
+ msg = f"GitHub API error: {safe_error}"
460
+ raise CommandError(msg)
461
+
462
+ except requests.exceptions.RequestException as e:
463
+ is_connection_error = isinstance(
464
+ e,
465
+ (requests.exceptions.ConnectionError, requests.exceptions.Timeout),
466
+ )
467
+
468
+ if is_connection_error and attempt < max_retries - 1:
469
+ if stdout:
470
+ error_msg = (
471
+ f" Connection error "
472
+ f"(attempt {attempt + 1}/{max_retries}): {e!s}"
473
+ )
474
+ stdout.write(error_msg)
475
+ stdout.write(f" Retrying in {retry_delay} seconds...")
476
+ time.sleep(retry_delay)
477
+ continue
478
+ else:
479
+ if is_connection_error:
480
+ msg = (
481
+ f"Failed to connect to GitHub API after "
482
+ f"{max_retries} attempts: {e!s}\n"
483
+ f"Please check your network connection and try again later."
484
+ )
485
+ raise CommandError(msg) from e
486
+ msg = f"GitHub API error: {e!s}"
487
+ raise CommandError(msg) from e
488
+
489
+ msg = "Failed to create pull request after all retries"
490
+ raise CommandError(msg)
491
+
492
+
493
class PullRequestData(TypedDict):
    """Values gathered by ``Command.handle`` and passed on to pull-request creation."""

    # Language code given on the command line.
    lang_code: str
    # ISO code used for JSON files; handle() falls back to lang_code.
    iso_code: str
    # Return value of sync_all_translations().
    sync_stats: dict
    # Number of translations written, as reported by _apply_translations().
    applied_count: int
    # Statistics returned by _translate_keys().
    translation_stats: dict[str, Any]
    # Per-app breakdown returned by _apply_translations().
    applied_by_app: dict[str, Any]
502
+
503
+
504
+ class TranslationParams(TypedDict):
505
+ """Parameters for translation operations."""
506
+
507
+ lang_code: str
508
+ provider: str
509
+ model: str
510
+ glossary: dict[str, Any] | None
511
+ batch_size: int
512
+ max_retries: int
513
+
514
+
515
+ class Command(BaseCommand):
516
+ help = (
517
+ "Sync translation keys, translate using LLM, "
518
+ "and create PR in mitxonline-translations"
519
+ )
520
+
521
def add_arguments(self, parser):
    """Register the command-line arguments of this management command.

    One positional argument (``lang``) plus options for the ISO code,
    repository location, provider/model selection, dry run, glossary use,
    batch size and MFE filtering.
    """
    parser.add_argument(
        "lang", type=str, help="Language code (e.g., el, fr, es_ES)"
    )
    parser.add_argument(
        "--iso-code",
        type=str,
        help="ISO code for JSON files (default: same as lang)",
    )

    repo_path_help = (
        "Path to mitxonline-translations repository. "
        "Can also be set via TRANSLATIONS_REPO_PATH setting "
        "or environment variable."
    )
    parser.add_argument("--repo-path", type=str, help=repo_path_help)

    # The configured default is shown in the help text so that --help
    # reflects the current settings.
    default_provider = get_default_provider()
    if default_provider:
        provider_suffix = f" (currently: {default_provider})"
    else:
        provider_suffix = " (not configured)"
    provider_help = (
        "Translation provider (openai, gemini, mistral). "
        "Default is taken from TRANSLATIONS_PROVIDERS['default_provider']"
    ) + provider_suffix
    parser.add_argument(
        "--provider",
        type=str,
        default=default_provider,
        choices=["openai", "gemini", "mistral"],
        help=provider_help,
    )

    model_help = (
        "Model name (e.g., gpt-4, gemini-pro, mistral-large-latest). "
        "If not specified, uses the default_model for the selected provider "
        "from TRANSLATIONS_PROVIDERS. "
        "LiteLLM automatically detects provider from model name."
    )
    parser.add_argument("--model", type=str, default=None, help=model_help)

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Run without committing or creating PR",
    )

    glossary_help = (
        "Use glossary from plugin glossaries folder. "
        "Looks for {plugin_dir}/glossaries/machine_learning/{lang_code}.txt"
    )
    parser.add_argument(
        "--glossary",
        action="store_true",
        default=False,
        help=glossary_help,
    )

    batch_size_help = (
        "Number of keys to translate per API request (default: 200). "
        "Larger batches are faster but may hit rate limits. "
        "Recommended: 200-300 for most models, "
        "up to 400-500 for large models like mistral-large."
    )
    parser.add_argument(
        "--batch-size", type=int, default=200, help=batch_size_help
    )

    mfe_help = (
        "Filter by specific MFE(s). "
        "Use 'edx-platform' for backend translations."
    )
    parser.add_argument("--mfe", type=str, nargs="+", help=mfe_help)

    repo_url_help = (
        "GitHub repository URL. "
        "Can also be set via TRANSLATIONS_REPO_URL setting "
        "or environment variable."
    )
    parser.add_argument("--repo-url", type=str, help=repo_url_help)
607
+
608
def handle(self, *args, **options):  # noqa: ARG002, PLR0915
    """Run the full workflow for one language.

    Steps, in order: validate the codes, prepare the local repository,
    sync translation keys, collect the empty keys (optionally filtered by
    MFE), translate them, apply the results, and, unless ``--dry-run`` is
    set, commit to a new branch and open a pull request. If pull-request
    creation raises ``CommandError`` the branch is cleaned up and the error
    is re-raised.
    """
    out = self.stdout
    style = self.style

    lang_code = options["lang"]
    iso_code = options.get("iso_code") or lang_code
    validate_language_code(lang_code)
    validate_language_code(iso_code, "ISO code")

    default_repo_path = str(Path.home() / ".mitxonline-translations")
    repo_path = get_config_value("repo_path", options, default_repo_path)
    repo_url = get_config_value(
        "repo_url",
        options,
        "https://github.com/mitodl/mitxonline-translations.git",
    )

    # An empty or whitespace-only path would make every later path
    # operation meaningless, so stop early.
    if not (repo_path and repo_path.strip()):
        msg = (
            "Repository path is not set. Please specify --repo-path, "
            "set TRANSLATIONS_REPO_PATH in Django settings, or set "
            "TRANSLATIONS_REPO_PATH environment variable."
        )
        raise CommandError(msg)

    out.write(style.SUCCESS(f"Processing language: {lang_code}"))
    out.write(f" ISO code: {iso_code}")
    out.write(f" Repository: {repo_path}")

    repo = self._ensure_repo(repo_path, repo_url)

    out.write("\nSyncing translation keys...")
    base_dir = Path(repo_path) / "translations"
    sync_stats = sync_all_translations(
        base_dir, lang_code, iso_code, skip_backend=False
    )
    self._log_sync_stats(sync_stats)

    out.write("\nExtracting empty keys for translation...")
    empty_keys = self._filter_by_mfe(
        extract_empty_keys(base_dir, lang_code, iso_code, skip_backend=False),
        options.get("mfe"),
    )
    if not empty_keys:
        out.write(style.SUCCESS("\nNo empty keys to translate!"))
        return

    glossary = self._load_glossary(options, lang_code)

    provider = options.get("provider") or get_default_provider()
    if not provider:
        msg = (
            "Provider not specified and "
            "TRANSLATIONS_PROVIDERS['default_provider'] is not set"
        )
        raise CommandError(msg)

    model = options.get("model") or get_default_model_for_provider(provider)
    if not model:
        msg = (
            f"Model not specified and provider '{provider}' "
            "does not have default_model in TRANSLATIONS_PROVIDERS"
        )
        raise CommandError(msg)

    out.write(f"\nTranslating using {provider}/{model}...")
    translations, translation_stats = self._translate_keys(
        empty_keys,
        TranslationParams(
            lang_code=lang_code,
            provider=provider,
            model=model,
            glossary=glossary,
            batch_size=options.get("batch_size", 200),
            max_retries=MAX_RETRIES,
        ),
    )
    out.write(f" Translated {len(translations)} keys")

    out.write("\nApplying translations...")
    applied_count, applied_by_app = self._apply_translations(
        translations, empty_keys, self.stdout
    )
    out.write(f" Applied {applied_count} translations")

    if options.get("dry_run"):
        out.write(style.WARNING("\nDry run - no changes committed"))
        return

    branch_name = create_branch_name(lang_code)
    out.write(f"\nCommitting changes to branch: {branch_name}")
    if not self._commit_changes(repo, branch_name, lang_code):
        return

    out.write("\nCreating pull request...")
    pr_data = PullRequestData(
        lang_code=lang_code,
        iso_code=iso_code,
        sync_stats=sync_stats,
        applied_count=applied_count,
        translation_stats=translation_stats,
        applied_by_app=applied_by_app,
    )
    try:
        pr_url = self._create_pull_request(
            repo_path,
            branch_name,
            pr_data,
            repo_url,
        )
        out.write(style.SUCCESS(f"\nPull request created: {pr_url}"))
    except CommandError as e:
        # Clean up the branch when no pull request could be created for it.
        out.write(style.ERROR(f"\nFailed to create pull request: {e!s}"))
        self._cleanup_failed_branch(repo, branch_name)
        raise
730
+
731
def _ensure_repo(self, repo_path: str, repo_url: str) -> GitRepository:
    """Return a ready-to-use repository at ``repo_path``, cloning if needed.

    * Existing git checkout whose ``origin`` matches ``repo_url``: discard
      uncommitted changes, switch to main and pull.
    * Existing git checkout with a different ``origin``: delete the
      directory and clone ``repo_url`` afresh.
    * Path missing: clone ``repo_url``.

    Raises:
        CommandError: if ``repo_path`` exists but is not a git repository,
            or if any git operation fails.
    """

    def normalize(url: str | None) -> str:
        # str.rstrip(".git") strips any trailing '.', 'g', 'i' or 't'
        # characters rather than the literal suffix, so it can mangle
        # repository names (".../widget" -> ".../widge"). Trailing slashes
        # are removed first so ".git/" is handled as well.
        return (url or "").rstrip("/").removesuffix(".git").rstrip("/")

    repo_path_obj = Path(repo_path)
    is_git_repo = repo_path_obj.exists() and (repo_path_obj / ".git").exists()

    if is_git_repo:
        repo = GitRepository(repo_path)
        current_url = repo.get_remote_url()

        # If URL changed, delete and re-clone
        if normalize(current_url) != normalize(repo_url):
            self.stdout.write(
                self.style.WARNING(
                    f" Repository URL changed from {current_url} to {repo_url}"
                )
            )
            self.stdout.write(" Removing old repository and cloning new one...")
            shutil.rmtree(repo_path)
            self.stdout.write(f" Cloning repository to {repo_path}...")
            repo = GitRepository.clone(repo_url, repo_path)
            self.stdout.write(
                self.style.SUCCESS(" Repository cloned successfully")
            )
            return repo

        # URL matches, use existing repo
        self.stdout.write(f" Repository found at {repo_path}")
        if repo.ensure_clean():
            self.stdout.write(
                self.style.WARNING(
                    " WARNING: Found uncommitted changes (cleaned up)"
                )
            )
            self.stdout.write(
                self.style.SUCCESS(" Cleaned up uncommitted changes")
            )

        repo.switch_to_main()
        self.stdout.write(" Updating repository...")
        repo.update_from_remote()
        self.stdout.write(self.style.SUCCESS(" Repository up to date"))
        return repo

    if repo_path_obj.exists():
        msg = (
            f"Path {repo_path} exists but is not a git repository. "
            f"Please remove it or specify a different path."
        )
        raise CommandError(msg)

    self.stdout.write(f" Cloning repository to {repo_path}...")
    repo = GitRepository.clone(repo_url, repo_path)
    self.stdout.write(self.style.SUCCESS(" Repository cloned successfully"))
    return repo
789
+
790
+ def _log_sync_stats(self, sync_stats: dict) -> None:
791
+ """Log synchronization statistics."""
792
+ self.stdout.write(
793
+ f" Frontend: {sync_stats['frontend']['added']} keys added, "
794
+ f"{sync_stats['frontend']['fixed']} typos fixed"
795
+ )
796
+ self.stdout.write(f" Backend: {sync_stats['backend']['added']} entries added")
797
+
798
+ def _filter_by_mfe(
799
+ self, empty_keys: list[dict], mfe_filter: list[str] | None
800
+ ) -> list[dict]:
801
+ """Filter empty keys by MFE if specified."""
802
+ if not mfe_filter:
803
+ self.stdout.write(f" Found {len(empty_keys)} empty keys")
804
+ return empty_keys
805
+
806
+ mfe_set = set(mfe_filter)
807
+ original_count = len(empty_keys)
808
+ available_apps = {key.get("app", "unknown") for key in empty_keys}
809
+ filtered = [key for key in empty_keys if key.get("app") in mfe_set]
810
+
811
+ if not filtered:
812
+ mfe_list = ", ".join(mfe_filter)
813
+ apps_list = ", ".join(sorted(available_apps))
814
+ self.stdout.write(
815
+ self.style.WARNING(
816
+ f"\nWARNING: No empty keys found for specified MFE(s): "
817
+ f"{mfe_list}\n"
818
+ f" Available apps: {apps_list}"
819
+ )
820
+ )
821
+ return []
822
+
823
+ mfe_list = ", ".join(mfe_filter)
824
+ self.stdout.write(
825
+ f" Filtered to {len(filtered)} keys from {len(mfe_set)} MFE(s): "
826
+ f"{mfe_list} (was {original_count} total)"
827
+ )
828
+ return filtered
829
+
830
+ def _load_glossary(self, options: dict, lang_code: str) -> dict[str, Any]:
831
+ """Load glossary if enabled."""
832
+ if not options.get("glossary", False):
833
+ return {}
834
+
835
+ utils_file = Path(utils_module.__file__)
836
+ glossary_path = (
837
+ utils_file.parent.parent
838
+ / "glossaries"
839
+ / "machine_learning"
840
+ / f"{lang_code}.txt"
841
+ )
842
+
843
+ if glossary_path.exists():
844
+ self.stdout.write(f"\nLoading glossary from {glossary_path}...")
845
+ glossary = load_glossary(glossary_path, lang_code)
846
+ self.stdout.write(f" Loaded {len(glossary)} glossary terms")
847
+ return glossary
848
+
849
+ self.stdout.write(
850
+ self.style.WARNING(
851
+ f"\nWARNING: Glossary file not found: {glossary_path}\n"
852
+ f" Continuing without glossary."
853
+ )
854
+ )
855
+ return {}
856
+
857
+ def _check_glossary_for_keys(
858
+ self,
859
+ empty_keys: list[dict],
860
+ glossary: dict[str, Any] | None,
861
+ ) -> tuple[dict[str, Any], int, list[dict]]:
862
+ """Check glossary matches for keys.
863
+
864
+ Returns (translations, matches_count, remaining_keys).
865
+ """
866
+ translations = {}
867
+ glossary_matches = 0
868
+ keys_needing_llm = []
869
+
870
+ for key_info in empty_keys:
871
+ # Normalize file path for consistent comparison
872
+ file_path_str = str(Path(key_info["file_path"]).resolve())
873
+ translation_key = f"{file_path_str}:{key_info['key']}"
874
+
875
+ if glossary:
876
+ match_result = self._check_glossary_match(key_info, glossary)
877
+ if match_result:
878
+ translations[translation_key] = match_result
879
+ glossary_matches += 1
880
+ continue
881
+
882
+ keys_needing_llm.append(key_info)
883
+
884
+ return translations, glossary_matches, keys_needing_llm
885
+
886
+ def _process_batch_results(
887
+ self,
888
+ batch: list[dict],
889
+ batch_translations: list[Any],
890
+ translations: dict[str, Any],
891
+ ) -> tuple[int, int, dict[str, int]]:
892
+ """Process batch translation results.
893
+
894
+ Returns (successes, errors, errors_by_app).
895
+ """
896
+ batch_successes = 0
897
+ batch_errors = 0
898
+ batch_errors_by_app: dict[str, int] = {}
899
+
900
+ for i, key_info in enumerate(batch):
901
+ # Normalize file path for consistent comparison
902
+ file_path_str = str(Path(key_info["file_path"]).resolve())
903
+ translation_key = f"{file_path_str}:{key_info['key']}"
904
+ app = key_info.get("app", "unknown")
905
+ if i < len(batch_translations) and batch_translations[i]:
906
+ translations[translation_key] = batch_translations[i]
907
+ batch_successes += 1
908
+ else:
909
+ batch_errors += 1
910
+ batch_errors_by_app[app] = batch_errors_by_app.get(app, 0) + 1
911
+
912
+ return batch_successes, batch_errors, batch_errors_by_app
913
+
914
+ def _translate_with_llm( # noqa: PLR0913
915
+ self,
916
+ keys_needing_llm: list[dict],
917
+ translations: dict[str, Any],
918
+ lang_code: str,
919
+ provider: str,
920
+ model: str,
921
+ glossary: dict[str, Any] | None,
922
+ batch_size: int,
923
+ max_retries: int,
924
+ ) -> tuple[int, int, dict[str, int]]:
925
+ """Translate keys using LLM with batch processing.
926
+
927
+ Returns (llm_translations, llm_errors, errors_by_app).
928
+ """
929
+ llm_translations = 0
930
+ llm_errors = 0
931
+ errors_by_app: dict[str, int] = {}
932
+
933
+ total_keys = len(keys_needing_llm)
934
+ num_batches = (total_keys + batch_size - 1) // batch_size
935
+ self.stdout.write(
936
+ f" Translating {total_keys} keys using LLM "
937
+ f"({num_batches} batches of up to {batch_size} keys each)..."
938
+ )
939
+
940
+ for batch_idx, batch in enumerate(
941
+ [
942
+ keys_needing_llm[i : i + batch_size]
943
+ for i in range(0, total_keys, batch_size)
944
+ ],
945
+ 1,
946
+ ):
947
+ batch_succeeded = False
948
+ batch_apps = {key_info.get("app", "unknown") for key_info in batch}
949
+
950
+ # Retry loop for this batch
951
+ for attempt in range(max_retries + 1): # +1 for initial attempt
952
+ try:
953
+ batch_translations = self._call_llm_batch(
954
+ batch, lang_code, provider, model, glossary
955
+ )
956
+ batch_successes, batch_errors, batch_errors_by_app = (
957
+ self._process_batch_results(
958
+ batch,
959
+ batch_translations,
960
+ translations,
961
+ )
962
+ )
963
+
964
+ llm_translations += batch_successes
965
+ llm_errors += batch_errors
966
+ for app, count in batch_errors_by_app.items():
967
+ errors_by_app[app] = errors_by_app.get(app, 0) + count
968
+
969
+ completed = min(batch_idx * batch_size, total_keys)
970
+ progress_pct = min((completed / total_keys) * 100, 100)
971
+ remaining_keys = total_keys - llm_translations
972
+
973
+ self._log_batch_progress(
974
+ batch_idx,
975
+ num_batches,
976
+ batch_successes,
977
+ batch_errors,
978
+ completed,
979
+ total_keys,
980
+ progress_pct,
981
+ remaining_keys,
982
+ batch_apps,
983
+ batch_errors_by_app,
984
+ attempt,
985
+ )
986
+
987
+ batch_succeeded = True
988
+ break # Success - exit retry loop
989
+
990
+ except (
991
+ requests.RequestException,
992
+ ValueError,
993
+ KeyError,
994
+ AttributeError,
995
+ ) as e:
996
+ if not self._handle_batch_error(
997
+ e, batch_idx, num_batches, batch_apps, attempt, max_retries
998
+ ):
999
+ break # Non-retryable error
1000
+
1001
+ # If batch failed after all retries, mark all keys as errors
1002
+ if not batch_succeeded:
1003
+ batch_errors = len(batch)
1004
+ llm_errors += batch_errors
1005
+ for key_info in batch:
1006
+ app = key_info.get("app", "unknown")
1007
+ errors_by_app[app] = errors_by_app.get(app, 0) + 1
1008
+ apps_str = ", ".join(sorted(batch_apps))
1009
+ self.stdout.write(
1010
+ self.style.ERROR(
1011
+ f" Marked {batch_errors} keys as errors, "
1012
+ f"continuing with next batch...\n"
1013
+ f" Affected apps: {apps_str}"
1014
+ )
1015
+ )
1016
+
1017
+ return llm_translations, llm_errors, errors_by_app
1018
+
1019
+ def _log_batch_progress( # noqa: PLR0913
1020
+ self,
1021
+ batch_idx: int,
1022
+ num_batches: int,
1023
+ batch_successes: int,
1024
+ batch_errors: int,
1025
+ completed: int,
1026
+ total_keys: int,
1027
+ progress_pct: float,
1028
+ remaining_keys: int,
1029
+ batch_apps: set[str],
1030
+ batch_errors_by_app: dict[str, int],
1031
+ attempt: int,
1032
+ ) -> None:
1033
+ """Log batch processing progress."""
1034
+ retry_msg = f" (after {attempt + 1} attempt(s))" if attempt > 0 else ""
1035
+ if batch_errors > 0:
1036
+ apps_str = ", ".join(sorted(batch_apps))
1037
+ errors_by_app_str = ", ".join(
1038
+ f"{app}: {count}" for app, count in sorted(batch_errors_by_app.items())
1039
+ )
1040
+ self.stdout.write(
1041
+ f" Batch {batch_idx}/{num_batches} completed "
1042
+ f"with partial success "
1043
+ f"({batch_successes} succeeded, "
1044
+ f"{batch_errors} failed){retry_msg} "
1045
+ f"({completed}/{total_keys} keys, "
1046
+ f"{progress_pct:.1f}% complete, "
1047
+ f"{remaining_keys} remaining)\n"
1048
+ f" Affected apps: {apps_str}\n"
1049
+ f" Errors by app: {errors_by_app_str}"
1050
+ )
1051
+ else:
1052
+ self.stdout.write(
1053
+ f" Batch {batch_idx}/{num_batches} completed"
1054
+ f"{retry_msg} "
1055
+ f"({completed}/{total_keys} keys, "
1056
+ f"{progress_pct:.1f}% complete, "
1057
+ f"{remaining_keys} remaining)"
1058
+ )
1059
+
1060
def _handle_batch_error(  # noqa: PLR0913
    self,
    error: Exception,
    batch_idx: int,
    num_batches: int,
    batch_apps: set[str],
    attempt: int,
    max_retries: int,
) -> bool:
    """Report a failed batch attempt and decide whether to try again.

    When ``is_retryable_error`` accepts the error and attempts remain, a
    warning is written, the call sleeps ``2**attempt`` seconds (1s, 2s,
    4s, ...) and True is returned. Otherwise an error line is written and
    False is returned.
    """
    apps_str = ", ".join(sorted(batch_apps))
    label = f"Batch {batch_idx}/{num_batches}"
    retryable = is_retryable_error(error)

    if retryable and attempt < max_retries:
        # Exponential backoff between attempts.
        wait_time = 2**attempt
        self.stdout.write(
            self.style.WARNING(
                f" WARNING: {label} failed "
                f"(attempt {attempt + 1}/{max_retries + 1}): {error!s}\n"
                f" Affected apps: {apps_str}\n"
                f" Retrying in {wait_time} second(s)..."
            )
        )
        time.sleep(wait_time)
        return True

    if retryable:
        # Retryable in principle, but the retry budget is spent.
        reason = f"failed after {max_retries + 1} attempts: {error!s}"
    else:
        reason = f"failed with non-retryable error: {error!s}"

    self.stdout.write(
        self.style.ERROR(f" ERROR: {label} {reason}\n Affected apps: {apps_str}")
    )
    return False
1108
+
1109
+ def _translate_keys(
1110
+ self,
1111
+ empty_keys: list[dict],
1112
+ params: TranslationParams,
1113
+ ) -> tuple[dict[str, Any], dict[str, Any]]:
1114
+ """Translate empty keys using LLM with batch processing."""
1115
+ lang_code = params["lang_code"]
1116
+ provider = params["provider"]
1117
+ model = params["model"]
1118
+ glossary = params["glossary"]
1119
+ batch_size = params["batch_size"]
1120
+ max_retries = params["max_retries"]
1121
+
1122
+ # First pass: check glossary matches
1123
+ translations, glossary_matches, keys_needing_llm = (
1124
+ self._check_glossary_for_keys(empty_keys, glossary)
1125
+ )
1126
+
1127
+ if not keys_needing_llm:
1128
+ return translations, {
1129
+ "glossary_matches": glossary_matches,
1130
+ "llm_translations": 0,
1131
+ "errors": 0,
1132
+ "errors_by_app": cast("dict[str, int]", {}),
1133
+ }
1134
+
1135
+ # Translate remaining keys with LLM
1136
+ llm_translations, llm_errors, errors_by_app = self._translate_with_llm(
1137
+ keys_needing_llm,
1138
+ translations,
1139
+ lang_code,
1140
+ provider,
1141
+ model,
1142
+ glossary,
1143
+ batch_size,
1144
+ max_retries,
1145
+ )
1146
+
1147
+ summary = (
1148
+ f" Summary - LLM translations: {llm_translations}, Errors: {llm_errors}"
1149
+ )
1150
+ if glossary:
1151
+ summary = (
1152
+ f" Summary - Glossary matches: {glossary_matches}, {summary[12:]}"
1153
+ )
1154
+ self.stdout.write(summary)
1155
+
1156
+ return translations, {
1157
+ "glossary_matches": glossary_matches,
1158
+ "llm_translations": llm_translations,
1159
+ "errors": llm_errors,
1160
+ "errors_by_app": errors_by_app,
1161
+ }
1162
+
1163
+ def _check_glossary_match(
1164
+ self, key_info: dict, glossary: dict[str, Any] | None
1165
+ ) -> Any | None:
1166
+ """
1167
+ Check if key matches glossary. Returns translation or None.
1168
+
1169
+ Args:
1170
+ key_info: Dictionary containing key information with 'english',
1171
+ 'is_plural', etc.
1172
+ glossary: Dictionary mapping English terms to translations, or None.
1173
+
1174
+ Returns:
1175
+ Translation string/dict if match found, None otherwise.
1176
+ """
1177
+ if not glossary:
1178
+ return None
1179
+
1180
+ is_plural = key_info.get("is_plural", False)
1181
+ msgid_plural = key_info.get("msgid_plural")
1182
+
1183
+ if is_plural and msgid_plural:
1184
+ return self._check_plural_glossary_match(key_info, glossary, msgid_plural)
1185
+
1186
+ # Singular match
1187
+ match = match_glossary_term(key_info["english"], glossary, exact_match=True)
1188
+ if not match:
1189
+ return None
1190
+
1191
+ if isinstance(match, dict):
1192
+ return match.get("translation", match.get("singular", ""))
1193
+ return match
1194
+
1195
def _check_plural_glossary_match(
    self, key_info: dict, glossary: dict[str, Any], msgid_plural: str
) -> Any | None:
    """Look up both forms of a plural key in the glossary.

    Returns a translation only when the singular and the plural text both
    match: the first match that is a dict containing "singular" is returned
    as is, otherwise a new ``{"singular", "plural"}`` dict is built from
    the two matches. When only the singular matches, its text is stored on
    ``key_info["_glossary_singular"]`` and None is returned.
    """
    singular_hit = match_glossary_term(
        key_info["english"], glossary, exact_match=True
    )
    plural_hit = match_glossary_term(msgid_plural, glossary, exact_match=True)

    if not singular_hit:
        return None

    if plural_hit:
        for candidate in (singular_hit, plural_hit):
            if isinstance(candidate, dict) and "singular" in candidate:
                return candidate
        return {
            "singular": str(singular_hit),
            "plural": str(plural_hit),
        }

    # Only the singular matched: remember it on the key, nothing to return.
    if isinstance(singular_hit, str):
        key_info["_glossary_singular"] = str(singular_hit)
    else:
        key_info["_glossary_singular"] = singular_hit.get("singular", "")
    return None
1222
+
1223
+ def _format_glossary_for_prompt(self, glossary: dict[str, Any] | None) -> str:
1224
+ """Format glossary as a prompt section for LLM translation requests.
1225
+
1226
+ Args:
1227
+ glossary: Dictionary mapping English terms to translations, or
1228
+ None/empty dict.
1229
+
1230
+ Returns:
1231
+ Empty string if glossary is None or empty, otherwise returns a
1232
+ formatted string with glossary terms and instructions for consistent
1233
+ translation.
1234
+ """
1235
+ if not glossary:
1236
+ return ""
1237
+
1238
+ # Format glossary as JSON for the prompt
1239
+ # Handle potential serialization errors gracefully
1240
+ try:
1241
+ glossary_json = json.dumps(glossary, indent=2, ensure_ascii=False)
1242
+ except (TypeError, ValueError) as e:
1243
+ # If glossary contains non-serializable values, log warning and skip
1244
+ self.stdout.write(
1245
+ self.style.WARNING(
1246
+ f" WARNING: Could not serialize glossary for prompt: {e!s}. "
1247
+ f"Continuing without glossary in LLM prompt."
1248
+ )
1249
+ )
1250
+ return ""
1251
+ glossary_template = f"""
1252
+ IMPORTANT - Use these glossary terms when translating. If any English terms
1253
+ from the glossary appear in the texts to translate, use the corresponding
1254
+ translation from the glossary:
1255
+
1256
+ {glossary_json}
1257
+
1258
+ When translating sentences, ensure that glossary terms are translated
1259
+ consistently according to the glossary above, even if they appear
1260
+ within longer sentences. For example, if the glossary specifies
1261
+ "certificate" -> "Πιστοποιητικό", then translate "certificate" as
1262
+ "Πιστοποιητικό" even when it appears in longer sentences like
1263
+ "The course completion certificate is available".
1264
+ """
1265
+ return textwrap.dedent(glossary_template)
1266
+
1267
def _call_llm_batch(  # noqa: PLR0913
    self,
    key_batch: list[dict],
    lang_code: str,
    provider: str,
    model: str,
    glossary: dict[str, Any] | None = None,
    timeout: int = 120,
) -> list[str | dict]:
    """Call LLM API to translate multiple texts in a single request.

    The batch is numbered from 1 and sent as a JSON object; the reply is
    parsed with ``_parse_json_response`` and, failing that, with
    ``_parse_order_based_response``.

    Args:
        key_batch: List of key information dictionaries to translate
        lang_code: Target language code
        provider: Translation provider name (openai, gemini, mistral)
        model: LLM model name
        glossary: Optional glossary dictionary
        timeout: Request timeout in seconds (default: 120)

    Returns:
        One entry per key, in batch order: a string, or a dict with
        "singular" and "plural" for plural entries.

    Raises:
        CommandError: On timeout or when the request/response handling
            raises a request, value, key or attribute error.
    """
    api_key = self._get_llm_api_key(provider)

    texts_dict: dict[str, Any] = {}
    plural_count = 0
    for i, key_info in enumerate(key_batch, 1):
        plural_source = key_info.get("msgid_plural")
        if key_info.get("is_plural") and plural_source:
            texts_dict[str(i)] = {
                "singular": key_info["english"],
                "plural": plural_source,
            }
            plural_count += 1
        else:
            texts_dict[str(i)] = key_info["english"]

    texts_block = json.dumps(texts_dict, indent=2, ensure_ascii=False)
    lang_name = LANGUAGE_MAPPING.get(lang_code, lang_code)

    # Build glossary section if glossary is provided
    glossary_section = self._format_glossary_for_prompt(glossary)

    plural_note = ""
    if plural_count > 0:
        plural_note = (
            "IMPORTANT: "
            + str(plural_count)
            + " entry/entries have plural forms. "
            + "For these, return BOTH singular and "
            + 'plural translations as an object with "singular" '
            + 'and "plural" keys.'
        )

    # The prompt is joined from flush-left lines. A dedent over the finished
    # template cannot work here: the first line and the interpolated JSON
    # start at column 0, so the common margin is always empty.
    prompt = "\n".join(
        [
            f"Translate the following {len(key_batch)} text(s) to {lang_name} "
            f"(language code: {lang_code}).",
            "Context: These are from an educational platform.",
            "Preserve any placeholders like {variable}, {0}, %s, etc.",
            "Preserve HTML tags and formatting.",
            glossary_section,
            plural_note,
            "",
            "Return a JSON object where each key is the number (1, 2, 3, etc.).",
            "- For singular entries: value is the translation string.",
            '- For plural entries: value is an object with "singular" and "plural" keys,',
            "  each containing the translation.",
            "",
            "Input texts (numbered):",
            texts_block,
            "",
            "Return ONLY valid JSON in this format:",
            "{",
            '  "1": "translation of first text",',
            '  "2": {"singular": "singular translation",',
            '        "plural": "plural translation"},',
            '  "3": "translation of third text",',
            "  ...",
            "}",
        ]
    )

    try:
        completion_kwargs = configure_litellm_for_provider(
            provider=provider,
            model=model,
            api_key=api_key,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            timeout=timeout,
        )

        response = completion(**completion_kwargs)
        # A reply without text content is treated as an empty reply, so the
        # keys are reported as untranslated instead of surfacing a
        # misleading "check your API key" error about NoneType.strip.
        content = response.choices[0].message.content
        response_text = (content or "").strip()

        translations = self._parse_json_response(response_text, key_batch)
        if translations:
            return translations

        return self._parse_order_based_response(response_text, key_batch)

    except TimeoutError:
        msg = (
            f"LLM batch API call timed out after {timeout} seconds.\n"
            f"Model: {model}\n"
            f"Batch size: {len(key_batch)}\n"
            f"Try reducing --batch-size or check your network connection."
        )
        raise CommandError(msg) from None
    except (requests.RequestException, ValueError, KeyError, AttributeError) as e:
        msg = (
            f"LLM batch API call failed: {e!s}\n"
            f"Model: {model}\n"
            f"Batch size: {len(key_batch)}\n"
            f"Make sure TRANSLATIONS_PROVIDERS is configured in settings "
            f"with the appropriate api_key, or set the environment variable "
            f"(OPENAI_API_KEY, GEMINI_API_KEY, or MISTRAL_API_KEY)"
        )
        raise CommandError(msg) from e
1385
+
1386
+ def _parse_json_response(
1387
+ self, response_text: str, key_batch: list[dict]
1388
+ ) -> list[str | dict] | None:
1389
+ """Parse JSON response from LLM."""
1390
+ json_text = response_text
1391
+ if "```json" in response_text:
1392
+ start = response_text.find("```json") + 7
1393
+ end = response_text.find("```", start)
1394
+ if end > start:
1395
+ json_text = response_text[start:end].strip()
1396
+ elif "```" in response_text:
1397
+ start = response_text.find("```") + 3
1398
+ end = response_text.find("```", start)
1399
+ if end > start:
1400
+ json_text = response_text[start:end].strip()
1401
+
1402
+ try:
1403
+ data = json.loads(json_text)
1404
+ translations: list[str | dict[str, str]] = []
1405
+ for i in range(len(key_batch)):
1406
+ key = str(i + 1)
1407
+ if key in data:
1408
+ value = data[key]
1409
+ if (
1410
+ isinstance(value, dict)
1411
+ and "singular" in value
1412
+ and "plural" in value
1413
+ ):
1414
+ translations.append(
1415
+ {
1416
+ "singular": str(value["singular"]).strip(),
1417
+ "plural": str(value["plural"]).strip(),
1418
+ }
1419
+ )
1420
+ else:
1421
+ translations.append(str(value).strip())
1422
+ else:
1423
+ translations.append("")
1424
+ except (json.JSONDecodeError, KeyError, ValueError):
1425
+ return None
1426
+ else:
1427
+ return translations
1428
+
1429
+ def _parse_order_based_response(
1430
+ self, response_text: str, key_batch: list[dict]
1431
+ ) -> list[str | dict[str, str]]:
1432
+ """Fallback: Parse response assuming translations are in order."""
1433
+ lines = [line.strip() for line in response_text.split("\n") if line.strip()]
1434
+ cleaned_lines = [
1435
+ line.lstrip("0123456789.-) ").strip()
1436
+ for line in lines
1437
+ if line.lstrip("0123456789.-) ").strip()
1438
+ ]
1439
+ if len(cleaned_lines) < len(key_batch):
1440
+ cleaned_lines.extend([""] * (len(key_batch) - len(cleaned_lines)))
1441
+ # Return as list[str | dict[str, str]] - all strings in this fallback
1442
+ return cast("list[str | dict[str, str]]", cleaned_lines[: len(key_batch)])
1443
+
1444
def _get_llm_api_key(self, provider: str) -> str | None:
    """Look up the API key for ``provider``.

    The ``TRANSLATIONS_PROVIDERS`` setting is consulted first (a dict whose
    entry for the provider is a dict with an "api_key"). When that yields
    nothing, the provider's environment variable is read instead:
    GEMINI_API_KEY, MISTRAL_API_KEY, or OPENAI_API_KEY for anything else.

    Args:
        provider: Translation provider name (openai, gemini, mistral)

    Returns:
        The key, or None when neither source provides one.
    """
    try:
        providers = getattr(settings, "TRANSLATIONS_PROVIDERS", None)
        if isinstance(providers, dict) and provider in providers:
            provider_config = providers[provider]
            if isinstance(provider_config, dict):
                configured_key = provider_config.get("api_key")
                if configured_key:
                    return configured_key
    except (AttributeError, TypeError) as e:
        logger.debug("Error accessing TRANSLATIONS_PROVIDERS: %s", e)

    if provider == PROVIDER_GEMINI:
        env_key_name = "GEMINI_API_KEY"
    elif provider == PROVIDER_MISTRAL:
        env_key_name = "MISTRAL_API_KEY"
    else:
        env_key_name = "OPENAI_API_KEY"
    return os.environ.get(env_key_name)
1470
+
1471
+ def _group_translations_by_file(
1472
+ self, translations: dict[str, Any], empty_keys: list[dict]
1473
+ ) -> dict[str, dict[str, Any]]:
1474
+ """Group translations by file path."""
1475
+ translations_by_file: dict[str, dict[str, Any]] = {}
1476
+
1477
+ for key_info in empty_keys:
1478
+ # Normalize file path for consistent comparison
1479
+ file_path_str = str(Path(key_info["file_path"]).resolve())
1480
+ translation_key = f"{file_path_str}:{key_info['key']}"
1481
+ if translation_key in translations:
1482
+ trans_value = translations[translation_key]
1483
+
1484
+ if key_info["file_type"] == "json" and isinstance(trans_value, dict):
1485
+ trans_value = trans_value.get("singular", str(trans_value))
1486
+
1487
+ translations_by_file.setdefault(file_path_str, {})[key_info["key"]] = (
1488
+ trans_value
1489
+ )
1490
+
1491
+ return translations_by_file
1492
+
1493
+ def _apply_file_translations(
1494
+ self,
1495
+ file_path: Path,
1496
+ file_translations: dict[str, Any],
1497
+ empty_keys: list[dict],
1498
+ stdout,
1499
+ ) -> tuple[int, str]:
1500
+ """Apply translations to a single file. Returns (count, app)."""
1501
+ if not file_path.exists():
1502
+ stdout.write(self.style.WARNING(f" WARNING: File not found: {file_path}"))
1503
+ return 0, "unknown"
1504
+
1505
+ # Normalize paths for comparison
1506
+ normalized_file_path = str(file_path.resolve())
1507
+ key_info = next(
1508
+ k
1509
+ for k in empty_keys
1510
+ if str(Path(k["file_path"]).resolve()) == normalized_file_path
1511
+ )
1512
+ app = key_info.get("app", "unknown")
1513
+
1514
+ if key_info["file_type"] == "json":
1515
+ count = apply_json_translations(file_path, file_translations)
1516
+ elif key_info["file_type"] == "po":
1517
+ count = apply_po_translations(file_path, file_translations)
1518
+ else:
1519
+ return 0, app
1520
+
1521
+ return count, app
1522
+
1523
+ def _apply_translations(
1524
+ self,
1525
+ translations: dict[str, Any],
1526
+ empty_keys: list[dict],
1527
+ stdout,
1528
+ ) -> tuple[int, dict[str, Any]]:
1529
+ """Apply translations to files."""
1530
+ translations_by_file = self._group_translations_by_file(
1531
+ translations, empty_keys
1532
+ )
1533
+
1534
+ if not translations_by_file:
1535
+ stdout.write(self.style.WARNING(" WARNING: No translations to apply"))
1536
+ return 0, {"by_app": {}, "details": []}
1537
+
1538
+ applied = 0
1539
+ applied_by_app: dict[str, int] = {}
1540
+ applied_details: list[dict[str, Any]] = []
1541
+
1542
+ for file_path_str, file_translations in translations_by_file.items():
1543
+ full_path = Path(file_path_str)
1544
+ count, app = self._apply_file_translations(
1545
+ full_path, file_translations, empty_keys, stdout
1546
+ )
1547
+
1548
+ applied += count
1549
+ if count > 0:
1550
+ applied_by_app[app] = applied_by_app.get(app, 0) + count
1551
+ applied_details.append(
1552
+ {"app": app, "file": full_path.name, "count": count}
1553
+ )
1554
+ stdout.write(
1555
+ f" Applied {count} translations to {app} ({full_path.name})"
1556
+ )
1557
+
1558
+ if applied_by_app:
1559
+ app_summary = ", ".join(
1560
+ f"{app}: {count}" for app, count in applied_by_app.items()
1561
+ )
1562
+ stdout.write(f" Summary by app: {app_summary}")
1563
+
1564
+ return applied, {"by_app": applied_by_app, "details": applied_details}
1565
+
1566
def _cleanup_failed_branch(self, repo: GitRepository, branch_name: str) -> None:
    """Best-effort removal of a local branch after PR creation failed.

    Switches the repository back via ``repo.switch_to_main()`` and, when
    ``branch_name`` is among the local heads, force-deletes it. Git errors
    from the deletion are suppressed; other git or attribute errors are
    reported as a warning instead of being raised.
    """
    try:
        repo.switch_to_main()
        local_branches = [head.name for head in repo.repo.heads]
        # Only try to delete if branch exists locally
        if branch_name not in local_branches:
            return
        with suppress(git.exc.GitCommandError):
            repo.repo.git.branch("-D", branch_name)
            self.stdout.write(
                self.style.WARNING(
                    f" Cleaned up failed branch: {branch_name}"
                )
            )
    except (git.exc.GitCommandError, AttributeError) as e:
        self.stdout.write(
            self.style.WARNING(f" Could not clean up branch {branch_name}: {e!s}")
        )
1583
+
1584
def _commit_changes(
    self, repo: GitRepository, branch_name: str, lang_code: str
) -> bool:
    """Commit changes to git repository. Returns True if committed.

    Checks out (or creates) ``branch_name``, stages everything, and, when
    there is something to commit, commits with a generated message and
    pushes the branch.

    Args:
        repo: Repository wrapper the git operations are run on.
        branch_name: Branch to commit on.
        lang_code: Language code, sanitized before use in the commit message.

    Returns:
        True after a commit and push; False when there was nothing to
        commit (the branch is then deleted locally after switching back).
    """
    # Check if branch already exists
    if repo.branch_exists(branch_name):
        self.stdout.write(
            self.style.WARNING(
                f" Branch '{branch_name}' already exists. "
                f"Switching to it and continuing..."
            )
        )
        try:
            repo.repo.git.checkout(branch_name)
        except git.exc.GitCommandError:
            # If local branch doesn't exist but remote does, create tracking branch
            repo.repo.git.checkout("-b", branch_name, f"origin/{branch_name}")
    else:
        # NOTE(review): configure_user() is only called on this path, not
        # when an existing branch is reused - confirm that is intended.
        repo.configure_user()
        repo.create_branch(branch_name)
    repo.stage_all()

    if not repo.has_changes():
        self.stdout.write(
            self.style.WARNING(
                " No changes to commit. Skipping commit and PR creation."
            )
        )
        repo.switch_to_main()
        # Best effort: a failure to delete the branch is ignored.
        with suppress(git.exc.GitCommandError):
            repo.repo.git.branch("-D", branch_name)
        return False

    safe_lang_code = sanitize_for_git(lang_code)
    commit_message = (
        f"feat: Add {safe_lang_code} translations via LLM\n\n"
        f"Automated translation of empty keys for {safe_lang_code} language."
    )

    repo.commit(commit_message)

    # The setting takes precedence; the environment variable is the fallback.
    github_token = getattr(
        settings, "TRANSLATIONS_GITHUB_TOKEN", None
    ) or os.environ.get("TRANSLATIONS_GITHUB_TOKEN")
    repo.push_branch(branch_name, github_token)
    self.stdout.write(" Pushed branch to remote")

    return True
1632
+
1633
def _create_pull_request(
    self,
    repo_path: str,
    branch_name: str,
    pr_data: PullRequestData,
    repo_url: str,
) -> str:
    """Create pull request using GitHub CLI or API.

    The ``gh`` executable is used when it is found on PATH. If it is
    missing or its invocation fails, the reason is reported and the PR is
    created through ``_create_pr_via_api`` instead.

    Args:
        repo_path: Working directory ``gh`` is run in.
        branch_name: Branch to open the PR from (used by the API fallback).
        pr_data: PR details; "lang_code" is read here, and the whole
            mapping is handed to ``_generate_pr_body``.
        repo_url: Repository URL (used by the API fallback).

    Returns:
        The stripped stdout of ``gh pr create``, or the API fallback's
        return value.
    """
    lang_code = pr_data["lang_code"]

    # Using GitHub CLI (gh) - trusted system command
    gh_path = shutil.which("gh")
    if gh_path:
        try:
            result = subprocess.run(  # noqa: S603
                [
                    gh_path,
                    "pr",
                    "create",
                    "--title",
                    f"feat: Add {lang_code} translations via LLM",
                    "--body",
                    self._generate_pr_body(pr_data),
                ],
                cwd=repo_path,
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            # Still fall back to the API, but say why the CLI path failed
            # instead of swallowing the error without a trace.
            detail = (getattr(e, "stderr", None) or str(e)).strip()
            self.stdout.write(
                self.style.WARNING(
                    f" WARNING: GitHub CLI could not create the PR "
                    f"({detail}); falling back to the GitHub API."
                )
            )
        else:
            return result.stdout.strip()

    # Fall back to API if gh CLI is not available or fails
    return self._create_pr_via_api(
        repo_path,
        branch_name,
        pr_data,
        repo_url,
    )
1671
+
1672
+ def _generate_error_section(
1673
+ self, errors: int, errors_by_app: dict[str, int] | None = None
1674
+ ) -> str:
1675
+ """Generate error warning section for PR body if there are errors.
1676
+
1677
+ Args:
1678
+ errors: Number of translation errors.
1679
+ errors_by_app: Dictionary mapping app/MFE names to error counts.
1680
+
1681
+ Returns:
1682
+ Error section markdown string, or empty string if no errors.
1683
+ """
1684
+ if errors == 0:
1685
+ return ""
1686
+
1687
+ error_details = ""
1688
+ if errors_by_app:
1689
+ error_lines = [
1690
+ f"- **{app}**: {count} key(s) failed"
1691
+ for app, count in sorted(
1692
+ errors_by_app.items(), key=lambda x: x[1], reverse=True
1693
+ )
1694
+ ]
1695
+ error_details = (
1696
+ "\n**Errors by app/MFE:**\n\n" + "\n".join(error_lines) + "\n"
1697
+ )
1698
+
1699
+ error_template = f"""
1700
+ ### Translation Errors
1701
+
1702
+ **{errors} translation key(s) failed to translate** due to API errors, rate
1703
+ limits, or parsing issues.
1704
+ {error_details}
1705
+ **Impact:**
1706
+ - These keys remain untranslated in the target language files
1707
+ - They will need to be translated manually or re-run the command
1708
+ - The translation process continued and completed successfully
1709
+ for the remaining keys
1710
+
1711
+ **Recommendation:**
1712
+ - Review the command output logs for specific error details
1713
+ - Consider re-running the command to retry failed batches
1714
+ - Check API key permissions and rate limits if errors persist
1715
+
1716
+ """
1717
+ return textwrap.dedent(error_template)
1718
+
1719
+ def _generate_translation_summary(
1720
+ self, glossary_matches: int, llm_translations: int, errors: int
1721
+ ) -> str:
1722
+ """Generate translation statistics summary line.
1723
+
1724
+ Args:
1725
+ glossary_matches: Number of glossary matches.
1726
+ llm_translations: Number of LLM translations.
1727
+ errors: Number of translation errors.
1728
+
1729
+ Returns:
1730
+ Summary string.
1731
+ """
1732
+ if glossary_matches > 0:
1733
+ return (
1734
+ f"Summary - Glossary matches: {glossary_matches}, "
1735
+ f"LLM translations: {llm_translations}, Errors: {errors}"
1736
+ )
1737
+ return f"Summary - LLM translations: {llm_translations}, Errors: {errors}"
1738
+
1739
def _generate_pr_body(self, pr_data: PullRequestData) -> str:
    """Build the Markdown description for the translation pull request.

    The body is assembled as a list of lines joined with newlines instead of
    an indented triple-quoted template passed through ``textwrap.dedent``.
    ``dedent`` only removes whitespace shared by *every* non-blank line of
    the final string; the template's first line and every line interpolated
    from a newline-joined list start at column 0, so no margin was ever
    stripped and any source indentation of the template would end up in the
    PR body (Markdown renders such indented lines as code blocks).

    Args:
        pr_data: Mapping providing ``lang_code``, ``iso_code``,
            ``sync_stats``, ``applied_count``, ``translation_stats`` and
            ``applied_by_app``.

    Returns:
        The pull request description as Markdown text, ending in a newline.
    """
    lang_code = pr_data["lang_code"]
    iso_code = pr_data["iso_code"]
    sync_stats = pr_data["sync_stats"]
    applied_count = pr_data["applied_count"]
    translation_stats = pr_data["translation_stats"]
    applied_by_app = pr_data["applied_by_app"]

    glossary_matches = translation_stats.get("glossary_matches", 0)
    llm_translations = translation_stats.get("llm_translations", 0)
    errors = translation_stats.get("errors", 0)
    errors_by_app: dict[str, int] = cast(
        "dict[str, int]", translation_stats.get("errors_by_app", {})
    )
    # Every error-specific line below is gated on the same condition.
    has_errors = errors > 0

    translation_summary = self._generate_translation_summary(
        glossary_matches, llm_translations, errors
    )
    error_section = self._generate_error_section(errors, errors_by_app)

    frontend_stats = sync_stats["frontend"]
    backend_stats = sync_stats["backend"]

    # One line per entry under the "details" key of applied_by_app.
    breakdown_lines = [
        f" Applied {detail['count']} translations to "
        f"{detail['app']} ({detail['file']})"
        for detail in applied_by_app.get("details", [])
    ]

    # "Changes" section, with an extra line only when translations failed.
    changes_lines = [
        f"- **Language**: {lang_code} ({iso_code})",
        f"- **Keys synced**: {frontend_stats['added']} frontend keys, "
        f"{backend_stats['added']} backend entries",
        f"- **Translations applied**: {applied_count} keys translated",
        f"- **Typos fixed**: {frontend_stats['fixed']}",
    ]
    if has_errors:
        changes_lines.append(
            f"- **Translation errors**: {errors} keys failed to translate"
        )

    # "Translation Statistics" section.
    statistics_lines = [
        translation_summary,
        f" Translated {applied_count} keys",
    ]
    if has_errors:
        statistics_lines.append(f" Failed: {errors} keys")

    # "Next Steps" section; the failure follow-up sits after the review step.
    next_steps_lines = ["- Review translations for accuracy"]
    if has_errors:
        next_steps_lines.append(
            "- Address failed translations (see error section above)"
        )
    next_steps_lines.extend(
        [
            "- Test in staging environment",
            "- Merge when ready",
        ]
    )

    body_lines = [
        "## Summary",
        "",
        f"This PR adds {lang_code} translations via LLM automation.",
        # Inserted as-is on its own line between the intro and "Changes".
        error_section,
        "### Changes",
        "",
        *changes_lines,
        "",
        "### Translation Statistics",
        "",
        *statistics_lines,
        "",
        "### Applied Translations",
        "",
        *(breakdown_lines or [" No translations applied"]),
        "",
        f"Applied {applied_count} translations",
        "",
        "### Files Modified",
        "",
        f"- Frontend apps: {frontend_stats['created']} created, "
        f"{frontend_stats['synced']} synced",
        "- Backend: PO files updated",
        "",
        "### Next Steps",
        "",
        *next_steps_lines,
        "",
        "---",
        "*This PR was automatically generated by the sync_and_translate_language "
        "management command.*",
    ]
    return "\n".join(body_lines) + "\n"
1842
+
1843
def _create_pr_via_api(
    self,
    repo_path: str,
    branch_name: str,
    pr_data: PullRequestData,
    repo_url: str,
) -> str:
    """Open the translation pull request through the GitHub API client.

    Args:
        repo_path: Path handed to ``GitRepository`` to look up the main
            branch name, which is used as the PR base.
        branch_name: Branch to open the pull request from.
        pr_data: Pull request data; ``lang_code`` is used in the title and
            the whole mapping is rendered into the body.
        repo_url: Repository URL, parsed into owner and repository name.

    Returns:
        Whatever ``GitHubAPIClient.create_pull_request`` returns (annotated
        as ``str`` here).
    """
    api_client = GitHubAPIClient()
    repo_owner, repo_name = GitHubAPIClient.parse_repo_url(repo_url)

    # The PR targets the branch the repository helper reports as main.
    local_repo = GitRepository(repo_path)
    base_branch = local_repo._get_main_branch_name()  # noqa: SLF001

    pr_title = f"feat: Add {pr_data['lang_code']} translations via LLM"
    pr_body = self._generate_pr_body(pr_data)

    return api_client.create_pull_request(
        owner=repo_owner,
        repo=repo_name,
        branch_name=branch_name,
        title=pr_title,
        body=pr_body,
        base=base_branch,
        stdout=self.stdout,
    )