ol-openedx-course-translations 0.1.0__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ol-openedx-course-translations might be problematic. Click here for more details.

Files changed (40) hide show
  1. ol_openedx_course_translations/admin.py +29 -0
  2. ol_openedx_course_translations/apps.py +13 -2
  3. ol_openedx_course_translations/filters.py +39 -0
  4. ol_openedx_course_translations/glossaries/machine_learning/ar.txt +175 -0
  5. ol_openedx_course_translations/glossaries/machine_learning/de.txt +175 -0
  6. ol_openedx_course_translations/glossaries/machine_learning/el.txt +988 -0
  7. ol_openedx_course_translations/glossaries/machine_learning/es.txt +175 -0
  8. ol_openedx_course_translations/glossaries/machine_learning/fr.txt +175 -0
  9. ol_openedx_course_translations/glossaries/machine_learning/ja.txt +175 -0
  10. ol_openedx_course_translations/glossaries/machine_learning/pt-br.txt +175 -0
  11. ol_openedx_course_translations/glossaries/machine_learning/ru.txt +213 -0
  12. ol_openedx_course_translations/management/commands/sync_and_translate_language.py +1866 -0
  13. ol_openedx_course_translations/management/commands/translate_course.py +472 -475
  14. ol_openedx_course_translations/middleware.py +143 -0
  15. ol_openedx_course_translations/migrations/0001_add_translation_logs.py +84 -0
  16. ol_openedx_course_translations/migrations/__init__.py +0 -0
  17. ol_openedx_course_translations/models.py +57 -0
  18. ol_openedx_course_translations/providers/__init__.py +1 -0
  19. ol_openedx_course_translations/providers/base.py +278 -0
  20. ol_openedx_course_translations/providers/deepl_provider.py +292 -0
  21. ol_openedx_course_translations/providers/llm_providers.py +581 -0
  22. ol_openedx_course_translations/settings/cms.py +17 -0
  23. ol_openedx_course_translations/settings/common.py +58 -30
  24. ol_openedx_course_translations/settings/lms.py +38 -0
  25. ol_openedx_course_translations/tasks.py +222 -0
  26. ol_openedx_course_translations/urls.py +16 -0
  27. ol_openedx_course_translations/utils/__init__.py +0 -0
  28. ol_openedx_course_translations/utils/command_utils.py +197 -0
  29. ol_openedx_course_translations/utils/constants.py +218 -0
  30. ol_openedx_course_translations/utils/course_translations.py +608 -0
  31. ol_openedx_course_translations/utils/translation_sync.py +808 -0
  32. ol_openedx_course_translations/views.py +73 -0
  33. ol_openedx_course_translations-0.3.5.dist-info/METADATA +409 -0
  34. ol_openedx_course_translations-0.3.5.dist-info/RECORD +40 -0
  35. ol_openedx_course_translations-0.3.5.dist-info/entry_points.txt +5 -0
  36. ol_openedx_course_translations-0.1.0.dist-info/METADATA +0 -63
  37. ol_openedx_course_translations-0.1.0.dist-info/RECORD +0 -11
  38. ol_openedx_course_translations-0.1.0.dist-info/entry_points.txt +0 -2
  39. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/WHEEL +0 -0
  40. {ol_openedx_course_translations-0.1.0.dist-info → ol_openedx_course_translations-0.3.5.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,608 @@
1
+ """Utility functions for course translations."""
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ import shutil
7
+ import tarfile
8
+ from pathlib import Path
9
+ from xml.etree.ElementTree import Element
10
+
11
+ from defusedxml import ElementTree
12
+ from django.conf import settings
13
+ from django.core.management.base import CommandError
14
+ from opaque_keys.edx.locator import CourseLocator
15
+
16
+ from ol_openedx_course_translations.providers.deepl_provider import DeepLProvider
17
+ from ol_openedx_course_translations.providers.llm_providers import (
18
+ GeminiProvider,
19
+ MistralProvider,
20
+ OpenAIProvider,
21
+ )
22
+ from ol_openedx_course_translations.utils.constants import (
23
+ PROVIDER_DEEPL,
24
+ PROVIDER_GEMINI,
25
+ PROVIDER_MISTRAL,
26
+ PROVIDER_OPENAI,
27
+ )
28
+
29
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Archive and file size limits
# Largest size (in bytes) allowed for a single member of a course tar archive;
# validate_tar_file() rejects any member bigger than this.
TAR_FILE_SIZE_LIMIT = 512 * 1024 * 1024  # 512MB
33
+
34
+
35
def _get_deepl_api_key() -> str:
    """
    Look up the DeepL API key in the Django settings.

    The key is read from ``settings.TRANSLATIONS_PROVIDERS[PROVIDER_DEEPL]["api_key"]``.
    A missing setting, a missing DeepL entry, a DeepL entry that is not a dict,
    or an empty key are all treated as "not configured".

    Returns:
        The configured, non-empty DeepL API key

    Raises:
        ValueError: If no DeepL API key is configured
    """
    all_providers = getattr(settings, "TRANSLATIONS_PROVIDERS", {})

    deepl_entry = (
        all_providers[PROVIDER_DEEPL] if PROVIDER_DEEPL in all_providers else None
    )
    # Only a dict-shaped entry can carry an "api_key"; anything else counts as unset.
    configured_key = (
        deepl_entry.get("api_key", "") if isinstance(deepl_entry, dict) else ""
    )
    if configured_key:
        return configured_key

    msg = (
        "DeepL API key is required. Configure it in "
        "TRANSLATIONS_PROVIDERS['deepl']['api_key']"
    )
    raise ValueError(msg)
59
+
60
+
61
def get_translation_provider(
    provider_name: str,
    model_name: str | None = None,
):
    """
    Build the translation provider object for the given provider name.

    Note: This function assumes the provider spec was already validated via
    _parse_and_validate_provider_spec() in the management command.

    Args:
        provider_name: Name of the provider (deepl, openai, gemini, mistral)
        model_name: Model name passed to the LLM providers (not used for DeepL)

    Returns:
        Translation provider instance

    Raises:
        ValueError: If the DeepL API key is missing or the provider is unknown
    """
    # The DeepL key is resolved first for every provider: DeepL uses it as its
    # own key, and each LLM provider receives it as its second argument.
    deepl_key = _get_deepl_api_key()
    if provider_name == PROVIDER_DEEPL:
        return DeepLProvider(deepl_key, None)

    # LLM providers read their own key from TRANSLATIONS_PROVIDERS[<name>]["api_key"].
    llm_api_key = getattr(settings, "TRANSLATIONS_PROVIDERS", {})[provider_name][
        "api_key"
    ]

    llm_provider_classes = (
        (PROVIDER_OPENAI, OpenAIProvider),
        (PROVIDER_GEMINI, GeminiProvider),
        (PROVIDER_MISTRAL, MistralProvider),
    )
    for known_name, provider_class in llm_provider_classes:
        if provider_name == known_name:
            return provider_class(llm_api_key, deepl_key, model_name)

    msg = f"Unknown provider: {provider_name}"
    raise ValueError(msg)
100
+
101
+
102
+ def translate_xml_display_name(
103
+ xml_content: str,
104
+ target_language: str,
105
+ provider,
106
+ glossary_directory: str | None = None,
107
+ ) -> str:
108
+ """
109
+ Translate display_name attribute in XML content.
110
+
111
+ This function is used primarily with DeepL for separate display_name translation.
112
+ LLM providers handle display_name translation as part of the full XML translation.
113
+
114
+ Args:
115
+ xml_content: XML content as string
116
+ target_language: Target language code
117
+ provider: Translation provider instance
118
+ glossary_directory: Optional glossary directory path
119
+
120
+ Returns:
121
+ Updated XML content with translated display_name
122
+ """
123
+ try:
124
+ xml_root = ElementTree.fromstring(xml_content)
125
+ display_name = xml_root.attrib.get("display_name")
126
+
127
+ if display_name:
128
+ translated_name = provider.translate_text(
129
+ display_name,
130
+ target_language,
131
+ glossary_directory=glossary_directory,
132
+ )
133
+ xml_root.set("display_name", translated_name)
134
+ return ElementTree.tostring(xml_root, encoding="unicode")
135
+ except ElementTree.ParseError as e:
136
+ logger.warning("Failed to parse XML for display_name translation: %s", e)
137
+
138
+ return xml_content
139
+
140
+
141
def update_video_xml_transcripts(xml_content: str, target_language: str) -> str:
    """
    Update video XML transcripts for target language.

    When the root element is ``<video>`` and has a ``transcripts`` attribute
    (a JSON object), an entry keyed by the target language is added whose value
    is an existing transcript filename with its trailing ``-xx.srt`` language
    suffix replaced by the target language.

    The language code is lowercased, matching update_video_xml_complete() and
    get_srt_output_filename(), so the referenced ``.srt`` name agrees with the
    name those functions produce.

    Args:
        xml_content: XML content as string
        target_language: Target language code

    Returns:
        Updated XML content with target language transcripts, or the original
        string when the XML or the transcripts JSON cannot be parsed
    """
    try:
        xml_root = ElementTree.fromstring(xml_content)
        target_lang_code = target_language.lower()

        # Update transcripts attribute in <video> tag
        if xml_root.tag == "video" and "transcripts" in xml_root.attrib:
            # Tolerate values whose quotes are still entity-escaped after parsing.
            transcripts_json_str = xml_root.attrib["transcripts"].replace("&quot;", '"')
            transcripts_dict = json.loads(transcripts_json_str)
            target_suffix = f"-{target_lang_code}.srt"

            # Every existing entry overwrites the same target key, so the entry
            # derived from the last key wins.
            for key in list(transcripts_dict.keys()):
                transcripts_dict[target_lang_code] = re.sub(
                    r"-[a-zA-Z]{2}\.srt$",
                    target_suffix,
                    transcripts_dict[key],
                )

            xml_root.set(
                "transcripts", json.dumps(transcripts_dict, ensure_ascii=False)
            )

        return ElementTree.tostring(xml_root, encoding="unicode")
    except (ElementTree.ParseError, json.JSONDecodeError) as e:
        logger.warning("Failed to update video XML transcripts: %s", e)
        return xml_content
178
+
179
+
180
def update_course_language_attribute(course_dir: Path, target_language: str) -> None:
    """
    Set the language attribute of course XML files to the target language.

    Every ``*.xml`` file directly inside ``<course_dir>/course`` is inspected;
    files whose root element is ``<course>`` with a ``language`` attribute are
    rewritten in place with the lowercased target language. Read, write and
    parse failures are logged as warnings and do not stop the loop.

    Args:
        course_dir: Parent course directory path
        target_language: Target language code
    """
    for course_xml_path in (course_dir / "course").glob("*.xml"):
        try:
            root = ElementTree.fromstring(course_xml_path.read_text(encoding="utf-8"))

            # Only a <course> root that already declares a language is touched.
            if root.tag != "course" or "language" not in root.attrib:
                continue

            previous_language = root.attrib["language"]
            root.set("language", target_language.lower())
            course_xml_path.write_text(
                ElementTree.tostring(root, encoding="unicode"), encoding="utf-8"
            )
            logger.debug(
                "Updated language attribute in %s from %s to %s",
                course_xml_path,
                previous_language,
                target_language.lower(),
            )
        except (OSError, ElementTree.ParseError) as error:
            logger.warning(
                "Failed to update language attribute in %s: %s", course_xml_path, error
            )
207
+
208
+
209
+ def translate_policy_fields( # noqa: C901
210
+ course_policy_obj: dict,
211
+ target_language: str,
212
+ provider,
213
+ glossary_directory: str | None = None,
214
+ ) -> None:
215
+ """
216
+ Translate fields in policy object.
217
+
218
+ Args:
219
+ course_policy_obj: Policy object dictionary
220
+ target_language: Target language code
221
+ provider: Translation provider instance
222
+ glossary_directory: Optional glossary directory path
223
+ """
224
+ # Translate string fields
225
+ string_fields = ["advertised_start", "display_name", "display_organization"]
226
+ for field in string_fields:
227
+ if field in course_policy_obj:
228
+ translated = provider.translate_text(
229
+ course_policy_obj[field],
230
+ target_language.lower(),
231
+ glossary_directory=glossary_directory,
232
+ )
233
+ course_policy_obj[field] = translated
234
+
235
+ # Update language attribute
236
+ course_policy_obj["language"] = target_language.lower()
237
+
238
+ # Translate discussion topics
239
+ if "discussion_topics" in course_policy_obj:
240
+ topics = course_policy_obj["discussion_topics"]
241
+ if isinstance(topics, dict):
242
+ translated_topics = {}
243
+ for key, value in topics.items():
244
+ translated_key = provider.translate_text(
245
+ key, target_language.lower(), glossary_directory=glossary_directory
246
+ )
247
+ translated_topics[translated_key] = value
248
+ course_policy_obj["discussion_topics"] = translated_topics
249
+
250
+ # Translate learning info
251
+ if "learning_info" in course_policy_obj and isinstance(
252
+ course_policy_obj["learning_info"], list
253
+ ):
254
+ translated_info = [
255
+ provider.translate_text(
256
+ item, target_language.lower(), glossary_directory=glossary_directory
257
+ )
258
+ for item in course_policy_obj["learning_info"]
259
+ ]
260
+ course_policy_obj["learning_info"] = translated_info
261
+
262
+ # Translate tabs
263
+ if "tabs" in course_policy_obj and isinstance(course_policy_obj["tabs"], list):
264
+ for tab in course_policy_obj["tabs"]:
265
+ if isinstance(tab, dict) and "name" in tab:
266
+ tab["name"] = provider.translate_text(
267
+ tab["name"],
268
+ target_language.lower(),
269
+ glossary_directory=glossary_directory,
270
+ )
271
+
272
+ # Translate XML attributes
273
+ if "xml_attributes" in course_policy_obj and isinstance(
274
+ course_policy_obj["xml_attributes"], dict
275
+ ):
276
+ xml_attributes_dict = course_policy_obj["xml_attributes"]
277
+ translatable_xml_fields = ["diplay_name", "info_sidebar_name"]
278
+ for xml_field_name in translatable_xml_fields:
279
+ if xml_field_name in xml_attributes_dict:
280
+ translated_value = provider.translate_text(
281
+ xml_attributes_dict[xml_field_name],
282
+ target_language.lower(),
283
+ glossary_directory=glossary_directory,
284
+ )
285
+ xml_attributes_dict[xml_field_name] = translated_value
286
+
287
+
288
def get_srt_output_filename(input_filename: str, target_language: str) -> str:
    """
    Build the filename of the translated copy of an SRT file.

    Everything after the last ``-`` in the name is replaced by the lowercased
    target language plus ``.srt``. Names without a ``-`` or not ending in
    ``.srt`` are returned unchanged.

    Args:
        input_filename: Original SRT filename
        target_language: Target language code

    Returns:
        Output filename with target language code
    """
    stem, dash, _old_suffix = input_filename.rpartition("-")
    if not dash or not input_filename.endswith(".srt"):
        return input_filename
    return f"{stem}-{target_language.lower()}.srt"
303
+
304
+
305
def get_supported_archive_extension(filename: str) -> str | None:
    """
    Find which supported archive extension, if any, the filename ends with.

    Extensions are taken from
    ``settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS`` and checked in
    that order; the first match wins.

    Args:
        filename: Name of the archive file

    Returns:
        Archive extension if supported, None otherwise
    """
    return next(
        (
            candidate
            for candidate in settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS
            if filename.endswith(candidate)
        ),
        None,
    )
319
+
320
+
321
def validate_tar_file(tar_file: tarfile.TarFile) -> None:
    """
    Check every member of an open tar file before it is extracted.

    A member is rejected when its name is absolute (starts with ``/``),
    contains ``..`` anywhere, or when its size exceeds TAR_FILE_SIZE_LIMIT.

    Args:
        tar_file: Open tarfile object

    Raises:
        CommandError: If tar file contains unsafe members or excessively large files
    """
    for entry in tar_file.getmembers():
        entry_name = entry.name

        # Names that could escape the extraction directory
        looks_absolute = entry_name.startswith("/")
        if looks_absolute or ".." in entry_name:
            error_msg = f"Unsafe tar member: {entry_name}"
            raise CommandError(error_msg)

        # Members above the per-file size limit (512MB)
        if entry.size > TAR_FILE_SIZE_LIMIT:
            error_msg = f"File too large: {entry_name}"
            raise CommandError(error_msg)
340
+
341
+
342
def extract_course_archive(course_archive_path: Path) -> Path:
    """
    Extract course archive to working directory.

    The archive is extracted next to itself, into a directory named after the
    archive with its supported extension removed. If that directory already
    exists, extraction is skipped and the existing directory is returned.

    If extraction fails part-way, the partially written directory is removed so
    that a later call does not mistake it for a complete, previously extracted
    course.

    Args:
        course_archive_path: Path to the course archive file

    Returns:
        Path to extracted course directory

    Raises:
        CommandError: If the archive fails validation or extraction fails
    """
    # Use the parent directory of the source file as the base extraction directory
    extraction_base_dir = course_archive_path.parent

    # Get base name without extension
    archive_extension = get_supported_archive_extension(course_archive_path.name)
    archive_base_name = (
        course_archive_path.name[: -len(archive_extension)]
        if archive_extension
        else course_archive_path.name
    )

    extracted_course_dir = extraction_base_dir / archive_base_name

    if not extracted_course_dir.exists():
        try:
            with tarfile.open(course_archive_path, "r:*") as tar_file:
                # Validate tar file before extraction; a validation failure
                # raises CommandError before anything is written to disk.
                validate_tar_file(tar_file)
                tar_file.extractall(path=extracted_course_dir, filter="data")
        except (tarfile.TarError, OSError) as e:
            # The directory did not exist before this call, so whatever is there
            # now is an incomplete extraction and must not be reused.
            shutil.rmtree(extracted_course_dir, ignore_errors=True)
            error_msg = f"Failed to extract archive: {e}"
            raise CommandError(error_msg) from e

    # Logged both after a fresh extraction and when an existing directory is reused.
    logger.info("Extracted course to: %s", extracted_course_dir)
    return extracted_course_dir
380
+
381
+
382
def create_translated_copy(source_course_dir: Path, target_language: str) -> Path:
    """
    Copy the course directory so the copy can be translated.

    The copy is created beside the source directory and is named
    ``<target_language>_<source directory name>``.

    Args:
        source_course_dir: Path to source course directory
        target_language: Target language code

    Returns:
        Path to translated course directory

    Raises:
        CommandError: If translation directory already exists
    """
    destination_dir = (
        source_course_dir.parent / f"{target_language}_{source_course_dir.name}"
    )

    # Refuse to overwrite an earlier translation run.
    if destination_dir.exists():
        error_msg = f"Translation directory already exists: {destination_dir}"
        raise CommandError(error_msg)

    shutil.copytree(source_course_dir, destination_dir)
    logger.info("Created translation copy: %s", destination_dir)
    return destination_dir
407
+
408
+
409
def create_translated_archive(
    translated_course_dir: Path,
    target_language: str,
    original_archive_name: str,
) -> Path:
    """
    Pack the translated course into a tar.gz archive.

    The archive is written beside the translated directory as
    ``<target_language>_<original name without its supported extension>.tar.gz``
    and contains only the ``course`` sub-directory. An existing archive with
    that name is replaced, and the translated directory is deleted afterwards.

    Args:
        translated_course_dir: Path to translated course directory
        target_language: Target language code
        original_archive_name: Original archive filename

    Returns:
        Path to created archive
    """
    # Strip the supported archive extension (if any) from the original name
    known_extension = get_supported_archive_extension(original_archive_name)
    if known_extension:
        base_name = original_archive_name[: -len(known_extension)]
    else:
        base_name = original_archive_name

    archive_path = (
        translated_course_dir.parent / f"{target_language}_{base_name}.tar.gz"
    )

    # Start from a clean slate if an archive with this name is already there
    if archive_path.exists():
        archive_path.unlink()

    # Only the 'course' directory goes into the archive
    with tarfile.open(archive_path, "w:gz") as archive:
        archive.add(translated_course_dir / "course", arcname="course")

    # The extracted working copy is no longer needed once archived
    if translated_course_dir.exists():
        shutil.rmtree(translated_course_dir)

    logger.info("Created tar.gz archive: %s", archive_path)
    return archive_path
451
+
452
+
453
def update_video_xml_complete(xml_content: str, target_language: str) -> str:  # noqa: C901
    """
    Add target-language transcript references everywhere a video XML holds them.

    Three places are updated, using the lowercased target language:

    1. the JSON ``transcripts`` attribute of a ``<video>`` root element
    2. ``<video_asset>/<transcripts>``: a ``<transcript>`` copied from the first
       existing one, with ``language_code`` set to the target language
    3. direct ``<transcript>`` children of the root that have a ``src``: a new
       ``<transcript language=... src=...>`` pointing at the target-language file

    Entries that already exist are not added a second time.

    Args:
        xml_content: XML content as string
        target_language: Target language code

    Returns:
        Updated XML content with all video transcript references, or the
        original string if parsing fails
    """
    try:
        video_root = ElementTree.fromstring(xml_content)
        lang = target_language.lower()
        srt_suffix = f"-{lang}.srt"

        # 1. JSON map stored in the transcripts attribute of <video>
        if video_root.tag == "video" and "transcripts" in video_root.attrib:
            raw_json = video_root.attrib["transcripts"].replace("&quot;", '"')
            transcript_map = json.loads(raw_json)
            # Each existing entry rewrites the same target key; the last one wins.
            for source_key in list(transcript_map.keys()):
                transcript_map[lang] = re.sub(
                    r"-[a-zA-Z]{2}\.srt$",
                    srt_suffix,
                    transcript_map[source_key],
                )
            video_root.set(
                "transcripts", json.dumps(transcript_map, ensure_ascii=False)
            )

        # 2. <transcript> entries nested in <video_asset>/<transcripts>
        for asset in video_root.findall("video_asset"):
            for container in asset.findall("transcripts"):
                template = container.find("transcript")
                candidate = Element("transcript")
                if template is not None:
                    # Start from the first existing transcript's attributes
                    candidate.attrib = template.attrib.copy()
                candidate.set("language_code", lang)

                already_present = False
                for sibling in container.findall("transcript"):
                    if sibling.attrib == candidate.attrib:
                        already_present = True
                        break
                if not already_present:
                    container.append(candidate)

        # 3. <transcript> elements placed directly under the root
        for source_transcript in video_root.findall("transcript"):
            source_src = source_transcript.get("src")
            if not source_src:
                continue

            translated_src = re.sub(r"-[a-zA-Z]{2}\.srt$", srt_suffix, source_src)
            addition = Element("transcript")
            addition.set("language", lang)
            addition.set("src", translated_src)

            is_duplicate = any(
                sibling.get("language") == lang and sibling.get("src") == translated_src
                for sibling in video_root.findall("transcript")
            )
            if not is_duplicate:
                video_root.append(addition)

        return ElementTree.tostring(video_root, encoding="unicode")
    except (ElementTree.ParseError, json.JSONDecodeError, KeyError) as error:
        logger.warning("Failed to update video XML completely: %s", error)
        return xml_content
526
+
527
+
528
def validate_course_inputs(
    course_archive_path: Path,
) -> None:
    """
    Check that the course archive given to the command can be used.

    The archive must exist on disk and its name must end with one of the
    extensions in ``settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS``.

    Args:
        course_archive_path: Path to course archive file

    Raises:
        CommandError: If validation fails
    """
    if not course_archive_path.exists():
        error_msg = f"Course archive not found: {course_archive_path}"
        raise CommandError(error_msg)

    # A recognised extension means there is nothing more to check.
    if get_supported_archive_extension(course_archive_path.name) is not None:
        return

    supported_extensions = ", ".join(
        settings.COURSE_TRANSLATIONS_SUPPORTED_ARCHIVE_EXTENSIONS
    )
    error_msg = f"Course archive must be a tar file: {supported_extensions}"
    raise CommandError(error_msg)
550
+
551
+
552
def get_translatable_file_paths(
    directory_path: Path,
    *,
    recursive: bool = False,
) -> list[Path]:
    """
    Collect the paths in a directory whose names end with a translatable extension.

    Extensions come from ``settings.COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS``.
    In recursive mode the whole tree is searched once per extension and results
    are grouped by extension; otherwise only regular files directly inside the
    directory are considered.

    Args:
        directory_path: Path to directory to scan
        recursive: Whether to search recursively

    Returns:
        List of translatable file paths
    """
    if not recursive:
        return [
            entry
            for entry in directory_path.iterdir()
            if entry.is_file()
            and any(
                entry.name.endswith(suffix)
                for suffix in settings.COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS
            )
        ]

    matches: list[Path] = [
        found
        for suffix in settings.COURSE_TRANSLATIONS_TRANSLATABLE_EXTENSIONS
        for found in directory_path.rglob(f"*{suffix}")
    ]
    return matches
583
+
584
+
585
def generate_course_key_from_xml(course_dir_path: Path) -> CourseLocator:
    """
    Generate the course id of the source course.

    The ``org``, ``course`` and ``url_name`` attributes are read from the root
    element of ``<course_dir_path>/course/course.xml`` and combined into a
    course locator, with ``url_name`` used as the run.

    Args:
        course_dir_path: Path to the extracted course directory

    Returns:
        CourseLocator built from the course.xml attributes

    Raises:
        CommandError: If course.xml cannot be read or parsed, or if any of the
            required attributes is missing or empty
    """
    course_xml_path = course_dir_path / "course" / "course.xml"
    try:
        xml_content = course_xml_path.read_text(encoding="utf-8")
        xml_root = ElementTree.fromstring(xml_content)
    except (OSError, ElementTree.ParseError) as e:
        # The file being read is course.xml, so the message names that file.
        error_msg = f"Failed to read course id from course.xml: {e}"
        raise CommandError(error_msg) from e

    org = xml_root.get("org", "")
    course = xml_root.get("course", "")
    url_name = xml_root.get("url_name", "")

    if not all([org, course, url_name]):
        error_msg = "Missing required attributes in course.xml: org, course, url_name"
        raise CommandError(error_msg)

    # URL name is the run ID of the course
    return CourseLocator(org=org, course=course, run=url_name)