openedx-learning 0.27.1__py2.py3-none-any.whl → 0.29.0__py2.py3-none-any.whl

This diff shows the content changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (21)
  1. openedx_learning/__init__.py +1 -1
  2. openedx_learning/apps/authoring/backup_restore/api.py +19 -4
  3. openedx_learning/apps/authoring/backup_restore/management/commands/lp_dump.py +22 -4
  4. openedx_learning/apps/authoring/backup_restore/management/commands/lp_load.py +57 -0
  5. openedx_learning/apps/authoring/backup_restore/serializers.py +168 -0
  6. openedx_learning/apps/authoring/backup_restore/toml.py +203 -24
  7. openedx_learning/apps/authoring/backup_restore/zipper.py +1007 -16
  8. openedx_learning/apps/authoring/components/api.py +55 -0
  9. openedx_learning/apps/authoring/components/migrations/0004_remove_componentversioncontent_uuid.py +17 -0
  10. openedx_learning/apps/authoring/components/models.py +1 -3
  11. openedx_learning/apps/authoring/publishing/api.py +65 -12
  12. openedx_learning/apps/authoring/sections/api.py +17 -0
  13. openedx_learning/apps/authoring/subsections/api.py +17 -0
  14. openedx_learning/apps/authoring/units/api.py +17 -0
  15. {openedx_learning-0.27.1.dist-info → openedx_learning-0.29.0.dist-info}/METADATA +14 -5
  16. {openedx_learning-0.27.1.dist-info → openedx_learning-0.29.0.dist-info}/RECORD +21 -18
  17. openedx_tagging/core/tagging/models/base.py +7 -5
  18. openedx_tagging/core/tagging/models/utils.py +37 -9
  19. {openedx_learning-0.27.1.dist-info → openedx_learning-0.29.0.dist-info}/WHEEL +0 -0
  20. {openedx_learning-0.27.1.dist-info → openedx_learning-0.29.0.dist-info}/licenses/LICENSE.txt +0 -0
  21. {openedx_learning-0.27.1.dist-info → openedx_learning-0.29.0.dist-info}/top_level.txt +0 -0
@@ -2,14 +2,84 @@
 This module provides functionality to create a zip file containing the learning package data,
 including a TOML representation of the learning package and its entities.
 """
+import hashlib
+import time
 import zipfile
+from collections import defaultdict
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from io import StringIO
 from pathlib import Path
+from typing import Any, List, Literal, Optional, Tuple
 
-from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
+from django.contrib.auth.models import User as UserType  # pylint: disable=imported-auth-user
+from django.db import transaction
+from django.db.models import Prefetch, QuerySet
+from django.utils.text import slugify
+from rest_framework import serializers
+
+from openedx_learning.api.authoring_models import (
+    Collection,
+    ComponentType,
+    ComponentVersion,
+    ComponentVersionContent,
+    Content,
+    LearningPackage,
+    PublishableEntity,
+    PublishableEntityVersion,
+)
+from openedx_learning.apps.authoring.backup_restore.serializers import (
+    CollectionSerializer,
+    ComponentSerializer,
+    ComponentVersionSerializer,
+    ContainerSerializer,
+    ContainerVersionSerializer,
+    LearningPackageMetadataSerializer,
+    LearningPackageSerializer,
+)
+from openedx_learning.apps.authoring.backup_restore.toml import (
+    parse_collection_toml,
+    parse_learning_package_toml,
+    parse_publishable_entity_toml,
+    toml_collection,
+    toml_learning_package,
+    toml_publishable_entity,
+)
+from openedx_learning.apps.authoring.collections import api as collections_api
+from openedx_learning.apps.authoring.components import api as components_api
 from openedx_learning.apps.authoring.publishing import api as publishing_api
-from openedx_learning.apps.authoring.publishing.models.learning_package import LearningPackage
+from openedx_learning.apps.authoring.sections import api as sections_api
+from openedx_learning.apps.authoring.subsections import api as subsections_api
+from openedx_learning.apps.authoring.units import api as units_api
 
 TOML_PACKAGE_NAME = "package.toml"
+DEFAULT_USERNAME = "command"
+
+
+def slugify_hashed_filename(identifier: str) -> str:
+    """
+    Generate a filesystem-safe filename from an identifier.
+
+    Why:
+        Identifiers may contain characters that are invalid or ambiguous
+        in filesystems (e.g., slashes, colons, case differences).
+        Additionally, two different identifiers might normalize to the same
+        slug after cleaning. To avoid collisions and ensure uniqueness,
+        we append a short blake2b hash.
+
+    What:
+        - Slugify the identifier (preserves most characters, only strips
+          filesystem-invalid ones).
+        - Append a short hash for uniqueness.
+        - Result: human-readable but still unique and filesystem-safe filename.
+    """
+    slug = slugify(identifier, allow_unicode=True)
+    # Short digest ensures uniqueness without overly long filenames
+    short_hash = hashlib.blake2b(
+        identifier.encode("utf-8"),
+        digest_size=3,
+    ).hexdigest()
+    return f"{slug}_{short_hash}"
 
 
 class LearningPackageZipper:
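
Note: two different entity keys can collapse to the same slug, so the short blake2b
suffix is what keeps generated filenames unique. A minimal standalone sketch of the
same scheme (the input keys below are invented for illustration; only
django.utils.text.slugify is needed, and it works without Django settings):

    import hashlib
    from django.utils.text import slugify

    def hashed_name(identifier: str) -> str:
        slug = slugify(identifier, allow_unicode=True)
        # digest_size=3 -> 6 hex characters, as in slugify_hashed_filename above
        suffix = hashlib.blake2b(identifier.encode("utf-8"), digest_size=3).hexdigest()
        return f"{slug}_{suffix}"

    # "My Component" and "my component" both slugify to "my-component",
    # but their hash suffixes differ, so the two filenames stay distinct.
    print(hashed_name("My Component"))
    print(hashed_name("my component"))
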
@@ -17,8 +87,175 @@ class LearningPackageZipper:
     A class to handle the zipping of learning content for backup and restore.
     """
 
-    def __init__(self, learning_package: LearningPackage):
+    def __init__(self, learning_package: LearningPackage, user: UserType | None = None):
         self.learning_package = learning_package
+        self.user = user
+        self.folders_already_created: set[Path] = set()
+        self.entities_filenames_already_created: set[str] = set()
+        self.utc_now = datetime.now(tz=timezone.utc)
+
+    def _ensure_parent_folders(
+        self,
+        zip_file: zipfile.ZipFile,
+        path: Path,
+        timestamp: datetime,
+    ) -> None:
+        """
+        Ensure all parent folders for the given path exist in the zip.
+        """
+        for parent in path.parents[::-1]:
+            if parent != Path(".") and parent not in self.folders_already_created:
+                folder_info = zipfile.ZipInfo(str(parent) + "/")
+                folder_info.date_time = timestamp.timetuple()[:6]
+                zip_file.writestr(folder_info, "")
+                self.folders_already_created.add(parent)
+
+    def add_folder_to_zip(
+        self,
+        zip_file: zipfile.ZipFile,
+        folder: Path,
+        timestamp: datetime | None = None,
+    ) -> None:
+        """
+        Explicitly add an empty folder into the zip structure.
+        """
+        if folder in self.folders_already_created:
+            return
+
+        if timestamp is None:
+            timestamp = self.utc_now
+
+        self._ensure_parent_folders(zip_file, folder, timestamp)
+
+        folder_info = zipfile.ZipInfo(str(folder) + "/")
+        folder_info.date_time = timestamp.timetuple()[:6]
+        zip_file.writestr(folder_info, "")
+        self.folders_already_created.add(folder)
+
+    def add_file_to_zip(
+        self,
+        zip_file: zipfile.ZipFile,
+        file_path: Path,
+        content: bytes | str | None = None,
+        timestamp: datetime | None = None,
+    ) -> None:
+        """
+        Add a file into the zip structure.
+        """
+        if timestamp is None:
+            timestamp = self.utc_now
+
+        self._ensure_parent_folders(zip_file, file_path, timestamp)
+
+        file_info = zipfile.ZipInfo(str(file_path))
+        file_info.date_time = timestamp.timetuple()[:6]
+
+        if isinstance(content, str):
+            content = content.encode("utf-8")
+
+        zip_file.writestr(file_info, content or b"")
+
+    def get_publishable_entities(self) -> QuerySet[PublishableEntity]:
+        """
+        Retrieve the publishable entities associated with the learning package.
+        Prefetches related data for efficiency.
+        """
+        lp_id = self.learning_package.pk
+        publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
+        return (
+            publishable_entities  # type: ignore[no-redef]
+            .select_related(
+                "container",
+                "component__component_type",
+                "draft__version__componentversion",
+                "published__version__componentversion",
+            )
+            .prefetch_related(
+                # We should re-evaluate the prefetching strategy here,
+                # as the current approach may cause performance issues—
+                # especially with large libraries (up to 100K items),
+                # which is too large for this type of prefetch.
+                Prefetch(
+                    "draft__version__componentversion__componentversioncontent_set",
+                    queryset=ComponentVersionContent.objects.select_related("content"),
+                    to_attr="prefetched_contents",
+                ),
+                Prefetch(
+                    "published__version__componentversion__componentversioncontent_set",
+                    queryset=ComponentVersionContent.objects.select_related("content"),
+                    to_attr="prefetched_contents",
+                ),
+            )
+            .order_by("key")
+        )
+
+    def get_collections(self) -> QuerySet[Collection]:
+        """
+        Get the collections associated with the learning package.
+        """
+        return (
+            collections_api.get_collections(self.learning_package.pk)
+            .prefetch_related("entities")
+        )
+
+    def get_versions_to_write(
+        self, entity: PublishableEntity
+    ) -> Tuple[List[PublishableEntityVersion],
+               Optional[PublishableEntityVersion],
+               Optional[PublishableEntityVersion]]:
+        """
+        Get the versions of a publishable entity that should be written to the zip file.
+        It retrieves both draft and published versions.
+
+        Returns:
+            Tuple containing:
+            - versions_to_write: List of PublishableEntityVersion to write.
+            - draft_version: The current draft version, if any.
+            - published_version: The current published version, if any.
+        """
+        draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
+        published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)
+
+        versions_to_write = [draft_version] if draft_version else []
+
+        if published_version and published_version != draft_version:
+            versions_to_write.append(published_version)
+        return versions_to_write, draft_version, published_version
+
+    def get_entity_toml_filename(self, entity_key: str) -> str:
+        """
+        Generate a unique TOML filename for a publishable entity.
+        Ensures that the filename is unique within the zip file.
+
+        Behavior:
+        - If the slugified key has not been used yet, use it as the filename.
+        - If it has been used, append a short hash to ensure uniqueness.
+
+        Args:
+            entity_key (str): The key of the publishable entity.
+
+        Returns:
+            str: A unique TOML filename for the entity.
+        """
+        slugify_name = slugify(entity_key, allow_unicode=True)
+
+        if slugify_name in self.entities_filenames_already_created:
+            filename = slugify_hashed_filename(entity_key)
+        else:
+            filename = slugify_name
+
+        self.entities_filenames_already_created.add(slugify_name)
+        return filename
+
+    def get_latest_modified(self, versions_to_check: List[PublishableEntityVersion]) -> datetime:
+        """
+        Get the latest modification timestamp among the learning package and its entities.
+        """
+        latest = self.learning_package.updated
+        for version in versions_to_check:
+            if version and version.created > latest:
+                latest = version.created
+        return latest
 
     def create_zip(self, path: str) -> None:
         """
@@ -28,26 +265,780 @@ class LearningPackageZipper:
         Raises:
             Exception: If the learning package cannot be found or if the zip creation fails.
         """
-        package_toml_content: str = toml_learning_package(self.learning_package)
 
         with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
-            # Add the package.toml string
-            zipf.writestr(TOML_PACKAGE_NAME, package_toml_content)
+            # Add the package.toml file
+            package_toml_content: str = toml_learning_package(self.learning_package, self.utc_now, user=self.user)
+            self.add_file_to_zip(zipf, Path(TOML_PACKAGE_NAME), package_toml_content, self.learning_package.updated)
 
             # Add the entities directory
             entities_folder = Path("entities")
-            zip_info = zipfile.ZipInfo(str(entities_folder) + "/")  # Ensure trailing slash
-            zipf.writestr(zip_info, "")  # Add explicit empty directory entry
+            self.add_folder_to_zip(zipf, entities_folder, timestamp=self.learning_package.updated)
 
             # Add the collections directory
             collections_folder = Path("collections")
-            collections_info = zipfile.ZipInfo(str(collections_folder) + "/")  # Ensure trailing slash
-            zipf.writestr(collections_info, "")  # Add explicit empty directory
+            self.add_folder_to_zip(zipf, collections_folder, timestamp=self.learning_package.updated)
+
+            # ------ ENTITIES SERIALIZATION -------------
+
+            # get the publishable entities
+            publishable_entities: QuerySet[PublishableEntity] = self.get_publishable_entities()
+
+            for entity in publishable_entities:
+                # entity: PublishableEntity = entity  # Type hint for clarity
+
+                # Get the versions to serialize for this entity
+                versions_to_write, draft_version, published_version = self.get_versions_to_write(entity)
+
+                latest_modified = self.get_latest_modified(versions_to_write)
 
-            # Add each entity's TOML file
-            for entity in publishing_api.get_entities(self.learning_package.pk):
                 # Create a TOML representation of the entity
-                entity_toml_content: str = toml_publishable_entity(entity)
-                entity_toml_filename = f"{entity.key}.toml"
-                entity_toml_path = entities_folder / entity_toml_filename
-                zipf.writestr(str(entity_toml_path), entity_toml_content)
+                entity_toml_content: str = toml_publishable_entity(
+                    entity, versions_to_write, draft_version, published_version
+                )
+
+                if hasattr(entity, 'container'):
+                    entity_filename = self.get_entity_toml_filename(entity.key)
+                    entity_toml_filename = f"{entity_filename}.toml"
+                    entity_toml_path = entities_folder / entity_toml_filename
+                    self.add_file_to_zip(zipf, entity_toml_path, entity_toml_content, timestamp=latest_modified)
+
+                if hasattr(entity, 'component'):
+                    # Create the component folder structure for the entity. The structure is as follows:
+                    # entities/
+                    #     xblock.v1/ (component namespace)
+                    #         html/ (component type)
+                    #             my_component.toml (entity TOML file)
+                    #             my_component/ (component id)
+                    #                 component_versions/
+                    #                     v1/
+                    #                         static/
+
+                    entity_filename = self.get_entity_toml_filename(entity.component.local_key)
+
+                    component_root_folder = (
+                        # Example: "entities/xblock.v1/html/"
+                        entities_folder
+                        / entity.component.component_type.namespace
+                        / entity.component.component_type.name
+                    )
+
+                    component_folder = (
+                        # Example: "entities/xblock.v1/html/my_component_123456/"
+                        component_root_folder
+                        / entity_filename
+                    )
+
+                    component_version_folder = (
+                        # Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
+                        component_folder
+                        / "component_versions"
+                    )
+
+                    # Add the entity TOML file inside the component type folder as well
+                    # Example: "entities/xblock.v1/html/my_component_123456.toml"
+                    component_entity_toml_path = component_root_folder / f"{entity_filename}.toml"
+                    self.add_file_to_zip(zipf, component_entity_toml_path, entity_toml_content, latest_modified)
+
+                    # ------ COMPONENT VERSIONING -------------
+                    # Focusing on draft and published versions only
+                    for version in versions_to_write:
+                        # Create a folder for the version
+                        version_number = f"v{version.version_num}"
+                        version_folder = component_version_folder / version_number
+                        self.add_folder_to_zip(zipf, version_folder, timestamp=version.created)
+
+                        # Add static folder for the version
+                        static_folder = version_folder / "static"
+                        self.add_folder_to_zip(zipf, static_folder, timestamp=version.created)
+
+                        # ------ COMPONENT STATIC CONTENT -------------
+                        component_version: ComponentVersion = version.componentversion
+
+                        # Get content data associated with this version
+                        contents: QuerySet[
+                            ComponentVersionContent
+                        ] = component_version.prefetched_contents  # type: ignore[attr-defined]
+
+                        for component_version_content in contents:
+                            content: Content = component_version_content.content
+
+                            # Important: The component_version_content.key contains implicitly
+                            # the file name and the file extension
+                            file_path = version_folder / component_version_content.key
+
+                            if content.has_file and content.path:
+                                # If has_file, we pull it from the file system
+                                with content.read_file() as f:
+                                    file_data = f.read()
+                            elif not content.has_file and content.text:
+                                # Otherwise, we use the text content as the file data
+                                file_data = content.text
+                            else:
+                                # If no file and no text, we skip this content
+                                continue
+                            self.add_file_to_zip(zipf, file_path, file_data, timestamp=content.created)
+
+            # ------ COLLECTION SERIALIZATION -------------
+            collections = self.get_collections()
+
+            for collection in collections:
+                collection_hash_slug = self.get_entity_toml_filename(collection.key)
+                collection_toml_file_path = collections_folder / f"{collection_hash_slug}.toml"
+                entity_keys_related = collection.entities.order_by("key").values_list("key", flat=True)
+                self.add_file_to_zip(
+                    zipf,
+                    collection_toml_file_path,
+                    toml_collection(collection, list(entity_keys_related)),
+                    timestamp=collection.modified,
+                )
+
+
+@dataclass
+class RestoreLearningPackageData:
+    """
+    Data about the restored learning package.
+    """
+    id: int  # The ID of the restored learning package
+    key: str  # The key of the restored learning package (may be different if staged)
+    archive_lp_key: str  # The original key from the archive
+    archive_org_key: str  # The original organization key from the archive
+    archive_slug: str  # The original slug from the archive
+    title: str
+    num_containers: int
+    num_sections: int
+    num_subsections: int
+    num_units: int
+    num_components: int
+    num_collections: int
+
+
+@dataclass
+class BackupMetadata:
+    """
+    Metadata about the backup operation.
+    """
+    format_version: int
+    created_at: str
+    created_by: str | None = None
+    created_by_email: str | None = None
+    original_server: str | None = None
+
+
+@dataclass
+class RestoreResult:
+    """
+    Result of the restore operation.
+    """
+    status: Literal["success", "error"]
+    log_file_error: StringIO | None = None
+    lp_restored_data: RestoreLearningPackageData | None = None
+    backup_metadata: BackupMetadata | None = None
+
+
+def unpack_lp_key(lp_key: str) -> tuple[str, str]:
+    """
+    Unpack a learning package key into its components.
+    """
+    parts = lp_key.split(":")
+    if len(parts) < 3:
+        raise ValueError(f"Invalid learning package key: {lp_key}")
+    _, org_key, lp_slug = parts[:3]
+    return org_key, lp_slug
+
+
+def generate_staged_lp_key(archive_lp_key: str, user: UserType) -> str:
+    """
+    Generate a staged learning package key based on the given base key.
+
+    Arguments:
+        archive_lp_key (str): The original learning package key from the archive.
+        user (UserType | None): The user performing the restore operation.
+
+    Example:
+        Input: "lib:WGU:LIB_C001"
+        Output: "lp-restore:dave:WGU:LIB_C001:1728575321"
+
+    The timestamp at the end ensures the key is unique.
+    """
+    username = user.username
+    org_key, lp_slug = unpack_lp_key(archive_lp_key)
+    timestamp = int(time.time() * 1000)  # Current time in milliseconds
+    return f"lp-restore:{username}:{org_key}:{lp_slug}:{timestamp}"
+
+
+class LearningPackageUnzipper:
+    """
+    Handles extraction and restoration of learning package data from a zip archive.
+
+    Args:
+        zipf (zipfile.ZipFile): The zip file containing the learning package data.
+        user (UserType | None): The user performing the restore operation. Not necessarily the creator.
+        generate_new_key (bool): Whether to generate a new key for the restored learning package.
+
+    Returns:
+        dict[str, Any]: The result of the restore operation, including any errors encountered.
+
+    Responsibilities:
+        - Parse and organize files from the zip structure.
+        - Restore learning package, containers, components, and collections to the database.
+        - Ensure atomicity of the restore process.
+
+    Usage:
+        unzipper = LearningPackageUnzipper(zip_file)
+        result = unzipper.load()
+    """
+
+    def __init__(self, zipf: zipfile.ZipFile, key: str | None = None, user: UserType | None = None):
+        self.zipf = zipf
+        self.user = user
+        self.lp_key = key  # If provided, use this key for the restored learning package
+        self.utc_now: datetime = datetime.now(timezone.utc)
+        self.component_types_cache: dict[tuple[str, str], ComponentType] = {}
+        self.errors: list[dict[str, Any]] = []
+        # Maps for resolving relationships
+        self.components_map_by_key: dict[str, Any] = {}
+        self.units_map_by_key: dict[str, Any] = {}
+        self.subsections_map_by_key: dict[str, Any] = {}
+        self.sections_map_by_key: dict[str, Any] = {}
+        self.all_publishable_entities_keys: set[str] = set()
+        self.all_published_entities_versions: set[tuple[str, int]] = set()  # To track published entity versions
+
+    # --------------------------
+    # Public API
+    # --------------------------
+
+    @transaction.atomic
+    def load(self) -> dict[str, Any]:
+        """Extracts and restores all objects from the ZIP archive in an atomic transaction."""
+
+        # Step 1: Validate presence of package.toml and basic structure
+        _, organized_files = self.check_mandatory_files()
+        if self.errors:
+            # Early return if preliminary checks fail since mandatory files are missing
+            result = RestoreResult(
+                status="error",
+                log_file_error=self._write_errors(),  # return a StringIO with the errors
+                lp_restored_data=None,
+                backup_metadata=None,
+            )
+            return asdict(result)
+
+        # Step 2: Extract and validate learning package, entities and collections
+        # Errors are collected and reported at the end
+        # No saving to DB happens until all validations pass
+        learning_package_validated = self._extract_learning_package(organized_files["learning_package"])
+        lp_metadata = learning_package_validated.pop("metadata", {})
+
+        components_validated = self._extract_entities(
+            organized_files["components"], ComponentSerializer, ComponentVersionSerializer
+        )
+        containers_validated = self._extract_entities(
+            organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
+        )
+
+        collections_validated = self._extract_collections(
+            organized_files["collections"]
+        )
+
+        # Step 3.1: If there are validation errors, return them without saving anything
+        if self.errors:
+            result = RestoreResult(
+                status="error",
+                log_file_error=self._write_errors(),  # return a StringIO with the errors
+                lp_restored_data=None,
+                backup_metadata=None,
+            )
+            return asdict(result)
+
+        # Step 3.2: Save everything to the DB
+        # All validations passed, we can proceed to save everything
+        # Save the learning package first to get its ID
+        archive_lp_key = learning_package_validated["key"]
+        learning_package = self._save(
+            learning_package_validated,
+            components_validated,
+            containers_validated,
+            collections_validated,
+            component_static_files=organized_files["component_static_files"]
+        )
+
+        num_containers = sum(
+            len(containers_validated.get(container_type, []))
+            for container_type in ["section", "subsection", "unit"]
+        )
+
+        org_key, lp_slug = unpack_lp_key(archive_lp_key)
+        result = RestoreResult(
+            status="success",
+            log_file_error=None,
+            lp_restored_data=RestoreLearningPackageData(
+                id=learning_package.id,
+                key=learning_package.key,
+                archive_lp_key=archive_lp_key,  # The original key from the backup archive
+                archive_org_key=org_key,  # The original organization key from the backup archive
+                archive_slug=lp_slug,  # The original slug from the backup archive
+                title=learning_package.title,
+                num_containers=num_containers,
+                num_sections=len(containers_validated.get("section", [])),
+                num_subsections=len(containers_validated.get("subsection", [])),
+                num_units=len(containers_validated.get("unit", [])),
+                num_components=len(components_validated["components"]),
+                num_collections=len(collections_validated["collections"]),
+            ),
+            backup_metadata=BackupMetadata(
+                format_version=lp_metadata.get("format_version", 1),
+                created_by=lp_metadata.get("created_by"),
+                created_by_email=lp_metadata.get("created_by_email"),
+                created_at=lp_metadata.get("created_at"),
+                original_server=lp_metadata.get("origin_server"),
+            ) if lp_metadata else None,
+        )
+        return asdict(result)
+
+    def check_mandatory_files(self) -> Tuple[list[dict[str, Any]], dict[str, Any]]:
+        """
+        Check for the presence of mandatory files in the zip archive.
+        So far, the only mandatory file is package.toml.
+        """
+        organized_files = self._get_organized_file_list(self.zipf.namelist())
+
+        if not organized_files["learning_package"]:
+            self.errors.append({"file": TOML_PACKAGE_NAME, "errors": "Missing learning package file."})
+
+        return self.errors, organized_files
+
+    # --------------------------
+    # Extract + Validate
+    # --------------------------
+
+    def _extract_learning_package(self, package_file: str) -> dict[str, Any]:
+        """Extract and validate the learning package TOML file."""
+        toml_content_text = self._read_file_from_zip(package_file)
+        toml_content_dict = parse_learning_package_toml(toml_content_text)
+        lp = toml_content_dict.get("learning_package")
+        lp_metadata = toml_content_dict.get("meta")
+
+        # Validate learning package data
+        lp_serializer = LearningPackageSerializer(data=lp)
+        if not lp_serializer.is_valid():
+            self.errors.append({"file": f"{package_file} learning package section", "errors": lp_serializer.errors})
+
+        # Validate metadata if present
+        lp_metadata_serializer = LearningPackageMetadataSerializer(data=lp_metadata)
+        if not lp_metadata_serializer.is_valid():
+            self.errors.append({"file": f"{package_file} meta section", "errors": lp_metadata_serializer.errors})
+
+        lp_validated = lp_serializer.validated_data if lp_serializer.is_valid() else {}
+        lp_metadata = lp_metadata_serializer.validated_data if lp_metadata_serializer.is_valid() else {}
+        lp_validated["metadata"] = lp_metadata
+        return lp_validated
+
+    def _extract_entities(
+        self,
+        entity_files: list[str],
+        entity_serializer: type[serializers.Serializer],
+        version_serializer: type[serializers.Serializer],
+    ) -> dict[str, Any]:
+        """Generic extraction + validation pipeline for containers or components."""
+        results: dict[str, list[Any]] = defaultdict(list)
+
+        for file in entity_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+
+            entity_data, draft_version, published_version = self._load_entity_data(file)
+            serializer = entity_serializer(
+                data={"created": self.utc_now, "created_by": None, **entity_data}
+            )
+
+            if not serializer.is_valid():
+                self.errors.append({"file": file, "errors": serializer.errors})
+                continue
+
+            entity_data = serializer.validated_data
+            self.all_publishable_entities_keys.add(entity_data["key"])
+            entity_type = entity_data.pop("container_type", "components")
+            results[entity_type].append(entity_data)
+
+            valid_versions = self._validate_versions(
+                entity_data,
+                draft_version,
+                published_version,
+                version_serializer,
+                file=file
+            )
+            if valid_versions["draft"]:
+                results[f"{entity_type}_drafts"].append(valid_versions["draft"])
+            if valid_versions["published"]:
+                results[f"{entity_type}_published"].append(valid_versions["published"])
+
+        return results
+
+    def _extract_collections(
+        self,
+        collection_files: list[str],
+    ) -> dict[str, Any]:
+        """Extraction + validation pipeline for collections."""
+        results: dict[str, list[Any]] = defaultdict(list)
+
+        for file in collection_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+            toml_content = self._read_file_from_zip(file)
+            collection_data = parse_collection_toml(toml_content)
+            collection_data = collection_data.get("collection", {})
+            serializer = CollectionSerializer(data={"created_by": None, **collection_data})
+            if not serializer.is_valid():
+                self.errors.append({"file": f"{file} collection section", "errors": serializer.errors})
+                continue
+            collection_validated = serializer.validated_data
+            entities_list = collection_validated["entities"]
+            for entity_key in entities_list:
+                if entity_key not in self.all_publishable_entities_keys:
+                    self.errors.append({
+                        "file": file,
+                        "errors": f"Entity key {entity_key} not found for collection {collection_validated.get('key')}"
+                    })
+            results["collections"].append(collection_validated)
+
+        return results
+
+    # --------------------------
+    # Save Logic
+    # --------------------------
+
+    def _save(
+        self,
+        learning_package: dict[str, Any],
+        components: dict[str, Any],
+        containers: dict[str, Any],
+        collections: dict[str, Any],
+        *,
+        component_static_files: dict[str, List[str]]
+    ) -> LearningPackage:
+        """Persist all validated entities in two phases: published then drafts."""
+
+        # Important: If not using a specific LP key, generate a temporary one
+        # We cannot use the original key because it may generate security issues
+        if not self.lp_key:
+            # Generate a tmp key for the staged learning package
+            if not self.user:
+                raise ValueError("User is required to create lp_key")
+            learning_package["key"] = generate_staged_lp_key(
+                archive_lp_key=learning_package["key"],
+                user=self.user
+            )
+        else:
+            learning_package["key"] = self.lp_key
+
+        learning_package_obj = publishing_api.create_learning_package(**learning_package)
+
+        with publishing_api.bulk_draft_changes_for(learning_package_obj.id):
+            self._save_components(learning_package_obj, components, component_static_files)
+            self._save_units(learning_package_obj, containers)
+            self._save_subsections(learning_package_obj, containers)
+            self._save_sections(learning_package_obj, containers)
+            self._save_collections(learning_package_obj, collections)
+        publishing_api.publish_all_drafts(learning_package_obj.id)
+
+        with publishing_api.bulk_draft_changes_for(learning_package_obj.id):
+            self._save_draft_versions(components, containers, component_static_files)
+
+        return learning_package_obj
+
+    def _save_collections(self, learning_package, collections):
+        """Save collections and their entities."""
+        for valid_collection in collections.get("collections", []):
+            entities = valid_collection.pop("entities", [])
+            collection = collections_api.create_collection(learning_package.id, **valid_collection)
+            collection = collections_api.add_to_collection(
+                learning_package_id=learning_package.id,
+                key=collection.key,
+                entities_qset=publishing_api.get_publishable_entities(learning_package.id).filter(key__in=entities)
+            )
+
+    def _save_components(self, learning_package, components, component_static_files):
+        """Save components and published component versions."""
+        for valid_component in components.get("components", []):
+            entity_key = valid_component.pop("key")
+            component = components_api.create_component(learning_package.id, **valid_component)
+            self.components_map_by_key[entity_key] = component
+
+        for valid_published in components.get("components_published", []):
+            entity_key = valid_published.pop("entity_key")
+            version_num = valid_published["version_num"]  # Should exist, validated earlier
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
+            self.all_published_entities_versions.add(
+                (entity_key, version_num)
+            )  # Track published version
+            components_api.create_next_component_version(
+                self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_units(self, learning_package, containers):
+        """Save units and published unit versions."""
+        for valid_unit in containers.get("unit", []):
+            entity_key = valid_unit.get("key")
+            unit = units_api.create_unit(learning_package.id, **valid_unit)
+            self.units_map_by_key[entity_key] = unit
+
+        for valid_published in containers.get("unit_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.components_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            units_api.create_next_unit_version(
+                self.units_map_by_key[entity_key],
+                force_version_num=valid_published.pop("version_num", None),
+                components=children,
+                **valid_published
+            )
+
+    def _save_subsections(self, learning_package, containers):
+        """Save subsections and published subsection versions."""
+        for valid_subsection in containers.get("subsection", []):
+            entity_key = valid_subsection.get("key")
+            subsection = subsections_api.create_subsection(learning_package.id, **valid_subsection)
+            self.subsections_map_by_key[entity_key] = subsection
+
+        for valid_published in containers.get("subsection_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.units_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            subsections_api.create_next_subsection_version(
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_sections(self, learning_package, containers):
+        """Save sections and published section versions."""
+        for valid_section in containers.get("section", []):
+            entity_key = valid_section.get("key")
+            section = sections_api.create_section(learning_package.id, **valid_section)
+            self.sections_map_by_key[entity_key] = section
+
+        for valid_published in containers.get("section_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.subsections_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            sections_api.create_next_section_version(
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_draft_versions(self, components, containers, component_static_files):
+        """Save draft versions for all entity types."""
+        for valid_draft in components.get("components_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
+            components_api.create_next_component_version(
+                self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_draft.pop("version_num", None),
+                # Drafts can diverge from published, so we allow ignoring previous content
+                # Use case: published v1 had files A, B; draft v2 only has file A
+                ignore_previous_content=True,
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("unit_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.components_map_by_key)
+            units_api.create_next_unit_version(
+                self.units_map_by_key[entity_key],
+                components=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("subsection_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.units_map_by_key)
+            subsections_api.create_next_subsection_version(
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("section_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.subsections_map_by_key)
+            sections_api.create_next_section_version(
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+    # --------------------------
+    # Utilities
+    # --------------------------
+
+    def _format_errors(self) -> str:
+        """Return formatted error content as a string."""
+        if not self.errors:
+            return ""
+        lines = [f"{err['file']}: {err['errors']}" for err in self.errors]
+        return "Errors encountered during restore:\n" + "\n".join(lines) + "\n"
+
+    def _write_errors(self) -> StringIO | None:
+        """
+        Write errors to a StringIO buffer.
+        """
+        content = self._format_errors()
+        if not content:
+            return None
+        return StringIO(content)
+
+    def _is_version_already_exists(self, entity_key: str, version_num: int) -> bool:
+        """
+        Check if a version already exists for a given entity key and version number.
+
+        Note:
+            Skip creating draft if this version is already published
+            Why? Because the version itself is already created and
+            we don't want to create duplicate versions.
+            Otherwise, we will raise an IntegrityError on PublishableEntityVersion
+            due to unique constraints between publishable_entity and version_num.
+        """
+        identifier = (entity_key, version_num)
+        return identifier in self.all_published_entities_versions
+
+    def _resolve_static_files(
+        self,
+        num_version: int,
+        entity_key: str,
+        static_files_map: dict[str, List[str]]
+    ) -> dict[str, bytes]:
+        """Resolve static file paths into their binary content."""
+        resolved_files: dict[str, bytes] = {}
+
+        static_file_key = f"{entity_key}:v{num_version}"  # e.g., "my_component:123:v1"
+        static_files = static_files_map.get(static_file_key, [])
+        for static_file in static_files:
+            local_key = static_file.split(f"v{num_version}/")[-1]
+            with self.zipf.open(static_file, "r") as f:
+                resolved_files[local_key] = f.read()
+        return resolved_files
+
+    def _resolve_children(self, entity_data: dict[str, Any], lookup_map: dict[str, Any]) -> list[Any]:
+        """Resolve child entity keys into model instances."""
+        children_keys = entity_data.pop("children", [])
+        return [lookup_map[key] for key in children_keys if key in lookup_map]
+
+    def _load_entity_data(
+        self, entity_file: str
+    ) -> tuple[dict[str, Any], dict[str, Any] | None, dict[str, Any] | None]:
+        """Load entity data and its versions from TOML."""
+        entity_toml_txt = self._read_file_from_zip(entity_file)
+        entity_toml_dict = parse_publishable_entity_toml(entity_toml_txt)
+        entity_data = entity_toml_dict.get("entity", {})
+        version_data = entity_toml_dict.get("version", [])
+        return entity_data, *self._get_versions_to_write(version_data, entity_data)
+
+    def _validate_versions(self, entity_data, draft, published, serializer_cls, *, file) -> dict[str, Any]:
+        """Validate draft/published versions with serializer."""
+        valid = {"draft": None, "published": None}
+        for label, version in [("draft", draft), ("published", published)]:
+            if not version:
+                continue
+            serializer = serializer_cls(
+                data={
+                    "entity_key": entity_data["key"],
+                    "created": self.utc_now,
+                    "created_by": None,
+                    **version
+                }
+            )
+            if serializer.is_valid():
+                valid[label] = serializer.validated_data
+            else:
+                self.errors.append({"file": file, "errors": serializer.errors})
+        return valid
+
+    def _read_file_from_zip(self, filename: str) -> str:
+        """Read and decode a UTF-8 file from the zip archive."""
+        with self.zipf.open(filename) as f:
+            return f.read().decode("utf-8")
+
+    def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
+        """Organize file paths into categories: learning_package, containers, components, collections."""
+        organized: dict[str, Any] = {
+            "learning_package": None,
+            "containers": [],
+            "components": [],
+            "component_static_files": defaultdict(list),
+            "collections": [],
+        }
+
+        for path in file_paths:
+            if path.endswith("/"):
+                # Skip directories
+                continue
+            if path == TOML_PACKAGE_NAME:
+                organized["learning_package"] = path
+            elif path.startswith("entities/") and str(Path(path).parent) == "entities" and path.endswith(".toml"):
+                # Top-level entity TOML files are considered containers
+                organized["containers"].append(path)
+            elif path.startswith("entities/"):
+                if path.endswith(".toml"):
+                    # Component entity TOML files
+                    organized["components"].append(path)
+                else:
+                    # Component static files
+                    # Path structure: entities/<namespace>/<type>/<component_id>/component_versions/<version>/static/...
+                    # Example: entities/xblock.v1/html/my_component_123456/component_versions/v1/static/...
+                    component_key = Path(path).parts[1:4]  # e.g., ['xblock.v1', 'html', 'my_component_123456']
+                    num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1"  # e.g., 'v1'
+                    if len(component_key) == 3:
+                        component_identifier = ":".join(component_key)
+                        component_identifier += f":{num_version}"
+                        organized["component_static_files"][component_identifier].append(path)
+                    else:
+                        self.errors.append({"file": path, "errors": "Invalid component static file path structure."})
+            elif path.startswith("collections/") and path.endswith(".toml"):
+                # Collection TOML files
+                organized["collections"].append(path)
+        return organized
+
+    def _get_versions_to_write(
+        self,
+        version_data: list[dict[str, Any]],
+        entity_data: dict[str, Any]
+    ) -> tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
+        """Return the draft and published versions to write, based on entity data."""
+        draft_num = entity_data.get("draft", {}).get("version_num")
+        published_num = entity_data.get("published", {}).get("version_num")
+        lookup = {v.get("version_num"): v for v in version_data}
+        return (
+            lookup.get(draft_num) if draft_num else None,
+            lookup.get(published_num) if published_num else None,
+        )