openedx-learning 0.28.0__py2.py3-none-any.whl → 0.29.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,24 @@ This module provides functionality to create a zip file containing the learning
 including a TOML representation of the learning package and its entities.
 """
 import hashlib
+import time
 import zipfile
+from collections import defaultdict
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from io import StringIO
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, List, Literal, Optional, Tuple
 
+from django.contrib.auth.models import User as UserType  # pylint: disable=imported-auth-user
+from django.db import transaction
 from django.db.models import Prefetch, QuerySet
 from django.utils.text import slugify
+from rest_framework import serializers
 
 from openedx_learning.api.authoring_models import (
+    Collection,
+    ComponentType,
     ComponentVersion,
     ComponentVersionContent,
     Content,
@@ -18,10 +28,33 @@ from openedx_learning.api.authoring_models import (
     PublishableEntity,
     PublishableEntityVersion,
 )
-from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
+from openedx_learning.apps.authoring.backup_restore.serializers import (
+    CollectionSerializer,
+    ComponentSerializer,
+    ComponentVersionSerializer,
+    ContainerSerializer,
+    ContainerVersionSerializer,
+    LearningPackageMetadataSerializer,
+    LearningPackageSerializer,
+)
+from openedx_learning.apps.authoring.backup_restore.toml import (
+    parse_collection_toml,
+    parse_learning_package_toml,
+    parse_publishable_entity_toml,
+    toml_collection,
+    toml_learning_package,
+    toml_publishable_entity,
+)
+from openedx_learning.apps.authoring.collections import api as collections_api
+from openedx_learning.apps.authoring.components import api as components_api
+from openedx_learning.apps.authoring.contents import api as contents_api
 from openedx_learning.apps.authoring.publishing import api as publishing_api
+from openedx_learning.apps.authoring.sections import api as sections_api
+from openedx_learning.apps.authoring.subsections import api as subsections_api
+from openedx_learning.apps.authoring.units import api as units_api
 
 TOML_PACKAGE_NAME = "package.toml"
+DEFAULT_USERNAME = "command"
 
 
 def slugify_hashed_filename(identifier: str) -> str:
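
The body of slugify_hashed_filename sits outside these hunks, but comments further down describe its output as a slugified key combined with a short hash so that it is unique (e.g. "my_component_123456"). A hypothetical sketch of that idea, not the package's actual implementation:

    def slugified_name_with_hash(identifier: str) -> str:
        # Illustrative stand-in: slug for readability, short digest for uniqueness.
        digest = hashlib.blake2b(identifier.encode("utf-8"), digest_size=4).hexdigest()
        return f"{slugify(identifier, allow_unicode=True)}_{digest}"
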
@@ -55,21 +88,73 @@ class LearningPackageZipper:
     A class to handle the zipping of learning content for backup and restore.
     """
 
-    def __init__(self, learning_package: LearningPackage):
+    def __init__(self, learning_package: LearningPackage, user: UserType | None = None):
         self.learning_package = learning_package
+        self.user = user
         self.folders_already_created: set[Path] = set()
+        self.entities_filenames_already_created: set[str] = set()
+        self.utc_now = datetime.now(tz=timezone.utc)
 
-    def create_folder(self, folder_path: Path, zip_file: zipfile.ZipFile) -> None:
+    def _ensure_parent_folders(
+        self,
+        zip_file: zipfile.ZipFile,
+        path: Path,
+        timestamp: datetime,
+    ) -> None:
         """
-        Create a folder for the zip file structure.
-        Skips creating the folder if it already exists based on the folder path.
-        Args:
-            folder_path (Path): The path of the folder to create.
+        Ensure all parent folders for the given path exist in the zip.
+        """
+        for parent in path.parents[::-1]:
+            if parent != Path(".") and parent not in self.folders_already_created:
+                folder_info = zipfile.ZipInfo(str(parent) + "/")
+                folder_info.date_time = timestamp.timetuple()[:6]
+                zip_file.writestr(folder_info, "")
+                self.folders_already_created.add(parent)
+
+    def add_folder_to_zip(
+        self,
+        zip_file: zipfile.ZipFile,
+        folder: Path,
+        timestamp: datetime | None = None,
+    ) -> None:
+        """
+        Explicitly add an empty folder into the zip structure.
         """
-        if folder_path not in self.folders_already_created:
-            zip_info = zipfile.ZipInfo(str(folder_path) + "/")
-            zip_file.writestr(zip_info, "")  # Add explicit empty directory entry
-            self.folders_already_created.add(folder_path)
+        if folder in self.folders_already_created:
+            return
+
+        if timestamp is None:
+            timestamp = self.utc_now
+
+        self._ensure_parent_folders(zip_file, folder, timestamp)
+
+        folder_info = zipfile.ZipInfo(str(folder) + "/")
+        folder_info.date_time = timestamp.timetuple()[:6]
+        zip_file.writestr(folder_info, "")
+        self.folders_already_created.add(folder)
+
+    def add_file_to_zip(
+        self,
+        zip_file: zipfile.ZipFile,
+        file_path: Path,
+        content: bytes | str | None = None,
+        timestamp: datetime | None = None,
+    ) -> None:
+        """
+        Add a file into the zip structure.
+        """
+        if timestamp is None:
+            timestamp = self.utc_now
+
+        self._ensure_parent_folders(zip_file, file_path, timestamp)
+
+        file_info = zipfile.ZipInfo(str(file_path))
+        file_info.date_time = timestamp.timetuple()[:6]
+
+        if isinstance(content, str):
+            content = content.encode("utf-8")
+
+        zip_file.writestr(file_info, content or b"")
 
     def get_publishable_entities(self) -> QuerySet[PublishableEntity]:
         """
@@ -79,7 +164,7 @@ class LearningPackageZipper:
         lp_id = self.learning_package.pk
         publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
         return (
-            publishable_entities
+            publishable_entities  # type: ignore[no-redef]
             .select_related(
                 "container",
                 "component__component_type",
@@ -102,12 +187,32 @@ class LearningPackageZipper:
                     to_attr="prefetched_contents",
                 ),
             )
+            .order_by("key")
+        )
+
+    def get_collections(self) -> QuerySet[Collection]:
+        """
+        Get the collections associated with the learning package.
+        """
+        return (
+            collections_api.get_collections(self.learning_package.pk)
+            .prefetch_related("entities")
         )
 
-    def get_versions_to_write(self, entity: PublishableEntity):
+    def get_versions_to_write(
+        self, entity: PublishableEntity
+    ) -> Tuple[List[PublishableEntityVersion],
+               Optional[PublishableEntityVersion],
+               Optional[PublishableEntityVersion]]:
         """
         Get the versions of a publishable entity that should be written to the zip file.
         It retrieves both draft and published versions.
+
+        Returns:
+            Tuple containing:
+                - versions_to_write: List of PublishableEntityVersion to write.
+                - draft_version: The current draft version, if any.
+                - published_version: The current published version, if any.
         """
         draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
         published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)
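
get_versions_to_write now returns the draft and published versions alongside the list, sparing callers a second lookup. Roughly, given a zipper = LearningPackageZipper(learning_package):

    versions_to_write, draft_version, published_version = zipper.get_versions_to_write(entity)
    # One entry when the draft is also the published version, two when they differ.
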
@@ -116,7 +221,42 @@ class LearningPackageZipper:
 
         if published_version and published_version != draft_version:
             versions_to_write.append(published_version)
-        return versions_to_write
+        return versions_to_write, draft_version, published_version
+
+    def get_entity_toml_filename(self, entity_key: str) -> str:
+        """
+        Generate a unique TOML filename for a publishable entity.
+        Ensures that the filename is unique within the zip file.
+
+        Behavior:
+            - If the slugified key has not been used yet, use it as the filename.
+            - If it has been used, append a short hash to ensure uniqueness.
+
+        Args:
+            entity_key (str): The key of the publishable entity.
+
+        Returns:
+            str: A unique TOML filename for the entity.
+        """
+        slugify_name = slugify(entity_key, allow_unicode=True)
+
+        if slugify_name in self.entities_filenames_already_created:
+            filename = slugify_hashed_filename(entity_key)
+        else:
+            filename = slugify_name
+
+        self.entities_filenames_already_created.add(slugify_name)
+        return filename
+
+    def get_latest_modified(self, versions_to_check: List[PublishableEntityVersion]) -> datetime:
+        """
+        Get the latest modification timestamp among the learning package and its entities.
+        """
+        latest = self.learning_package.updated
+        for version in versions_to_check:
+            if version and version.created > latest:
+                latest = version.created
+        return latest
 
     def create_zip(self, path: str) -> None:
         """
@@ -129,16 +269,16 @@ class LearningPackageZipper:
 
         with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
             # Add the package.toml file
-            package_toml_content: str = toml_learning_package(self.learning_package)
-            zipf.writestr(TOML_PACKAGE_NAME, package_toml_content)
+            package_toml_content: str = toml_learning_package(self.learning_package, self.utc_now, user=self.user)
+            self.add_file_to_zip(zipf, Path(TOML_PACKAGE_NAME), package_toml_content, self.learning_package.updated)
 
             # Add the entities directory
             entities_folder = Path("entities")
-            self.create_folder(entities_folder, zipf)
+            self.add_folder_to_zip(zipf, entities_folder, timestamp=self.learning_package.updated)
 
             # Add the collections directory
             collections_folder = Path("collections")
-            self.create_folder(collections_folder, zipf)
+            self.add_folder_to_zip(zipf, collections_folder, timestamp=self.learning_package.updated)
 
             # ------ ENTITIES SERIALIZATION -------------
 
@@ -148,14 +288,21 @@ class LearningPackageZipper:
         for entity in publishable_entities:
             # entity: PublishableEntity = entity  # Type hint for clarity
 
+            # Get the versions to serialize for this entity
+            versions_to_write, draft_version, published_version = self.get_versions_to_write(entity)
+
+            latest_modified = self.get_latest_modified(versions_to_write)
+
             # Create a TOML representation of the entity
-            entity_toml_content: str = toml_publishable_entity(entity)
+            entity_toml_content: str = toml_publishable_entity(
+                entity, versions_to_write, draft_version, published_version
+            )
 
             if hasattr(entity, 'container'):
-                entity_slugify_hash = slugify_hashed_filename(entity.key)
-                entity_toml_filename = f"{entity_slugify_hash}.toml"
+                entity_filename = self.get_entity_toml_filename(entity.key)
+                entity_toml_filename = f"{entity_filename}.toml"
                 entity_toml_path = entities_folder / entity_toml_filename
-                zipf.writestr(str(entity_toml_path), entity_toml_content)
+                self.add_file_to_zip(zipf, entity_toml_path, entity_toml_content, timestamp=latest_modified)
 
             if hasattr(entity, 'component'):
                 # Create the component folder structure for the entity. The structure is as follows:
@@ -168,51 +315,43 @@ class LearningPackageZipper:
                 #     v1/
                 #         static/
 
-                # Generate the slugified hash for the component local key
-                # Example: if the local key is "my_component", the slugified hash might be "my_component_123456"
-                # It's a combination of the local key and a hash and should be unique
-                entity_slugify_hash = slugify_hashed_filename(entity.component.local_key)
+                entity_filename = self.get_entity_toml_filename(entity.component.local_key)
 
-                # Create the component namespace folder
-                # Example of component namespace is: "entities/xblock.v1/"
-                component_namespace_folder = entities_folder / entity.component.component_type.namespace
-                self.create_folder(component_namespace_folder, zipf)
+                component_root_folder = (
+                    # Example: "entities/xblock.v1/html/"
+                    entities_folder
+                    / entity.component.component_type.namespace
+                    / entity.component.component_type.name
+                )
 
-                # Create the component type folder
-                # Example of component type is: "entities/xblock.v1/html/"
-                component_type_folder = component_namespace_folder / entity.component.component_type.name
-                self.create_folder(component_type_folder, zipf)
+                component_folder = (
+                    # Example: "entities/xblock.v1/html/my_component_123456/"
+                    component_root_folder
+                    / entity_filename
+                )
 
-                # Create the component id folder
-                # Example of component id is: "entities/xblock.v1/html/my_component_123456/"
-                component_id_folder = component_type_folder / entity_slugify_hash
-                self.create_folder(component_id_folder, zipf)
+                component_version_folder = (
+                    # Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
+                    component_folder
+                    / "component_versions"
+                )
 
                 # Add the entity TOML file inside the component type folder as well
                 # Example: "entities/xblock.v1/html/my_component_123456.toml"
-                component_entity_toml_path = component_type_folder / f"{entity_slugify_hash}.toml"
-                zipf.writestr(str(component_entity_toml_path), entity_toml_content)
-
-                # Add component version folder into the component id folder
-                # Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
-                component_version_folder = component_id_folder / "component_versions"
-                self.create_folder(component_version_folder, zipf)
+                component_entity_toml_path = component_root_folder / f"{entity_filename}.toml"
+                self.add_file_to_zip(zipf, component_entity_toml_path, entity_toml_content, latest_modified)
 
                 # ------ COMPONENT VERSIONING -------------
-                # Focusing on draft and published versions
-
-                # Get the draft and published versions
-                versions_to_write: List[PublishableEntityVersion] = self.get_versions_to_write(entity)
-
+                # Focusing on draft and published versions only
                 for version in versions_to_write:
                     # Create a folder for the version
                     version_number = f"v{version.version_num}"
                     version_folder = component_version_folder / version_number
-                    self.create_folder(version_folder, zipf)
+                    self.add_folder_to_zip(zipf, version_folder, timestamp=version.created)
 
                     # Add static folder for the version
                     static_folder = version_folder / "static"
-                    self.create_folder(static_folder, zipf)
+                    self.add_folder_to_zip(zipf, static_folder, timestamp=version.created)
 
                     # ------ COMPONENT STATIC CONTENT -------------
                     component_version: ComponentVersion = version.componentversion
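
Assembling the folder comments in this hunk, a component with a published v1 and a draft v2 should land in the archive roughly like this (component name illustrative):

    entities/
        xblock.v1/
            html/
                my_component_123456.toml
                my_component_123456/
                    component_versions/
                        v1/
                            static/
                        v2/
                            static/
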
@@ -239,4 +378,685 @@ class LearningPackageZipper:
                         else:
                             # If no file and no text, we skip this content
                             continue
-                        zipf.writestr(str(file_path), file_data)
+                        self.add_file_to_zip(zipf, file_path, file_data, timestamp=content.created)
+
+            # ------ COLLECTION SERIALIZATION -------------
+            collections = self.get_collections()
+
+            for collection in collections:
+                collection_hash_slug = self.get_entity_toml_filename(collection.key)
+                collection_toml_file_path = collections_folder / f"{collection_hash_slug}.toml"
+                entity_keys_related = collection.entities.order_by("key").values_list("key", flat=True)
+                self.add_file_to_zip(
+                    zipf,
+                    collection_toml_file_path,
+                    toml_collection(collection, list(entity_keys_related)),
+                    timestamp=collection.modified,
+                )
+
+
+@dataclass
+class RestoreLearningPackageData:
+    """
+    Data about the restored learning package.
+    """
+    id: int  # The ID of the restored learning package
+    key: str  # The key of the restored learning package (may be different if staged)
+    archive_lp_key: str  # The original key from the archive
+    archive_org_key: str  # The original organization key from the archive
+    archive_slug: str  # The original slug from the archive
+    title: str
+    num_containers: int
+    num_sections: int
+    num_subsections: int
+    num_units: int
+    num_components: int
+    num_collections: int
+
+
+@dataclass
+class BackupMetadata:
+    """
+    Metadata about the backup operation.
+    """
+    format_version: int
+    created_at: str
+    created_by: str | None = None
+    created_by_email: str | None = None
+    original_server: str | None = None
+
+
+@dataclass
+class RestoreResult:
+    """
+    Result of the restore operation.
+    """
+    status: Literal["success", "error"]
+    log_file_error: StringIO | None = None
+    lp_restored_data: RestoreLearningPackageData | None = None
+    backup_metadata: BackupMetadata | None = None
+
+
+def unpack_lp_key(lp_key: str) -> tuple[str, str]:
+    """
+    Unpack a learning package key into its components.
+    """
+    parts = lp_key.split(":")
+    if len(parts) < 3:
+        raise ValueError(f"Invalid learning package key: {lp_key}")
+    _, org_key, lp_slug = parts[:3]
+    return org_key, lp_slug
+
+
+def generate_staged_lp_key(archive_lp_key: str, user: UserType) -> str:
+    """
+    Generate a staged learning package key based on the given base key.
+
+    Arguments:
+        archive_lp_key (str): The original learning package key from the archive.
+        user (UserType | None): The user performing the restore operation.
+
+    Example:
+        Input: "lib:WGU:LIB_C001"
+        Output: "lp-restore:dave:WGU:LIB_C001:1728575321"
+
+    The timestamp at the end ensures the key is unique.
+    """
+    username = user.username
+    org_key, lp_slug = unpack_lp_key(archive_lp_key)
+    timestamp = int(time.time() * 1000)  # Current time in milliseconds
+    return f"lp-restore:{username}:{org_key}:{lp_slug}:{timestamp}"
+
+
+class LearningPackageUnzipper:
+    """
+    Handles extraction and restoration of learning package data from a zip archive.
+
+    Args:
+        zipf (zipfile.ZipFile): The zip file containing the learning package data.
+        user (UserType | None): The user performing the restore operation. Not necessarily the creator.
+        generate_new_key (bool): Whether to generate a new key for the restored learning package.
+
+    Returns:
+        dict[str, Any]: The result of the restore operation, including any errors encountered.
+
+    Responsibilities:
+        - Parse and organize files from the zip structure.
+        - Restore learning package, containers, components, and collections to the database.
+        - Ensure atomicity of the restore process.
+
+    Usage:
+        unzipper = LearningPackageUnzipper(zip_file)
+        result = unzipper.load()
+    """
+
+    def __init__(self, zipf: zipfile.ZipFile, key: str | None = None, user: UserType | None = None):
+        self.zipf = zipf
+        self.user = user
+        self.lp_key = key  # If provided, use this key for the restored learning package
+        self.learning_package_id: int | None = None  # Will be set upon restoration
+        self.utc_now: datetime = datetime.now(timezone.utc)
+        self.component_types_cache: dict[tuple[str, str], ComponentType] = {}
+        self.errors: list[dict[str, Any]] = []
+        # Maps for resolving relationships
+        self.components_map_by_key: dict[str, Any] = {}
+        self.units_map_by_key: dict[str, Any] = {}
+        self.subsections_map_by_key: dict[str, Any] = {}
+        self.sections_map_by_key: dict[str, Any] = {}
+        self.all_publishable_entities_keys: set[str] = set()
+        self.all_published_entities_versions: set[tuple[str, int]] = set()  # To track published entity versions
+
+    # --------------------------
+    # Public API
+    # --------------------------
+
+    @transaction.atomic
+    def load(self) -> dict[str, Any]:
+        """Extracts and restores all objects from the ZIP archive in an atomic transaction."""
+
+        # Step 1: Validate presence of package.toml and basic structure
+        _, organized_files = self.check_mandatory_files()
+        if self.errors:
+            # Early return if preliminary checks fail since mandatory files are missing
+            result = RestoreResult(
+                status="error",
+                log_file_error=self._write_errors(),  # return a StringIO with the errors
+                lp_restored_data=None,
+                backup_metadata=None,
+            )
+            return asdict(result)
+
+        # Step 2: Extract and validate learning package, entities and collections
+        # Errors are collected and reported at the end
+        # No saving to DB happens until all validations pass
+        learning_package_validated = self._extract_learning_package(organized_files["learning_package"])
+        lp_metadata = learning_package_validated.pop("metadata", {})
+
+        components_validated = self._extract_entities(
+            organized_files["components"], ComponentSerializer, ComponentVersionSerializer
+        )
+        containers_validated = self._extract_entities(
+            organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
+        )
+
+        collections_validated = self._extract_collections(
+            organized_files["collections"]
+        )
+
+        # Step 3.1: If there are validation errors, return them without saving anything
+        if self.errors:
+            result = RestoreResult(
+                status="error",
+                log_file_error=self._write_errors(),  # return a StringIO with the errors
+                lp_restored_data=None,
+                backup_metadata=None,
+            )
+            return asdict(result)
+
+        # Step 3.2: Save everything to the DB
+        # All validations passed, we can proceed to save everything
+        # Save the learning package first to get its ID
+        archive_lp_key = learning_package_validated["key"]
+        learning_package = self._save(
+            learning_package_validated,
+            components_validated,
+            containers_validated,
+            collections_validated,
+            component_static_files=organized_files["component_static_files"]
+        )
+
+        num_containers = sum(
+            len(containers_validated.get(container_type, []))
+            for container_type in ["section", "subsection", "unit"]
+        )
+
+        org_key, lp_slug = unpack_lp_key(archive_lp_key)
+        result = RestoreResult(
+            status="success",
+            log_file_error=None,
+            lp_restored_data=RestoreLearningPackageData(
+                id=learning_package.id,
+                key=learning_package.key,
+                archive_lp_key=archive_lp_key,  # The original key from the backup archive
+                archive_org_key=org_key,  # The original organization key from the backup archive
+                archive_slug=lp_slug,  # The original slug from the backup archive
+                title=learning_package.title,
+                num_containers=num_containers,
+                num_sections=len(containers_validated.get("section", [])),
+                num_subsections=len(containers_validated.get("subsection", [])),
+                num_units=len(containers_validated.get("unit", [])),
+                num_components=len(components_validated["components"]),
+                num_collections=len(collections_validated["collections"]),
+            ),
+            backup_metadata=BackupMetadata(
+                format_version=lp_metadata.get("format_version", 1),
+                created_by=lp_metadata.get("created_by"),
+                created_by_email=lp_metadata.get("created_by_email"),
+                created_at=lp_metadata.get("created_at"),
+                original_server=lp_metadata.get("origin_server"),
+            ) if lp_metadata else None,
+        )
+        return asdict(result)
+
+    def check_mandatory_files(self) -> Tuple[list[dict[str, Any]], dict[str, Any]]:
+        """
+        Check for the presence of mandatory files in the zip archive.
+        So far, the only mandatory file is package.toml.
+        """
+        organized_files = self._get_organized_file_list(self.zipf.namelist())
+
+        if not organized_files["learning_package"]:
+            self.errors.append({"file": TOML_PACKAGE_NAME, "errors": "Missing learning package file."})
+
+        return self.errors, organized_files
+
+    # --------------------------
+    # Extract + Validate
+    # --------------------------
+
+    def _extract_learning_package(self, package_file: str) -> dict[str, Any]:
+        """Extract and validate the learning package TOML file."""
+        toml_content_text = self._read_file_from_zip(package_file)
+        toml_content_dict = parse_learning_package_toml(toml_content_text)
+        lp = toml_content_dict.get("learning_package")
+        lp_metadata = toml_content_dict.get("meta")
+
+        # Validate learning package data
+        lp_serializer = LearningPackageSerializer(data=lp)
+        if not lp_serializer.is_valid():
+            self.errors.append({"file": f"{package_file} learning package section", "errors": lp_serializer.errors})
+
+        # Validate metadata if present
+        lp_metadata_serializer = LearningPackageMetadataSerializer(data=lp_metadata)
+        if not lp_metadata_serializer.is_valid():
+            self.errors.append({"file": f"{package_file} meta section", "errors": lp_metadata_serializer.errors})
+
+        lp_validated = lp_serializer.validated_data if lp_serializer.is_valid() else {}
+        lp_metadata = lp_metadata_serializer.validated_data if lp_metadata_serializer.is_valid() else {}
+        lp_validated["metadata"] = lp_metadata
+        return lp_validated
+
+    def _extract_entities(
+        self,
+        entity_files: list[str],
+        entity_serializer: type[serializers.Serializer],
+        version_serializer: type[serializers.Serializer],
+    ) -> dict[str, Any]:
+        """Generic extraction + validation pipeline for containers or components."""
+        results: dict[str, list[Any]] = defaultdict(list)
+
+        for file in entity_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+
+            entity_data, draft_version, published_version = self._load_entity_data(file)
+            serializer = entity_serializer(
+                data={"created": self.utc_now, "created_by": None, **entity_data}
+            )
+
+            if not serializer.is_valid():
+                self.errors.append({"file": file, "errors": serializer.errors})
+                continue
+
+            entity_data = serializer.validated_data
+            self.all_publishable_entities_keys.add(entity_data["key"])
+            entity_type = entity_data.pop("container_type", "components")
+            results[entity_type].append(entity_data)
+
+            valid_versions = self._validate_versions(
+                entity_data,
+                draft_version,
+                published_version,
+                version_serializer,
+                file=file
+            )
+            if valid_versions["draft"]:
+                results[f"{entity_type}_drafts"].append(valid_versions["draft"])
+            if valid_versions["published"]:
+                results[f"{entity_type}_published"].append(valid_versions["published"])
+
+        return results
+
+    def _extract_collections(
+        self,
+        collection_files: list[str],
+    ) -> dict[str, Any]:
+        """Extraction + validation pipeline for collections."""
+        results: dict[str, list[Any]] = defaultdict(list)
+
+        for file in collection_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+            toml_content = self._read_file_from_zip(file)
+            collection_data = parse_collection_toml(toml_content)
+            collection_data = collection_data.get("collection", {})
+            serializer = CollectionSerializer(data={"created_by": None, **collection_data})
+            if not serializer.is_valid():
+                self.errors.append({"file": f"{file} collection section", "errors": serializer.errors})
+                continue
+            collection_validated = serializer.validated_data
+            entities_list = collection_validated["entities"]
+            for entity_key in entities_list:
+                if entity_key not in self.all_publishable_entities_keys:
+                    self.errors.append({
+                        "file": file,
+                        "errors": f"Entity key {entity_key} not found for collection {collection_validated.get('key')}"
+                    })
+            results["collections"].append(collection_validated)
+
+        return results
+
+    # --------------------------
+    # Save Logic
+    # --------------------------
+
+    def _save(
+        self,
+        learning_package: dict[str, Any],
+        components: dict[str, Any],
+        containers: dict[str, Any],
+        collections: dict[str, Any],
+        *,
+        component_static_files: dict[str, List[str]]
+    ) -> LearningPackage:
+        """Persist all validated entities in two phases: published then drafts."""
+
+        # Important: If not using a specific LP key, generate a temporary one
+        # We cannot use the original key because it may generate security issues
+        if not self.lp_key:
+            # Generate a tmp key for the staged learning package
+            if not self.user:
+                raise ValueError("User is required to create lp_key")
+            learning_package["key"] = generate_staged_lp_key(
+                archive_lp_key=learning_package["key"],
+                user=self.user
+            )
+        else:
+            learning_package["key"] = self.lp_key
+
+        learning_package_obj = publishing_api.create_learning_package(**learning_package)
+        self.learning_package_id = learning_package_obj.id
+
+        with publishing_api.bulk_draft_changes_for(learning_package_obj.id):
+            self._save_components(learning_package_obj, components, component_static_files)
+            self._save_units(learning_package_obj, containers)
+            self._save_subsections(learning_package_obj, containers)
+            self._save_sections(learning_package_obj, containers)
+            self._save_collections(learning_package_obj, collections)
+            publishing_api.publish_all_drafts(learning_package_obj.id)
+
+        with publishing_api.bulk_draft_changes_for(learning_package_obj.id):
+            self._save_draft_versions(components, containers, component_static_files)
+
+        return learning_package_obj
+
+    def _save_collections(self, learning_package, collections):
+        """Save collections and their entities."""
+        for valid_collection in collections.get("collections", []):
+            entities = valid_collection.pop("entities", [])
+            collection = collections_api.create_collection(learning_package.id, **valid_collection)
+            collection = collections_api.add_to_collection(
+                learning_package_id=learning_package.id,
+                key=collection.key,
+                entities_qset=publishing_api.get_publishable_entities(learning_package.id).filter(key__in=entities)
+            )
+
+    def _save_components(self, learning_package, components, component_static_files):
+        """Save components and published component versions."""
+        for valid_component in components.get("components", []):
+            entity_key = valid_component.pop("key")
+            component = components_api.create_component(learning_package.id, **valid_component)
+            self.components_map_by_key[entity_key] = component
+
+        for valid_published in components.get("components_published", []):
+            entity_key = valid_published.pop("entity_key")
+            version_num = valid_published["version_num"]  # Should exist, validated earlier
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
+            self.all_published_entities_versions.add(
+                (entity_key, version_num)
+            )  # Track published version
+            components_api.create_next_component_version(
+                self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_units(self, learning_package, containers):
+        """Save units and published unit versions."""
+        for valid_unit in containers.get("unit", []):
+            entity_key = valid_unit.get("key")
+            unit = units_api.create_unit(learning_package.id, **valid_unit)
+            self.units_map_by_key[entity_key] = unit
+
+        for valid_published in containers.get("unit_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.components_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            units_api.create_next_unit_version(
+                self.units_map_by_key[entity_key],
+                force_version_num=valid_published.pop("version_num", None),
+                components=children,
+                **valid_published
+            )
+
+    def _save_subsections(self, learning_package, containers):
+        """Save subsections and published subsection versions."""
+        for valid_subsection in containers.get("subsection", []):
+            entity_key = valid_subsection.get("key")
+            subsection = subsections_api.create_subsection(learning_package.id, **valid_subsection)
+            self.subsections_map_by_key[entity_key] = subsection
+
+        for valid_published in containers.get("subsection_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.units_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            subsections_api.create_next_subsection_version(
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_sections(self, learning_package, containers):
+        """Save sections and published section versions."""
+        for valid_section in containers.get("section", []):
+            entity_key = valid_section.get("key")
+            section = sections_api.create_section(learning_package.id, **valid_section)
+            self.sections_map_by_key[entity_key] = section
+
+        for valid_published in containers.get("section_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.subsections_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            sections_api.create_next_section_version(
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_draft_versions(self, components, containers, component_static_files):
+        """Save draft versions for all entity types."""
+        for valid_draft in components.get("components_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
+            components_api.create_next_component_version(
+                self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_draft.pop("version_num", None),
+                # Drafts can diverge from published, so we allow ignoring previous content
+                # Use case: published v1 had files A, B; draft v2 only has file A
+                ignore_previous_content=True,
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("unit_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.components_map_by_key)
+            units_api.create_next_unit_version(
+                self.units_map_by_key[entity_key],
+                components=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("subsection_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.units_map_by_key)
+            subsections_api.create_next_subsection_version(
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("section_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.subsections_map_by_key)
+            sections_api.create_next_section_version(
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+    # --------------------------
+    # Utilities
+    # --------------------------
+
+    def _format_errors(self) -> str:
+        """Return formatted error content as a string."""
+        if not self.errors:
+            return ""
+        lines = [f"{err['file']}: {err['errors']}" for err in self.errors]
+        return "Errors encountered during restore:\n" + "\n".join(lines) + "\n"
+
+    def _write_errors(self) -> StringIO | None:
+        """
+        Write errors to a StringIO buffer.
+        """
+        content = self._format_errors()
+        if not content:
+            return None
+        return StringIO(content)
+
+    def _is_version_already_exists(self, entity_key: str, version_num: int) -> bool:
+        """
+        Check if a version already exists for a given entity key and version number.
+
+        Note:
+            Skip creating draft if this version is already published
+            Why? Because the version itself is already created and
+            we don't want to create duplicate versions.
+            Otherwise, we will raise an IntegrityError on PublishableEntityVersion
+            due to unique constraints between publishable_entity and version_num.
+        """
+        identifier = (entity_key, version_num)
+        return identifier in self.all_published_entities_versions
+
+    def _resolve_static_files(
+        self,
+        num_version: int,
+        entity_key: str,
+        static_files_map: dict[str, List[str]]
+    ) -> dict[str, bytes | int]:
+        """Resolve static file paths into their binary content."""
+        resolved_files: dict[str, bytes | int] = {}
+
+        static_file_key = f"{entity_key}:v{num_version}"  # e.g., "xblock.v1:html:my_component_123456:v1"
+        block_type = entity_key.split(":")[1]  # e.g., "html"
+        static_files = static_files_map.get(static_file_key, [])
+        for static_file in static_files:
+            local_key = static_file.split(f"v{num_version}/")[-1]
+            with self.zipf.open(static_file, "r") as f:
+                content_bytes = f.read()
+            if local_key == "block.xml":
+                # Special handling for block.xml to ensure
+                # storing the value as a content instance
+                if not self.learning_package_id:
+                    raise ValueError("learning_package_id must be set before resolving static files.")
+                text_content = contents_api.get_or_create_text_content(
+                    self.learning_package_id,
+                    contents_api.get_or_create_media_type(f"application/vnd.openedx.xblock.v1.{block_type}+xml").id,
+                    text=content_bytes.decode("utf-8"),
+                    created=self.utc_now,
+                )
+                resolved_files[local_key] = text_content.id
+            else:
+                resolved_files[local_key] = content_bytes
+        return resolved_files
+
+    def _resolve_children(self, entity_data: dict[str, Any], lookup_map: dict[str, Any]) -> list[Any]:
+        """Resolve child entity keys into model instances."""
+        children_keys = entity_data.pop("children", [])
+        return [lookup_map[key] for key in children_keys if key in lookup_map]
+
+    def _load_entity_data(
+        self, entity_file: str
+    ) -> tuple[dict[str, Any], dict[str, Any] | None, dict[str, Any] | None]:
+        """Load entity data and its versions from TOML."""
+        entity_toml_txt = self._read_file_from_zip(entity_file)
+        entity_toml_dict = parse_publishable_entity_toml(entity_toml_txt)
+        entity_data = entity_toml_dict.get("entity", {})
+        version_data = entity_toml_dict.get("version", [])
+        return entity_data, *self._get_versions_to_write(version_data, entity_data)
+
+    def _validate_versions(self, entity_data, draft, published, serializer_cls, *, file) -> dict[str, Any]:
+        """Validate draft/published versions with serializer."""
+        valid = {"draft": None, "published": None}
+        for label, version in [("draft", draft), ("published", published)]:
+            if not version:
+                continue
+            serializer = serializer_cls(
+                data={
+                    "entity_key": entity_data["key"],
+                    "created": self.utc_now,
+                    "created_by": None,
+                    **version
+                }
+            )
+            if serializer.is_valid():
+                valid[label] = serializer.validated_data
+            else:
+                self.errors.append({"file": file, "errors": serializer.errors})
+        return valid
+
+    def _read_file_from_zip(self, filename: str) -> str:
+        """Read and decode a UTF-8 file from the zip archive."""
+        with self.zipf.open(filename) as f:
+            return f.read().decode("utf-8")
+
+    def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
+        """Organize file paths into categories: learning_package, containers, components, collections."""
+        organized: dict[str, Any] = {
+            "learning_package": None,
+            "containers": [],
+            "components": [],
+            "component_static_files": defaultdict(list),
+            "collections": [],
+        }
+
+        for path in file_paths:
+            if path.endswith("/"):
+                # Skip directories
+                continue
+            if path == TOML_PACKAGE_NAME:
+                organized["learning_package"] = path
+            elif path.startswith("entities/") and str(Path(path).parent) == "entities" and path.endswith(".toml"):
+                # Top-level entity TOML files are considered containers
+                organized["containers"].append(path)
+            elif path.startswith("entities/"):
+                if path.endswith(".toml"):
+                    # Component entity TOML files
+                    organized["components"].append(path)
+                else:
+                    # Component static files
+                    # Path structure: entities/<namespace>/<type>/<component_id>/component_versions/<version>/static/...
+                    # Example: entities/xblock.v1/html/my_component_123456/component_versions/v1/static/...
+                    component_key = Path(path).parts[1:4]  # e.g., ['xblock.v1', 'html', 'my_component_123456']
+                    num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1"  # e.g., 'v1'
+                    if len(component_key) == 3:
+                        component_identifier = ":".join(component_key)
+                        component_identifier += f":{num_version}"
+                        organized["component_static_files"][component_identifier].append(path)
+                    else:
+                        self.errors.append({"file": path, "errors": "Invalid component static file path structure."})
+            elif path.startswith("collections/") and path.endswith(".toml"):
+                # Collection TOML files
+                organized["collections"].append(path)
+        return organized
+
+    def _get_versions_to_write(
+        self,
+        version_data: list[dict[str, Any]],
+        entity_data: dict[str, Any]
+    ) -> tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
+        """Return the draft and published versions to write, based on entity data."""
+        draft_num = entity_data.get("draft", {}).get("version_num")
+        published_num = entity_data.get("published", {}).get("version_num")
+        lookup = {v.get("version_num"): v for v in version_data}
+        return (
+            lookup.get(draft_num) if draft_num else None,
+            lookup.get(published_num) if published_num else None,
+        )
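
Taken together, the two classes form a backup/restore round trip. A rough usage sketch, assuming a Django context where learning_package is an existing LearningPackage and the filename is illustrative:

    import zipfile
    from django.contrib.auth import get_user_model

    user = get_user_model().objects.get(username="dave")

    # Backup
    LearningPackageZipper(learning_package, user=user).create_zip("backup.zip")

    # Restore; with no explicit key, a staged "lp-restore:..." key is generated
    with zipfile.ZipFile("backup.zip") as zf:
        result = LearningPackageUnzipper(zf, user=user).load()

    if result["status"] == "success":
        print(result["lp_restored_data"]["key"])  # e.g. "lp-restore:dave:WGU:LIB_C001:1728575321"
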