openedx-learning 0.28.0__py2.py3-none-any.whl → 0.29.0__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
@@ -3,14 +3,24 @@ This module provides functionality to create a zip file containing the learning
 including a TOML representation of the learning package and its entities.
 """
 import hashlib
+import time
 import zipfile
+from collections import defaultdict
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from io import StringIO
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, List, Literal, Optional, Tuple

+from django.contrib.auth.models import User as UserType  # pylint: disable=imported-auth-user
+from django.db import transaction
 from django.db.models import Prefetch, QuerySet
 from django.utils.text import slugify
+from rest_framework import serializers

 from openedx_learning.api.authoring_models import (
+    Collection,
+    ComponentType,
     ComponentVersion,
     ComponentVersionContent,
     Content,
@@ -18,10 +28,32 @@ from openedx_learning.api.authoring_models import (
     PublishableEntity,
     PublishableEntityVersion,
 )
-from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
+from openedx_learning.apps.authoring.backup_restore.serializers import (
+    CollectionSerializer,
+    ComponentSerializer,
+    ComponentVersionSerializer,
+    ContainerSerializer,
+    ContainerVersionSerializer,
+    LearningPackageMetadataSerializer,
+    LearningPackageSerializer,
+)
+from openedx_learning.apps.authoring.backup_restore.toml import (
+    parse_collection_toml,
+    parse_learning_package_toml,
+    parse_publishable_entity_toml,
+    toml_collection,
+    toml_learning_package,
+    toml_publishable_entity,
+)
+from openedx_learning.apps.authoring.collections import api as collections_api
+from openedx_learning.apps.authoring.components import api as components_api
 from openedx_learning.apps.authoring.publishing import api as publishing_api
+from openedx_learning.apps.authoring.sections import api as sections_api
+from openedx_learning.apps.authoring.subsections import api as subsections_api
+from openedx_learning.apps.authoring.units import api as units_api

 TOML_PACKAGE_NAME = "package.toml"
+DEFAULT_USERNAME = "command"


 def slugify_hashed_filename(identifier: str) -> str:
@@ -55,21 +87,73 @@ class LearningPackageZipper:
     A class to handle the zipping of learning content for backup and restore.
     """

-    def __init__(self, learning_package: LearningPackage):
+    def __init__(self, learning_package: LearningPackage, user: UserType | None = None):
         self.learning_package = learning_package
+        self.user = user
         self.folders_already_created: set[Path] = set()
+        self.entities_filenames_already_created: set[str] = set()
+        self.utc_now = datetime.now(tz=timezone.utc)

-    def create_folder(self, folder_path: Path, zip_file: zipfile.ZipFile) -> None:
+    def _ensure_parent_folders(
+        self,
+        zip_file: zipfile.ZipFile,
+        path: Path,
+        timestamp: datetime,
+    ) -> None:
         """
-        Create a folder for the zip file structure.
-        Skips creating the folder if it already exists based on the folder path.
-        Args:
-            folder_path (Path): The path of the folder to create.
+        Ensure all parent folders for the given path exist in the zip.
+        """
+        for parent in path.parents[::-1]:
+            if parent != Path(".") and parent not in self.folders_already_created:
+                folder_info = zipfile.ZipInfo(str(parent) + "/")
+                folder_info.date_time = timestamp.timetuple()[:6]
+                zip_file.writestr(folder_info, "")
+                self.folders_already_created.add(parent)
+
+    def add_folder_to_zip(
+        self,
+        zip_file: zipfile.ZipFile,
+        folder: Path,
+        timestamp: datetime | None = None,
+    ) -> None:
+        """
+        Explicitly add an empty folder into the zip structure.
         """
-        if folder_path not in self.folders_already_created:
-            zip_info = zipfile.ZipInfo(str(folder_path) + "/")
-            zip_file.writestr(zip_info, "")  # Add explicit empty directory entry
-            self.folders_already_created.add(folder_path)
+        if folder in self.folders_already_created:
+            return
+
+        if timestamp is None:
+            timestamp = self.utc_now
+
+        self._ensure_parent_folders(zip_file, folder, timestamp)
+
+        folder_info = zipfile.ZipInfo(str(folder) + "/")
+        folder_info.date_time = timestamp.timetuple()[:6]
+        zip_file.writestr(folder_info, "")
+        self.folders_already_created.add(folder)
+
+    def add_file_to_zip(
+        self,
+        zip_file: zipfile.ZipFile,
+        file_path: Path,
+        content: bytes | str | None = None,
+        timestamp: datetime | None = None,
+    ) -> None:
+        """
+        Add a file into the zip structure.
+        """
+        if timestamp is None:
+            timestamp = self.utc_now
+
+        self._ensure_parent_folders(zip_file, file_path, timestamp)
+
+        file_info = zipfile.ZipInfo(str(file_path))
+        file_info.date_time = timestamp.timetuple()[:6]
+
+        if isinstance(content, str):
+            content = content.encode("utf-8")
+
+        zip_file.writestr(file_info, content or b"")

     def get_publishable_entities(self) -> QuerySet[PublishableEntity]:
         """
@@ -79,7 +163,7 @@ class LearningPackageZipper:
         lp_id = self.learning_package.pk
         publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
         return (
-            publishable_entities
+            publishable_entities  # type: ignore[no-redef]
             .select_related(
                 "container",
                 "component__component_type",
@@ -102,12 +186,32 @@ class LearningPackageZipper:
                    to_attr="prefetched_contents",
                ),
            )
+            .order_by("key")
+        )
+
+    def get_collections(self) -> QuerySet[Collection]:
+        """
+        Get the collections associated with the learning package.
+        """
+        return (
+            collections_api.get_collections(self.learning_package.pk)
+            .prefetch_related("entities")
         )

-    def get_versions_to_write(self, entity: PublishableEntity):
+    def get_versions_to_write(
+        self, entity: PublishableEntity
+    ) -> Tuple[List[PublishableEntityVersion],
+               Optional[PublishableEntityVersion],
+               Optional[PublishableEntityVersion]]:
         """
         Get the versions of a publishable entity that should be written to the zip file.
         It retrieves both draft and published versions.
+
+        Returns:
+            Tuple containing:
+            - versions_to_write: List of PublishableEntityVersion to write.
+            - draft_version: The current draft version, if any.
+            - published_version: The current published version, if any.
         """
         draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
         published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)
@@ -116,7 +220,42 @@ class LearningPackageZipper:

         if published_version and published_version != draft_version:
             versions_to_write.append(published_version)
-        return versions_to_write
+        return versions_to_write, draft_version, published_version
+
+    def get_entity_toml_filename(self, entity_key: str) -> str:
+        """
+        Generate a unique TOML filename for a publishable entity.
+        Ensures that the filename is unique within the zip file.
+
+        Behavior:
+        - If the slugified key has not been used yet, use it as the filename.
+        - If it has been used, append a short hash to ensure uniqueness.
+
+        Args:
+            entity_key (str): The key of the publishable entity.
+
+        Returns:
+            str: A unique TOML filename for the entity.
+        """
+        slugify_name = slugify(entity_key, allow_unicode=True)
+
+        if slugify_name in self.entities_filenames_already_created:
+            filename = slugify_hashed_filename(entity_key)
+        else:
+            filename = slugify_name
+
+        self.entities_filenames_already_created.add(slugify_name)
+        return filename
+
+    def get_latest_modified(self, versions_to_check: List[PublishableEntityVersion]) -> datetime:
+        """
+        Get the latest modification timestamp among the learning package and its entities.
+        """
+        latest = self.learning_package.updated
+        for version in versions_to_check:
+            if version and version.created > latest:
+                latest = version.created
+        return latest

     def create_zip(self, path: str) -> None:
         """
@@ -129,16 +268,16 @@ class LearningPackageZipper:

         with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
             # Add the package.toml file
-            package_toml_content: str = toml_learning_package(self.learning_package)
-            zipf.writestr(TOML_PACKAGE_NAME, package_toml_content)
+            package_toml_content: str = toml_learning_package(self.learning_package, self.utc_now, user=self.user)
+            self.add_file_to_zip(zipf, Path(TOML_PACKAGE_NAME), package_toml_content, self.learning_package.updated)

             # Add the entities directory
             entities_folder = Path("entities")
-            self.create_folder(entities_folder, zipf)
+            self.add_folder_to_zip(zipf, entities_folder, timestamp=self.learning_package.updated)

             # Add the collections directory
             collections_folder = Path("collections")
-            self.create_folder(collections_folder, zipf)
+            self.add_folder_to_zip(zipf, collections_folder, timestamp=self.learning_package.updated)

             # ------ ENTITIES SERIALIZATION -------------

@@ -148,14 +287,21 @@ class LearningPackageZipper:
             for entity in publishable_entities:
                 # entity: PublishableEntity = entity  # Type hint for clarity

+                # Get the versions to serialize for this entity
+                versions_to_write, draft_version, published_version = self.get_versions_to_write(entity)
+
+                latest_modified = self.get_latest_modified(versions_to_write)
+
                 # Create a TOML representation of the entity
-                entity_toml_content: str = toml_publishable_entity(entity)
+                entity_toml_content: str = toml_publishable_entity(
+                    entity, versions_to_write, draft_version, published_version
+                )

                 if hasattr(entity, 'container'):
-                    entity_slugify_hash = slugify_hashed_filename(entity.key)
-                    entity_toml_filename = f"{entity_slugify_hash}.toml"
+                    entity_filename = self.get_entity_toml_filename(entity.key)
+                    entity_toml_filename = f"{entity_filename}.toml"
                     entity_toml_path = entities_folder / entity_toml_filename
-                    zipf.writestr(str(entity_toml_path), entity_toml_content)
+                    self.add_file_to_zip(zipf, entity_toml_path, entity_toml_content, timestamp=latest_modified)

                 if hasattr(entity, 'component'):
                     # Create the component folder structure for the entity. The structure is as follows:
@@ -168,51 +314,43 @@ class LearningPackageZipper:
                     #                     v1/
                     #                         static/

-                    # Generate the slugified hash for the component local key
-                    # Example: if the local key is "my_component", the slugified hash might be "my_component_123456"
-                    # It's a combination of the local key and a hash and should be unique
-                    entity_slugify_hash = slugify_hashed_filename(entity.component.local_key)
+                    entity_filename = self.get_entity_toml_filename(entity.component.local_key)

-                    # Create the component namespace folder
-                    # Example of component namespace is: "entities/xblock.v1/"
-                    component_namespace_folder = entities_folder / entity.component.component_type.namespace
-                    self.create_folder(component_namespace_folder, zipf)
+                    component_root_folder = (
+                        # Example: "entities/xblock.v1/html/"
+                        entities_folder
+                        / entity.component.component_type.namespace
+                        / entity.component.component_type.name
+                    )

-                    # Create the component type folder
-                    # Example of component type is: "entities/xblock.v1/html/"
-                    component_type_folder = component_namespace_folder / entity.component.component_type.name
-                    self.create_folder(component_type_folder, zipf)
+                    component_folder = (
+                        # Example: "entities/xblock.v1/html/my_component_123456/"
+                        component_root_folder
+                        / entity_filename
+                    )

-                    # Create the component id folder
-                    # Example of component id is: "entities/xblock.v1/html/my_component_123456/"
-                    component_id_folder = component_type_folder / entity_slugify_hash
-                    self.create_folder(component_id_folder, zipf)
+                    component_version_folder = (
+                        # Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
+                        component_folder
+                        / "component_versions"
+                    )

                     # Add the entity TOML file inside the component type folder as well
                     # Example: "entities/xblock.v1/html/my_component_123456.toml"
-                    component_entity_toml_path = component_type_folder / f"{entity_slugify_hash}.toml"
-                    zipf.writestr(str(component_entity_toml_path), entity_toml_content)
-
-                    # Add component version folder into the component id folder
-                    # Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
-                    component_version_folder = component_id_folder / "component_versions"
-                    self.create_folder(component_version_folder, zipf)
+                    component_entity_toml_path = component_root_folder / f"{entity_filename}.toml"
+                    self.add_file_to_zip(zipf, component_entity_toml_path, entity_toml_content, latest_modified)

                     # ------ COMPONENT VERSIONING -------------
-                    # Focusing on draft and published versions
-
-                    # Get the draft and published versions
-                    versions_to_write: List[PublishableEntityVersion] = self.get_versions_to_write(entity)
-
+                    # Focusing on draft and published versions only
                     for version in versions_to_write:
                         # Create a folder for the version
                         version_number = f"v{version.version_num}"
                         version_folder = component_version_folder / version_number
-                        self.create_folder(version_folder, zipf)
+                        self.add_folder_to_zip(zipf, version_folder, timestamp=version.created)

                         # Add static folder for the version
                         static_folder = version_folder / "static"
-                        self.create_folder(static_folder, zipf)
+                        self.add_folder_to_zip(zipf, static_folder, timestamp=version.created)

                         # ------ COMPONENT STATIC CONTENT -------------
                         component_version: ComponentVersion = version.componentversion
@@ -239,4 +377,668 @@ class LearningPackageZipper:
                         else:
                             # If no file and no text, we skip this content
                             continue
-                        zipf.writestr(str(file_path), file_data)
+                        self.add_file_to_zip(zipf, file_path, file_data, timestamp=content.created)
+
+            # ------ COLLECTION SERIALIZATION -------------
+            collections = self.get_collections()
+
+            for collection in collections:
+                collection_hash_slug = self.get_entity_toml_filename(collection.key)
+                collection_toml_file_path = collections_folder / f"{collection_hash_slug}.toml"
+                entity_keys_related = collection.entities.order_by("key").values_list("key", flat=True)
+                self.add_file_to_zip(
+                    zipf,
+                    collection_toml_file_path,
+                    toml_collection(collection, list(entity_keys_related)),
+                    timestamp=collection.modified,
+                )
+
+
+@dataclass
+class RestoreLearningPackageData:
+    """
+    Data about the restored learning package.
+    """
+    id: int  # The ID of the restored learning package
+    key: str  # The key of the restored learning package (may be different if staged)
+    archive_lp_key: str  # The original key from the archive
+    archive_org_key: str  # The original organization key from the archive
+    archive_slug: str  # The original slug from the archive
+    title: str
+    num_containers: int
+    num_sections: int
+    num_subsections: int
+    num_units: int
+    num_components: int
+    num_collections: int
+
+
+@dataclass
+class BackupMetadata:
+    """
+    Metadata about the backup operation.
+    """
+    format_version: int
+    created_at: str
+    created_by: str | None = None
+    created_by_email: str | None = None
+    original_server: str | None = None
+
+
+@dataclass
+class RestoreResult:
+    """
+    Result of the restore operation.
+    """
+    status: Literal["success", "error"]
+    log_file_error: StringIO | None = None
+    lp_restored_data: RestoreLearningPackageData | None = None
+    backup_metadata: BackupMetadata | None = None
+
+
+def unpack_lp_key(lp_key: str) -> tuple[str, str]:
+    """
+    Unpack a learning package key into its components.
+    """
+    parts = lp_key.split(":")
+    if len(parts) < 3:
+        raise ValueError(f"Invalid learning package key: {lp_key}")
+    _, org_key, lp_slug = parts[:3]
+    return org_key, lp_slug
+
+
+def generate_staged_lp_key(archive_lp_key: str, user: UserType) -> str:
+    """
+    Generate a staged learning package key based on the given base key.
+
+    Arguments:
+        archive_lp_key (str): The original learning package key from the archive.
+        user (UserType | None): The user performing the restore operation.
+
+    Example:
+        Input: "lib:WGU:LIB_C001"
+        Output: "lp-restore:dave:WGU:LIB_C001:1728575321"
+
+    The timestamp at the end ensures the key is unique.
+    """
+    username = user.username
+    org_key, lp_slug = unpack_lp_key(archive_lp_key)
+    timestamp = int(time.time() * 1000)  # Current time in milliseconds
+    return f"lp-restore:{username}:{org_key}:{lp_slug}:{timestamp}"
+
+
+class LearningPackageUnzipper:
+    """
+    Handles extraction and restoration of learning package data from a zip archive.
+
+    Args:
+        zipf (zipfile.ZipFile): The zip file containing the learning package data.
+        user (UserType | None): The user performing the restore operation. Not necessarily the creator.
+        generate_new_key (bool): Whether to generate a new key for the restored learning package.
+
+    Returns:
+        dict[str, Any]: The result of the restore operation, including any errors encountered.
+
+    Responsibilities:
+        - Parse and organize files from the zip structure.
+        - Restore learning package, containers, components, and collections to the database.
+        - Ensure atomicity of the restore process.
+
+    Usage:
+        unzipper = LearningPackageUnzipper(zip_file)
+        result = unzipper.load()
+    """
+
+    def __init__(self, zipf: zipfile.ZipFile, key: str | None = None, user: UserType | None = None):
+        self.zipf = zipf
+        self.user = user
+        self.lp_key = key  # If provided, use this key for the restored learning package
+        self.utc_now: datetime = datetime.now(timezone.utc)
+        self.component_types_cache: dict[tuple[str, str], ComponentType] = {}
+        self.errors: list[dict[str, Any]] = []
+        # Maps for resolving relationships
+        self.components_map_by_key: dict[str, Any] = {}
+        self.units_map_by_key: dict[str, Any] = {}
+        self.subsections_map_by_key: dict[str, Any] = {}
+        self.sections_map_by_key: dict[str, Any] = {}
+        self.all_publishable_entities_keys: set[str] = set()
+        self.all_published_entities_versions: set[tuple[str, int]] = set()  # To track published entity versions
+
+    # --------------------------
+    # Public API
+    # --------------------------
+
+    @transaction.atomic
+    def load(self) -> dict[str, Any]:
+        """Extracts and restores all objects from the ZIP archive in an atomic transaction."""
+
+        # Step 1: Validate presence of package.toml and basic structure
+        _, organized_files = self.check_mandatory_files()
+        if self.errors:
+            # Early return if preliminary checks fail since mandatory files are missing
+            result = RestoreResult(
+                status="error",
+                log_file_error=self._write_errors(),  # return a StringIO with the errors
+                lp_restored_data=None,
+                backup_metadata=None,
+            )
+            return asdict(result)
+
+        # Step 2: Extract and validate learning package, entities and collections
+        # Errors are collected and reported at the end
+        # No saving to DB happens until all validations pass
+        learning_package_validated = self._extract_learning_package(organized_files["learning_package"])
+        lp_metadata = learning_package_validated.pop("metadata", {})
+
+        components_validated = self._extract_entities(
+            organized_files["components"], ComponentSerializer, ComponentVersionSerializer
+        )
+        containers_validated = self._extract_entities(
+            organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
+        )
+
+        collections_validated = self._extract_collections(
+            organized_files["collections"]
+        )
+
+        # Step 3.1: If there are validation errors, return them without saving anything
+        if self.errors:
+            result = RestoreResult(
+                status="error",
+                log_file_error=self._write_errors(),  # return a StringIO with the errors
+                lp_restored_data=None,
+                backup_metadata=None,
+            )
+            return asdict(result)
+
+        # Step 3.2: Save everything to the DB
+        # All validations passed, we can proceed to save everything
+        # Save the learning package first to get its ID
+        archive_lp_key = learning_package_validated["key"]
+        learning_package = self._save(
+            learning_package_validated,
+            components_validated,
+            containers_validated,
+            collections_validated,
+            component_static_files=organized_files["component_static_files"]
+        )
+
+        num_containers = sum(
+            len(containers_validated.get(container_type, []))
+            for container_type in ["section", "subsection", "unit"]
+        )
+
+        org_key, lp_slug = unpack_lp_key(archive_lp_key)
+        result = RestoreResult(
+            status="success",
+            log_file_error=None,
+            lp_restored_data=RestoreLearningPackageData(
+                id=learning_package.id,
+                key=learning_package.key,
+                archive_lp_key=archive_lp_key,  # The original key from the backup archive
+                archive_org_key=org_key,  # The original organization key from the backup archive
+                archive_slug=lp_slug,  # The original slug from the backup archive
+                title=learning_package.title,
+                num_containers=num_containers,
+                num_sections=len(containers_validated.get("section", [])),
+                num_subsections=len(containers_validated.get("subsection", [])),
+                num_units=len(containers_validated.get("unit", [])),
+                num_components=len(components_validated["components"]),
+                num_collections=len(collections_validated["collections"]),
+            ),
+            backup_metadata=BackupMetadata(
+                format_version=lp_metadata.get("format_version", 1),
+                created_by=lp_metadata.get("created_by"),
+                created_by_email=lp_metadata.get("created_by_email"),
+                created_at=lp_metadata.get("created_at"),
+                original_server=lp_metadata.get("origin_server"),
+            ) if lp_metadata else None,
+        )
+        return asdict(result)
+
+    def check_mandatory_files(self) -> Tuple[list[dict[str, Any]], dict[str, Any]]:
+        """
+        Check for the presence of mandatory files in the zip archive.
+        So far, the only mandatory file is package.toml.
+        """
+        organized_files = self._get_organized_file_list(self.zipf.namelist())
+
+        if not organized_files["learning_package"]:
+            self.errors.append({"file": TOML_PACKAGE_NAME, "errors": "Missing learning package file."})
+
+        return self.errors, organized_files
+
+    # --------------------------
+    # Extract + Validate
+    # --------------------------
+
+    def _extract_learning_package(self, package_file: str) -> dict[str, Any]:
+        """Extract and validate the learning package TOML file."""
+        toml_content_text = self._read_file_from_zip(package_file)
+        toml_content_dict = parse_learning_package_toml(toml_content_text)
+        lp = toml_content_dict.get("learning_package")
+        lp_metadata = toml_content_dict.get("meta")
+
+        # Validate learning package data
+        lp_serializer = LearningPackageSerializer(data=lp)
+        if not lp_serializer.is_valid():
+            self.errors.append({"file": f"{package_file} learning package section", "errors": lp_serializer.errors})
+
+        # Validate metadata if present
+        lp_metadata_serializer = LearningPackageMetadataSerializer(data=lp_metadata)
+        if not lp_metadata_serializer.is_valid():
+            self.errors.append({"file": f"{package_file} meta section", "errors": lp_metadata_serializer.errors})
+
+        lp_validated = lp_serializer.validated_data if lp_serializer.is_valid() else {}
+        lp_metadata = lp_metadata_serializer.validated_data if lp_metadata_serializer.is_valid() else {}
+        lp_validated["metadata"] = lp_metadata
+        return lp_validated
+
+    def _extract_entities(
+        self,
+        entity_files: list[str],
+        entity_serializer: type[serializers.Serializer],
+        version_serializer: type[serializers.Serializer],
+    ) -> dict[str, Any]:
+        """Generic extraction + validation pipeline for containers or components."""
+        results: dict[str, list[Any]] = defaultdict(list)
+
+        for file in entity_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+
+            entity_data, draft_version, published_version = self._load_entity_data(file)
+            serializer = entity_serializer(
+                data={"created": self.utc_now, "created_by": None, **entity_data}
+            )
+
+            if not serializer.is_valid():
+                self.errors.append({"file": file, "errors": serializer.errors})
+                continue
+
+            entity_data = serializer.validated_data
+            self.all_publishable_entities_keys.add(entity_data["key"])
+            entity_type = entity_data.pop("container_type", "components")
+            results[entity_type].append(entity_data)
+
+            valid_versions = self._validate_versions(
+                entity_data,
+                draft_version,
+                published_version,
+                version_serializer,
+                file=file
+            )
+            if valid_versions["draft"]:
+                results[f"{entity_type}_drafts"].append(valid_versions["draft"])
+            if valid_versions["published"]:
+                results[f"{entity_type}_published"].append(valid_versions["published"])
+
+        return results
+
+    def _extract_collections(
+        self,
+        collection_files: list[str],
+    ) -> dict[str, Any]:
+        """Extraction + validation pipeline for collections."""
+        results: dict[str, list[Any]] = defaultdict(list)
+
+        for file in collection_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+            toml_content = self._read_file_from_zip(file)
+            collection_data = parse_collection_toml(toml_content)
+            collection_data = collection_data.get("collection", {})
+            serializer = CollectionSerializer(data={"created_by": None, **collection_data})
+            if not serializer.is_valid():
+                self.errors.append({"file": f"{file} collection section", "errors": serializer.errors})
+                continue
+            collection_validated = serializer.validated_data
+            entities_list = collection_validated["entities"]
+            for entity_key in entities_list:
+                if entity_key not in self.all_publishable_entities_keys:
+                    self.errors.append({
+                        "file": file,
+                        "errors": f"Entity key {entity_key} not found for collection {collection_validated.get('key')}"
+                    })
+            results["collections"].append(collection_validated)
+
+        return results
+
+    # --------------------------
+    # Save Logic
+    # --------------------------
+
+    def _save(
+        self,
+        learning_package: dict[str, Any],
+        components: dict[str, Any],
+        containers: dict[str, Any],
+        collections: dict[str, Any],
+        *,
+        component_static_files: dict[str, List[str]]
+    ) -> LearningPackage:
+        """Persist all validated entities in two phases: published then drafts."""
+
+        # Important: If not using a specific LP key, generate a temporary one
+        # We cannot use the original key because it may generate security issues
+        if not self.lp_key:
+            # Generate a tmp key for the staged learning package
+            if not self.user:
+                raise ValueError("User is required to create lp_key")
+            learning_package["key"] = generate_staged_lp_key(
+                archive_lp_key=learning_package["key"],
+                user=self.user
+            )
+        else:
+            learning_package["key"] = self.lp_key
+
+        learning_package_obj = publishing_api.create_learning_package(**learning_package)
+
+        with publishing_api.bulk_draft_changes_for(learning_package_obj.id):
+            self._save_components(learning_package_obj, components, component_static_files)
+            self._save_units(learning_package_obj, containers)
+            self._save_subsections(learning_package_obj, containers)
+            self._save_sections(learning_package_obj, containers)
+            self._save_collections(learning_package_obj, collections)
+        publishing_api.publish_all_drafts(learning_package_obj.id)
+
+        with publishing_api.bulk_draft_changes_for(learning_package_obj.id):
+            self._save_draft_versions(components, containers, component_static_files)
+
+        return learning_package_obj
+
+    def _save_collections(self, learning_package, collections):
+        """Save collections and their entities."""
+        for valid_collection in collections.get("collections", []):
+            entities = valid_collection.pop("entities", [])
+            collection = collections_api.create_collection(learning_package.id, **valid_collection)
+            collection = collections_api.add_to_collection(
+                learning_package_id=learning_package.id,
+                key=collection.key,
+                entities_qset=publishing_api.get_publishable_entities(learning_package.id).filter(key__in=entities)
+            )
+
+    def _save_components(self, learning_package, components, component_static_files):
+        """Save components and published component versions."""
+        for valid_component in components.get("components", []):
+            entity_key = valid_component.pop("key")
+            component = components_api.create_component(learning_package.id, **valid_component)
+            self.components_map_by_key[entity_key] = component
+
+        for valid_published in components.get("components_published", []):
+            entity_key = valid_published.pop("entity_key")
+            version_num = valid_published["version_num"]  # Should exist, validated earlier
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
+            self.all_published_entities_versions.add(
+                (entity_key, version_num)
+            )  # Track published version
+            components_api.create_next_component_version(
+                self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_units(self, learning_package, containers):
+        """Save units and published unit versions."""
+        for valid_unit in containers.get("unit", []):
+            entity_key = valid_unit.get("key")
+            unit = units_api.create_unit(learning_package.id, **valid_unit)
+            self.units_map_by_key[entity_key] = unit
+
+        for valid_published in containers.get("unit_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.components_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            units_api.create_next_unit_version(
+                self.units_map_by_key[entity_key],
+                force_version_num=valid_published.pop("version_num", None),
+                components=children,
+                **valid_published
+            )
+
+    def _save_subsections(self, learning_package, containers):
+        """Save subsections and published subsection versions."""
+        for valid_subsection in containers.get("subsection", []):
+            entity_key = valid_subsection.get("key")
+            subsection = subsections_api.create_subsection(learning_package.id, **valid_subsection)
+            self.subsections_map_by_key[entity_key] = subsection
+
+        for valid_published in containers.get("subsection_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.units_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            subsections_api.create_next_subsection_version(
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_sections(self, learning_package, containers):
+        """Save sections and published section versions."""
+        for valid_section in containers.get("section", []):
+            entity_key = valid_section.get("key")
+            section = sections_api.create_section(learning_package.id, **valid_section)
+            self.sections_map_by_key[entity_key] = section
+
+        for valid_published in containers.get("section_published", []):
+            entity_key = valid_published.pop("entity_key")
+            children = self._resolve_children(valid_published, self.subsections_map_by_key)
+            self.all_published_entities_versions.add(
+                (entity_key, valid_published.get('version_num'))
+            )  # Track published version
+            sections_api.create_next_section_version(
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_published.pop("version_num", None),
+                **valid_published
+            )
+
+    def _save_draft_versions(self, components, containers, component_static_files):
+        """Save draft versions for all entity types."""
+        for valid_draft in components.get("components_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
+            components_api.create_next_component_version(
+                self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_draft.pop("version_num", None),
+                # Drafts can diverge from published, so we allow ignoring previous content
+                # Use case: published v1 had files A, B; draft v2 only has file A
+                ignore_previous_content=True,
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("unit_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.components_map_by_key)
+            units_api.create_next_unit_version(
+                self.units_map_by_key[entity_key],
+                components=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("subsection_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.units_map_by_key)
+            subsections_api.create_next_subsection_version(
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+        for valid_draft in containers.get("section_drafts", []):
+            entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            if self._is_version_already_exists(entity_key, version_num):
+                continue
+            children = self._resolve_children(valid_draft, self.subsections_map_by_key)
+            sections_api.create_next_section_version(
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
+            )
+
+    # --------------------------
+    # Utilities
+    # --------------------------
+
+    def _format_errors(self) -> str:
+        """Return formatted error content as a string."""
+        if not self.errors:
+            return ""
+        lines = [f"{err['file']}: {err['errors']}" for err in self.errors]
+        return "Errors encountered during restore:\n" + "\n".join(lines) + "\n"
+
+    def _write_errors(self) -> StringIO | None:
+        """
+        Write errors to a StringIO buffer.
+        """
+        content = self._format_errors()
+        if not content:
+            return None
+        return StringIO(content)
+
+    def _is_version_already_exists(self, entity_key: str, version_num: int) -> bool:
+        """
+        Check if a version already exists for a given entity key and version number.
+
+        Note:
+            Skip creating a draft if this version is already published.
+            Why? Because the version itself is already created and
+            we don't want to create duplicate versions.
+            Otherwise, we will raise an IntegrityError on PublishableEntityVersion
+            due to unique constraints between publishable_entity and version_num.
+        """
+        identifier = (entity_key, version_num)
+        return identifier in self.all_published_entities_versions
+
+    def _resolve_static_files(
+        self,
+        num_version: int,
+        entity_key: str,
+        static_files_map: dict[str, List[str]]
+    ) -> dict[str, bytes]:
+        """Resolve static file paths into their binary content."""
+        resolved_files: dict[str, bytes] = {}
+
+        static_file_key = f"{entity_key}:v{num_version}"  # e.g., "my_component:123:v1"
+        static_files = static_files_map.get(static_file_key, [])
+        for static_file in static_files:
+            local_key = static_file.split(f"v{num_version}/")[-1]
+            with self.zipf.open(static_file, "r") as f:
+                resolved_files[local_key] = f.read()
+        return resolved_files
+
+    def _resolve_children(self, entity_data: dict[str, Any], lookup_map: dict[str, Any]) -> list[Any]:
+        """Resolve child entity keys into model instances."""
+        children_keys = entity_data.pop("children", [])
+        return [lookup_map[key] for key in children_keys if key in lookup_map]
+
+    def _load_entity_data(
+        self, entity_file: str
+    ) -> tuple[dict[str, Any], dict[str, Any] | None, dict[str, Any] | None]:
+        """Load entity data and its versions from TOML."""
+        entity_toml_txt = self._read_file_from_zip(entity_file)
+        entity_toml_dict = parse_publishable_entity_toml(entity_toml_txt)
+        entity_data = entity_toml_dict.get("entity", {})
+        version_data = entity_toml_dict.get("version", [])
+        return entity_data, *self._get_versions_to_write(version_data, entity_data)
+
+    def _validate_versions(self, entity_data, draft, published, serializer_cls, *, file) -> dict[str, Any]:
+        """Validate draft/published versions with serializer."""
+        valid = {"draft": None, "published": None}
+        for label, version in [("draft", draft), ("published", published)]:
+            if not version:
+                continue
+            serializer = serializer_cls(
+                data={
+                    "entity_key": entity_data["key"],
+                    "created": self.utc_now,
+                    "created_by": None,
+                    **version
+                }
+            )
+            if serializer.is_valid():
+                valid[label] = serializer.validated_data
+            else:
+                self.errors.append({"file": file, "errors": serializer.errors})
+        return valid
+
+    def _read_file_from_zip(self, filename: str) -> str:
+        """Read and decode a UTF-8 file from the zip archive."""
+        with self.zipf.open(filename) as f:
+            return f.read().decode("utf-8")
+
+    def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
+        """Organize file paths into categories: learning_package, containers, components, collections."""
+        organized: dict[str, Any] = {
+            "learning_package": None,
+            "containers": [],
+            "components": [],
+            "component_static_files": defaultdict(list),
+            "collections": [],
+        }
+
+        for path in file_paths:
+            if path.endswith("/"):
+                # Skip directories
+                continue
+            if path == TOML_PACKAGE_NAME:
+                organized["learning_package"] = path
+            elif path.startswith("entities/") and str(Path(path).parent) == "entities" and path.endswith(".toml"):
+                # Top-level entity TOML files are considered containers
+                organized["containers"].append(path)
+            elif path.startswith("entities/"):
+                if path.endswith(".toml"):
+                    # Component entity TOML files
+                    organized["components"].append(path)
+                else:
+                    # Component static files
+                    # Path structure: entities/<namespace>/<type>/<component_id>/component_versions/<version>/static/...
+                    # Example: entities/xblock.v1/html/my_component_123456/component_versions/v1/static/...
+                    component_key = Path(path).parts[1:4]  # e.g., ['xblock.v1', 'html', 'my_component_123456']
+                    num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1"  # e.g., 'v1'
+                    if len(component_key) == 3:
+                        component_identifier = ":".join(component_key)
+                        component_identifier += f":{num_version}"
+                        organized["component_static_files"][component_identifier].append(path)
+                    else:
+                        self.errors.append({"file": path, "errors": "Invalid component static file path structure."})
+            elif path.startswith("collections/") and path.endswith(".toml"):
+                # Collection TOML files
+                organized["collections"].append(path)
+        return organized
+
+    def _get_versions_to_write(
+        self,
+        version_data: list[dict[str, Any]],
+        entity_data: dict[str, Any]
+    ) -> tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
+        """Return the draft and published versions to write, based on entity data."""
+        draft_num = entity_data.get("draft", {}).get("version_num")
+        published_num = entity_data.get("published", {}).get("version_num")
+        lookup = {v.get("version_num"): v for v in version_data}
+        return (
+            lookup.get(draft_num) if draft_num else None,
+            lookup.get(published_num) if published_num else None,
+        )