openedx-learning 0.5.1__py2.py3-none-any.whl → 0.6.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. openedx_learning/__init__.py +1 -1
  2. openedx_learning/contrib/media_server/views.py +2 -2
  3. openedx_learning/core/components/admin.py +22 -31
  4. openedx_learning/core/components/api.py +51 -47
  5. openedx_learning/core/components/migrations/0001_initial.py +12 -12
  6. openedx_learning/core/components/migrations/0002_alter_componentversioncontent_key.py +20 -0
  7. openedx_learning/core/components/models.py +37 -30
  8. openedx_learning/core/contents/admin.py +13 -20
  9. openedx_learning/core/contents/api.py +104 -94
  10. openedx_learning/core/contents/migrations/0001_initial.py +23 -30
  11. openedx_learning/core/contents/models.py +230 -149
  12. openedx_learning/core/publishing/migrations/0001_initial.py +2 -2
  13. openedx_learning/core/publishing/migrations/0002_alter_learningpackage_key_and_more.py +25 -0
  14. openedx_learning/core/publishing/models.py +41 -2
  15. openedx_learning/lib/fields.py +14 -2
  16. openedx_learning/lib/managers.py +6 -2
  17. {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/METADATA +4 -4
  18. {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/RECORD +24 -22
  19. openedx_tagging/core/tagging/data.py +1 -0
  20. openedx_tagging/core/tagging/models/base.py +36 -5
  21. openedx_tagging/core/tagging/rest_api/v1/serializers.py +1 -0
  22. {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/LICENSE.txt +0 -0
  23. {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/WHEEL +0 -0
  24. {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/top_level.txt +0 -0
@@ -3,42 +3,53 @@ These models are the most basic pieces of content we support. Think of them as
  the simplest building blocks to store data with. They need to be composed into
  more intelligent data models to be useful.
  """
+ from __future__ import annotations
+
  from functools import cached_property

- from django.conf import settings
- from django.core.files.storage import default_storage
+ from django.core.exceptions import ValidationError
+ from django.core.files.base import File
+ from django.core.files.storage import Storage, default_storage
  from django.core.validators import MaxValueValidator
  from django.db import models

- from openedx_learning.lib.fields import (
-     MultiCollationTextField,
-     case_insensitive_char_field,
-     hash_field,
-     manual_date_time_field,
- )
-
+ from ...lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field
+ from ...lib.managers import WithRelationsManager
  from ..publishing.models import LearningPackage


+ def get_storage() -> Storage:
+     """
+     Return the Storage instance for our Content file persistence.
+
+     For right now, we're still only storing inline text and not static assets in
+     production, so just return the default_storage. We're also going through a
+     transition between Django 3.2 -> 4.2, where storage configuration has moved.
+
+     Make this work properly as part of adding support for static assets.
+     """
+     return default_storage
+
+
  class MediaType(models.Model):
      """
-     Stores Media types for use by RawContent models.
+     Stores Media types for use by Content models.

      This is the same as MIME types (the IANA renamed MIME Types to Media Types).
-     We don't pre-populate this table, so APIs that add RawContent must ensure
-     that the desired Media Type exists.
+     We don't pre-populate this table, so APIs that add Content must ensure that
+     the desired Media Type exists.

      Media types are written as {type}/{sub_type}+{suffix}, where suffixes are
-     seldom used.
+     seldom used. Examples:

      * application/json
      * text/css
      * image/svg+xml
      * application/vnd.openedx.xblock.v1.problem+xml

-     We have this as a separate model (instead of a field on RawContent) because:
+     We have this as a separate model (instead of a field on Content) because:

-     1. We can save a lot on storage and indexing for RawContent if we're just
+     1. We can save a lot on storage and indexing for Content if we're just
         storing foreign key references there, rather than the entire content
         string to be indexed. This is especially relevant for our (long) custom
         types like "application/vnd.openedx.xblock.v1.problem+xml".
@@ -46,9 +57,9 @@ class MediaType(models.Model):
         "application/javascript". Also, we will be using a fair number of "vnd."
         style of custom content types, and we may want the flexibility of
         changing that without having to worry about migrating millions of rows of
-        RawContent.
+        Content.
      """
-     # We're going to have many foreign key references from RawContent into this
+     # We're going to have many foreign key references from Content into this
      # model, and we don't need to store those as 8-byte BigAutoField, as is the
      # default for this app. It's likely that a SmallAutoField would work, but I
      # can just barely imagine using more than 32K Media types if we have a bunch
@@ -69,10 +80,9 @@ class MediaType(models.Model):
      # always written in lowercase.
      sub_type = case_insensitive_char_field(max_length=127, blank=False, null=False)

-     # Suffix, usually just "xml" (e.g. "image/svg+xml"). Usually blank. I
-     # couldn't find an RFC description of the length limit, and 127 is probably
-     # excessive. But this table should be small enough where it doesn't really
-     # matter.
+     # Suffix, like "xml" (e.g. "image/svg+xml"). Usually blank. I couldn't find
+     # an RFC description of the length limit, and 127 is probably excessive. But
+     # this table should be small enough where it doesn't really matter.
      suffix = case_insensitive_char_field(max_length=127, blank=True, null=False)

      class Meta:
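
Since MediaType rows are not pre-populated, callers are expected to make sure the row they need exists before attaching Content to it. A minimal sketch of what that might look like is below; the helper name is hypothetical, and it assumes the model's primary type field is named ``type``, which is not shown in these hunks:

    # Hypothetical helper (not part of the package's API): ensure a MediaType row
    # exists for a "{type}/{sub_type}+{suffix}" string before creating Content.
    def ensure_media_type(media_type_str: str) -> MediaType:
        type_part, _, rest = media_type_str.partition("/")
        sub_type, _, suffix = rest.partition("+")  # suffix is "" when there is no "+"
        media_type, _created = MediaType.objects.get_or_create(
            type=type_part,  # assumed field name; only sub_type/suffix appear in this diff
            sub_type=sub_type,
            suffix=suffix,
        )
        return media_type

    # e.g. ensure_media_type("application/vnd.openedx.xblock.v1.problem+xml")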
@@ -95,92 +105,222 @@ class MediaType(models.Model):
          return base


- class RawContent(models.Model):  # type: ignore[django-manager-missing]
+ class Content(models.Model):
      """
-     This is the most basic piece of raw content data, with no version metadata.
-
-     RawContent stores data using the "file" field. This data is not
-     auto-normalized in any way, meaning that pieces of content that are
-     semantically equivalent (e.g. differently spaced/sorted JSON) may result in
-     new entries. This model is intentionally ignorant of what these things mean,
-     because it expects supplemental data models to build on top of it.
-
-     Two RawContent instances _can_ have the same hash_digest if they are of
-     different MIME types. For instance, an empty text file and an empty SRT file
-     will both hash the same way, but be considered different entities.
-
-     The other fields on RawContent are for data that is intrinsic to the file
-     data itself (e.g. the size). Any smart parsing of the contents into more
-     structured metadata should happen in other models that hang off of
-     RawContent.
-
-     RawContent models are not versioned in any way. The concept of versioning
-     only exists at a higher level.
-
-     RawContent is optimized for cheap storage, not low latency. It stores
-     content in a FileField. If you need faster text access across multiple rows,
-     add a TextContent entry that corresponds to the relevant RawContent.
-
-     If you need to transform this RawContent into more structured data for your
-     application, create a model with a OneToOneField(primary_key=True)
-     relationship to RawContent. Just remember that *you should always create the
-     RawContent entry* first, to ensure content is always exportable, even if
-     your app goes away in the future.
-
-     Operational Notes
-     -----------------
-
-     RawContent stores data using a FileField, which you'd typically want to back
-     with something like S3 when running in a production environment. That file
-     storage backend will not support rollback, meaning that if you start the
-     import process and things break halfway through, the RawContent model rows
-     will be rolled back, but the uploaded files will still remain on your file
-     storage system. The files are based on a hash of the contents though, so it
-     should still work later on when the import succeeds (it'll just have to
-     upload fewer files).
-
-     TODO: Write about cleaning up accidental uploads of really large/unnecessary
-     files. Pruning of unreferenced (never published, or currently unused)
-     component versions and assets, and how that ties in?
+     This is the most primitive piece of content data.
+
+     This model serves to lookup, de-duplicate, and store text and files. A piece
+     of Content is identified purely by its data, the media type, and the
+     LearningPackage it is associated with. It has no version or file name
+     metadata associated with it. It exists to be a dumb blob of data that higher
+     level models like ComponentVersions can assemble together.
+
+     # In-model Text vs. File
+
+     That being said, the Content model does have some complexity to accommodate
+     different access patterns that we have in our app. In particular, it can
+     store data in two ways: the ``text`` field and a file (``has_file=True``).
+     A Content object must use at least one of these methods, but can use both if
+     it's appropriate.
+
+     Use the ``text`` field when:
+     * the content is a relatively small (< 50K, usually much less) piece of text
+     * you want to be able to query and update across many rows at once
+     * low, predictable latency is important
+
+     Use file storage when:
+     * the content is large, or not text-based
+     * you want to be able to serve the file content directly to the browser
+
+     The high level tradeoff is that ``text`` will give you faster access, and
+     file storage will give you a much more affordable and scalable backend. The
+     backend used for files will also eventually allow direct browser download
+     access, whereas the ``text`` field will not. But again, you can use both at
+     the same time if needed.
+
+     # Association with a LearningPackage
+
+     Content is associated with a specific LearningPackage. Doing so allows us to
+     more easily query for how much storage space a specific LearningPackage
+     (likely a library) is using, and to clean up unused data.
+
+     When we get to borrowing Content across LearningPackages, it's likely that
+     we will want to copy them. That way, even if the originating LearningPackage
+     is deleted, it won't break other LearningPackages that are making use of it.
+
+     # Media Types, and file duplication
+
+     Content is almost 1:1 with the files that it pushes to a storage backend,
+     but not quite. The file locations are generated purely as a product of the
+     LearningPackage UUID and the Content's ``hash_digest``, but Content also
+     takes into account the ``media_type``.
+
+     For example, say we had a Content with the following data:
+
+         ["hello", "world"]
+
+     That is legal syntax for both JSON and YAML. If you want to attach some
+     YAML-specific metadata in a new model, you could make it 1:1 with the
+     Content that matched the "application/yaml" media type. The YAML and JSON
+     versions of this data would be two separate Content rows that would share
+     the same ``hash_digest`` value. If they both stored a file, they would be
+     pointing to the same file location. If they only used the ``text`` field,
+     then that value would be duplicated across the two separate Content rows.
+
+     The alternative would have been to associate media types at the level where
+     this data was being added to a ComponentVersion, but that would have added
+     more complexity. Right now, you could make an ImageContent 1:1 model that
+     analyzed images and created metadata entries for them (dimensions, GPS)
+     without having to understand how ComponentVersions work.
+
+     This is definitely an edge case, and it's likely that the only time
+     collisions like this will happen in practice is with blank files. It also
+     means that using this table to measure disk usage may be slightly inaccurate
+     when used in a LearningPackage with collisions, though we expect to use
+     numbers like that mostly to get a broad sense of usage and look for major
+     outliers, rather than for byte-level accuracy (it wouldn't account for the
+     non-trivial indexing storage costs either).
+
+     # Immutability
+
+     From the outside, Content should appear immutable. Since the Content is
+     looked up by a hash of its data, a change in the data means that we should
+     look up the hash value of that new data and create a new Content if we don't
+     find a match.
+
+     That being said, the Content model has different ways of storing that data,
+     and that is mutable. We could decide that a certain type of Content should
+     be optimized to store its text in the table. Or that a content type that we
+     had previously only stored as text now also needs to be stored in the file
+     storage backend so that it can be made available to be downloaded. These
+     operations would be done as data migrations.
+
+     # Extensibility
+
+     Third-party apps are encouraged to create models that have a OneToOneField
+     relationship with Content. For instance, an ImageContent model might join
+     1:1 with all Content that has image/* media types, and provide additional
+     metadata for that data.
      """
-
-     # 50 MB is our current limit, based on the current Open edX Studio file
-     # upload size limit.
+     # Max size of the file.
      MAX_FILE_SIZE = 50_000_000

-     learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE)
+     # 50K is our limit for text data, like OLX. This means 50K *characters*,
+     # not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this
+     # could be as much as 200K of data if we had nothing but emojis.
+     MAX_TEXT_LENGTH = 50_000

-     # This hash value may be calculated using create_hash_digest from the
-     # openedx.lib.fields module.
-     hash_digest = hash_field()
+     objects: models.Manager[Content] = WithRelationsManager('media_type')
+
+     learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE)

      # What is the Media type (a.k.a. MIME type) of this data?
      media_type = models.ForeignKey(MediaType, on_delete=models.PROTECT)

-     # This is the size of the raw data file in bytes. This can be different than
-     # the character length, since UTF-8 encoding can use anywhere between 1-4
-     # bytes to represent any given character.
+     # This is the size of the file in bytes. This can be different than the
+     # character length of a text file, since UTF-8 encoding can use anywhere
+     # between 1-4 bytes to represent any given character.
      size = models.PositiveBigIntegerField(
          validators=[MaxValueValidator(MAX_FILE_SIZE)],
      )

-     # This should be manually set so that multiple RawContent rows being set in
-     # the same transaction are created with the same timestamp. The timestamp
-     # should be UTC.
-     created = manual_date_time_field()
+     # This hash value may be calculated using create_hash_digest from the
+     # openedx.lib.fields module. When storing text, we hash the UTF-8
+     # encoding of that text value, regardless of whether we also write it to a
+     # file or not. When storing just a file, we hash the bytes in the file.
+     hash_digest = hash_field()

-     # All content for the LearningPackage should be stored in files. See model
-     # docstring for more details on how to store this data in supplementary data
-     # models that offer better latency guarantees.
-     file = models.FileField(
+     # Do we have file data stored for this Content in our file storage backend?
+     has_file = models.BooleanField()
+
+     # The ``text`` field contains the text representation of the Content, if
+     # it is available. A blank value means that we are storing text for
+     # this Content, and that text happens to be an empty string. A null value
+     # here means that we are not storing any text, and the Content exists
+     # only in file form. It is an error for ``text`` to be None and ``has_file``
+     # to be False, since that would mean we haven't stored data anywhere at all.
+     #
+     # We annotate this because mypy doesn't recognize that ``text`` should be
+     # nullable when using MultiCollationTextField, but does the right thing for
+     # TextField. For more info, see:
+     # https://github.com/openedx/openedx-learning/issues/152
+     text: models.TextField[str | None, str | None] = MultiCollationTextField(
+         blank=True,
          null=True,
-         storage=settings.OPENEDX_LEARNING.get("STORAGE", default_storage),  # type: ignore
+         max_length=MAX_TEXT_LENGTH,
+         # We don't really expect to ever sort by the text column, but we may
+         # want to do case-insensitive searches, so it's useful to have a case
+         # and accent insensitive collation.
+         db_collations={
+             "sqlite": "NOCASE",
+             "mysql": "utf8mb4_unicode_ci",
+         }
      )

+     # This should be manually set so that multiple Content rows being set in
+     # the same transaction are created with the same timestamp. The timestamp
+     # should be UTC.
+     created = manual_date_time_field()
+
      @cached_property
-     def mime_type(self):
+     def mime_type(self) -> str:
+         """
+         The IANA media type (a.k.a. MIME type) of the Content, in string form.
+
+         MIME types reference:
+         https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
+         """
          return str(self.media_type)

+     def file_path(self):
+         """
+         Path at which this content is stored (or would be stored).
+
+         This path is relative to the configured storage root.
+         """
+         return f"{self.learning_package.uuid}/{self.hash_digest}"
+
+     def write_file(self, file: File) -> None:
+         """
+         Write file contents to the file storage backend.
+         """
+         storage = get_storage()
+         file_path = self.file_path()
+
+         # There are two reasons why a file might already exist even if the
+         # Content row is new:
+         #
+         # 1. We tried adding the file earlier, but an error rolled back the
+         # state of the database. The file storage system isn't covered by any
+         # sort of transaction semantics, so it won't get rolled back.
+         #
+         # 2. The Content is of a different MediaType. The same exact bytes can
+         # be two logically separate Content entries if they are different file
+         # types. This lets other models add data to Content via 1:1 relations by
+         # ContentType (e.g. all SRT files). This is definitely an edge case.
+         if not storage.exists(file_path):
+             storage.save(file_path, file)
+
+     def file_url(self) -> str:
+         """
+         This will sometimes be a time-limited signed URL.
+         """
+         return get_storage().url(self.file_path())
+
+     def clean(self):
+         """
+         Make sure we're actually storing *something*.
+
+         If this Content has neither a file nor text data associated with it,
+         it's in a broken/useless state and shouldn't be saved.
+         """
+         if (not self.has_file) and (self.text is None):
+             raise ValidationError(
+                 f"Content {self.pk} with hash {self.hash_digest} must either "
+                 "set a string value for 'text', or it must set has_file=True "
+                 "(or both)."
+             )
+
      class Meta:
          constraints = [
              # Make sure we don't store duplicates of this raw data within the
@@ -195,71 +335,12 @@ class RawContent(models.Model):  # type: ignore[django-manager-missing]
              ),
          ]
          indexes = [
-             # LearningPackage Media type Index:
-             #   * Break down Content counts by type/subtype with in a
-             #     LearningPackage.
-             #   * Find all the Content in a LearningPackage that matches a
-             #     certain MIME type (e.g. "image/png", "application/pdf".
-             models.Index(
-                 fields=["learning_package", "media_type"],
-                 name="oel_content_idx_lp_media_type",
-             ),
              # LearningPackage (reverse) Size Index:
-             #   * Find largest Content in a LearningPackage.
-             #   * Find the sum of Content size for a given LearningPackage.
+             #   * Find the largest Content entries.
              models.Index(
                  fields=["learning_package", "-size"],
                  name="oel_content_idx_lp_rsize",
              ),
-             # LearningPackage (reverse) Created Index:
-             #   * Find most recently added Content.
-             models.Index(
-                 fields=["learning_package", "-created"],
-                 name="oel_content_idx_lp_rcreated",
-             ),
          ]
-         verbose_name = "Raw Content"
-         verbose_name_plural = "Raw Contents"
-
-
- class TextContent(models.Model):
-     """
-     TextContent supplements RawContent to give an in-table text copy.
-
-     This model exists so that we can have lower-latency access to this data,
-     particularly if we're pulling back multiple rows at once.
-
-     Apps are encouraged to create their own data models that further extend this
-     one with a more intelligent, parsed data model. For example, individual
-     XBlocks might parse the OLX in this model into separate data models for
-     VideoBlock, ProblemBlock, etc. You can do this by making your supplementary
-     model linked to this model via OneToOneField with primary_key=True.
-
-     The reason this is built directly into the Learning Core data model is
-     because we want to be able to easily access and browse this data even if the
-     app-extended models get deleted (e.g. if they are deprecated and removed).
-     """
-
-     # 100K is our limit for text data, like OLX. This means 100K *characters*,
-     # not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this
-     # could be as much as 400K of data if we had nothing but emojis.
-     MAX_TEXT_LENGTH = 100_000
-
-     raw_content = models.OneToOneField(
-         RawContent,
-         on_delete=models.CASCADE,
-         primary_key=True,
-         related_name="text_content",
-     )
-     text = MultiCollationTextField(
-         blank=True,
-         max_length=MAX_TEXT_LENGTH,
-         # We don't really expect to ever sort by the text column, but we may
-         # want to do case-insensitive searches, so it's useful to have a case
-         # and accent insensitive collation.
-         db_collations={
-             "sqlite": "NOCASE",
-             "mysql": "utf8mb4_unicode_ci",
-         }
-     )
-     length = models.PositiveIntegerField(null=False)
+         verbose_name = "Content"
+         verbose_name_plural = "Contents"
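
To make the text-vs-file split above concrete, here is a hedged sketch of creating a text-backed Content row using only the fields and helpers visible in this diff. The real creation logic lives in openedx_learning/core/contents/api.py and may differ; the function name and the choice of the encoded byte length for ``size`` are assumptions.

    # Sketch only; not the package's actual API.
    from datetime import datetime, timezone

    from openedx_learning.core.contents.models import Content
    from openedx_learning.lib.fields import create_hash_digest

    def create_text_content(learning_package, media_type, text: str) -> Content:
        data_bytes = text.encode("utf-8")  # hash the UTF-8 encoding, per the hash_digest comment
        content = Content(
            learning_package=learning_package,
            media_type=media_type,
            text=text,
            has_file=False,
            size=len(data_bytes),  # assumption: byte length of the encoded text
            hash_digest=create_hash_digest(data_bytes),
            created=datetime.now(tz=timezone.utc),  # manually set UTC timestamp
        )
        content.full_clean()  # runs clean(), which rejects text=None with has_file=False
        content.save()
        return content

For file-backed data, the same row would instead set ``has_file=True`` and push the bytes through ``content.write_file(...)``, which skips the upload when the hash-derived path already exists.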
@@ -1,4 +1,4 @@
- # Generated by Django 3.2.23 on 2024-01-22 00:37
+ # Generated by Django 3.2.23 on 2024-02-06 00:36

  import uuid

@@ -23,7 +23,7 @@ class Migration(migrations.Migration):
          migrations.CreateModel(
              name='LearningPackage',
              fields=[
-                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                 ('id', models.AutoField(primary_key=True, serialize=False)),
                  ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')),
                  ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)),
                  ('title', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=500)),
@@ -0,0 +1,25 @@
+ # Generated by Django 4.2.10 on 2024-02-14 22:02
+
+ from django.db import migrations
+
+ import openedx_learning.lib.fields
+
+
+ class Migration(migrations.Migration):
+
+     dependencies = [
+         ('oel_publishing', '0001_initial'),
+     ]
+
+     operations = [
+         migrations.AlterField(
+             model_name='learningpackage',
+             name='key',
+             field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500),
+         ),
+         migrations.AlterField(
+             model_name='publishableentity',
+             name='key',
+             field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500),
+         ),
+     ]
@@ -30,9 +30,28 @@ class LearningPackage(models.Model):  # type: ignore[django-manager-missing]

      Each PublishableEntity belongs to exactly one LearningPackage.
      """
+     # Explicitly declare a 4-byte ID instead of using the app-default 8-byte ID.
+     # We do not expect to have more than 2 billion LearningPackages on a given
+     # site. Furthermore, many, many things have foreign keys to this model and
+     # uniqueness indexes on those foreign keys + their own fields, so the 4
+     # bytes saved will add up over time.
+     id = models.AutoField(primary_key=True)
+
      uuid = immutable_uuid_field()
-     key = key_field()
+
+     # "key" is a reserved word for MySQL, so we're temporarily using the column
+     # name of "_key" to avoid breaking downstream tooling. There's an open
+     # question as to whether this field needs to exist at all, or whether the
+     # top level library key it's currently used for should be entirely in the
+     # LibraryContent model.
+     key = key_field(db_column="_key")
+
      title = case_insensitive_char_field(max_length=500, blank=False)
+
+     # TODO: We should probably defer this field, since many things pull back
+     # LearningPackage as select_related. Usually those relations only care about
+     # the UUID and key, so maybe it makes sense to separate the model at some
+     # point.
      description = MultiCollationTextField(
          blank=True,
          null=False,
@@ -160,7 +179,12 @@ class PublishableEntity(models.Model):
          on_delete=models.CASCADE,
          related_name="publishable_entities",
      )
-     key = key_field()
+
+     # "key" is a reserved word for MySQL, so we're temporarily using the column
+     # name of "_key" to avoid breaking downstream tooling. Consider renaming
+     # this later.
+     key = key_field(db_column="_key")
+
      created = manual_date_time_field()
      created_by = models.ForeignKey(
          settings.AUTH_USER_MODEL,
@@ -355,6 +379,21 @@ class PublishLog(models.Model):
      Open question: Empty publishes are allowed at this time, and might be useful
      for "fake" publishes that are necessary to invoke other post-publish
      actions. It's not clear at this point how useful this will actually be.
+
+     The absence of a ``version_num`` field in this model is intentional, because
+     having one would potentially cause write contention/locking issues when
+     there are many people working on different entities in a very large library.
+     We already see some contention issues occurring in ModuleStore for courses,
+     and we want to support Libraries that are far larger.
+
+     If you need a LearningPackage-wide indicator for version and the only thing
+     you care about is "has *something* changed?", you can make a foreign key to
+     the most recent PublishLog, or use the most recent PublishLog's primary key.
+     This should be monotonically increasing, though there will be large gaps in
+     values, e.g. (5, 190, 1291, etc.). Be warned that this value will not port
+     across sites. If you need site-portability, the UUIDs for this model are a
+     safer bet, though there's a lot about import/export that we haven't fully
+     mapped out yet.
      """

      uuid = immutable_uuid_field()
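
A sketch of the "most recent PublishLog primary key" pattern described above; the ``learning_package`` foreign key name is an assumption, since it does not appear in this hunk:

    from openedx_learning.core.publishing.models import PublishLog

    # Hypothetical query: a cheap "has anything been published since I last looked?"
    # marker for one LearningPackage. Monotonically increasing, but with gaps, and
    # not portable across sites.
    def latest_publish_marker(learning_package):
        return (
            PublishLog.objects
            .filter(learning_package=learning_package)  # assumed FK name
            .order_by("-pk")
            .values_list("pk", flat=True)
            .first()
        )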
@@ -20,6 +20,18 @@ from .validators import validate_utc_datetime


  def create_hash_digest(data_bytes: bytes) -> str:
+     """
+     Create a 40-character, lower-case hex string representation of a hash digest.
+
+     The hash digest itself is 20 bytes, using BLAKE2b.
+
+     DON'T JUST MODIFY THIS HASH BEHAVIOR!!! We use hashing for de-duplication
+     purposes. If this hash function ever changes, that deduplication will fail
+     because the hashing behavior won't match what's already in the database.
+
+     If we want to change this representation one day, we should create a new
+     function for that and do the appropriate data migration.
+     """
      return hashlib.blake2b(data_bytes, digest_size=20).hexdigest()
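
For reference, the documented behavior can be reproduced with nothing but the standard library; this is an equivalent illustration, not the package's code:

    import hashlib

    data = '["hello", "world"]'.encode("utf-8")
    digest = hashlib.blake2b(data, digest_size=20).hexdigest()
    assert len(digest) == 40 and digest == digest.lower()
    # Per the Content docstring, de-duplication is then a lookup keyed on
    # the LearningPackage, the media type, and this hash_digest value.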
@@ -97,7 +109,7 @@ def immutable_uuid_field() -> models.UUIDField:
      )


- def key_field() -> MultiCollationCharField:
+ def key_field(**kwargs) -> MultiCollationCharField:
      """
      Externally created Identifier fields.

@@ -108,7 +120,7 @@ def key_field() -> MultiCollationCharField:
      Other apps should *not* make references to these values directly, since
      these values may in theory change (even if this is rare in practice).
      """
-     return case_sensitive_char_field(max_length=500, blank=False)
+     return case_sensitive_char_field(max_length=500, blank=False, **kwargs)


  def hash_field() -> models.CharField:
@@ -13,8 +13,12 @@ class WithRelationsManager(models.Manager):
      into some of its relations and you want to avoid unnecessary extra database
      calls.

-     Use this to create a distinctly named manager on your model class, instead
-     of overwriting ``objects``. So for example::
+     You can override the default ``objects`` manager with this one if you have
+     a model that should basically always be called with a ``select_related``,
+     for example, a small lookup-type model that is frequently accessed.
+
+     For more complex joins, use this class to create a distinctly named manager
+     on your model class, instead of overwriting ``objects``. So for example::

          class Component(models.Model):
              with_publishing_relations = WithRelationsManager(
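
Both usage patterns read roughly like this sketch. The first mirrors the Content change in this diff; the Component manager and relation names are illustrative, since the docstring example is truncated in this hunk:

    from django.db import models

    from openedx_learning.lib.managers import WithRelationsManager

    class Content(models.Model):
        # Nearly every caller also wants the media_type row, so override ``objects``.
        objects = WithRelationsManager("media_type")

    class Component(models.Model):
        # For heavier joins, keep the default manager and add a named one instead.
        objects = models.Manager()
        with_publishing_relations = WithRelationsManager("publishable_entity")  # illustrative relation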
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: openedx-learning
- Version: 0.5.1
+ Version: 0.6.1
  Summary: An experiment.
  Home-page: https://github.com/openedx/openedx-learning
  Author: David Ormsbee
@@ -17,12 +17,12 @@ Classifier: Natural Language :: English
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.8
  Requires-Python: >=3.8
- Requires-Dist: djangorestframework (<4.0)
- Requires-Dist: rules (<4.0)
- Requires-Dist: celery
  Requires-Dist: Django (<5.0)
+ Requires-Dist: rules (<4.0)
+ Requires-Dist: djangorestframework (<4.0)
  Requires-Dist: attrs
  Requires-Dist: edx-drf-extensions
+ Requires-Dist: celery

  openedx-learning
  =============================