openedx-learning 0.5.1__py2.py3-none-any.whl → 0.6.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openedx_learning/__init__.py +1 -1
- openedx_learning/contrib/media_server/views.py +2 -2
- openedx_learning/core/components/admin.py +22 -31
- openedx_learning/core/components/api.py +51 -47
- openedx_learning/core/components/migrations/0001_initial.py +12 -12
- openedx_learning/core/components/migrations/0002_alter_componentversioncontent_key.py +20 -0
- openedx_learning/core/components/models.py +37 -30
- openedx_learning/core/contents/admin.py +13 -20
- openedx_learning/core/contents/api.py +104 -94
- openedx_learning/core/contents/migrations/0001_initial.py +23 -30
- openedx_learning/core/contents/models.py +230 -149
- openedx_learning/core/publishing/migrations/0001_initial.py +2 -2
- openedx_learning/core/publishing/migrations/0002_alter_learningpackage_key_and_more.py +25 -0
- openedx_learning/core/publishing/models.py +41 -2
- openedx_learning/lib/fields.py +14 -2
- openedx_learning/lib/managers.py +6 -2
- {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/METADATA +4 -4
- {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/RECORD +24 -22
- openedx_tagging/core/tagging/data.py +1 -0
- openedx_tagging/core/tagging/models/base.py +36 -5
- openedx_tagging/core/tagging/rest_api/v1/serializers.py +1 -0
- {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/LICENSE.txt +0 -0
- {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/WHEEL +0 -0
- {openedx_learning-0.5.1.dist-info → openedx_learning-0.6.1.dist-info}/top_level.txt +0 -0
|
@@ -3,42 +3,53 @@ These models are the most basic pieces of content we support. Think of them as
|
|
|
3
3
|
the simplest building blocks to store data with. They need to be composed into
|
|
4
4
|
more intelligent data models to be useful.
|
|
5
5
|
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
6
8
|
from functools import cached_property
|
|
7
9
|
|
|
8
|
-
from django.
|
|
9
|
-
from django.core.files.
|
|
10
|
+
from django.core.exceptions import ValidationError
|
|
11
|
+
from django.core.files.base import File
|
|
12
|
+
from django.core.files.storage import Storage, default_storage
|
|
10
13
|
from django.core.validators import MaxValueValidator
|
|
11
14
|
from django.db import models
|
|
12
15
|
|
|
13
|
-
from
|
|
14
|
-
|
|
15
|
-
case_insensitive_char_field,
|
|
16
|
-
hash_field,
|
|
17
|
-
manual_date_time_field,
|
|
18
|
-
)
|
|
19
|
-
|
|
16
|
+
from ...lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field
|
|
17
|
+
from ...lib.managers import WithRelationsManager
|
|
20
18
|
from ..publishing.models import LearningPackage
|
|
21
19
|
|
|
22
20
|
|
|
21
|
+
def get_storage() -> Storage:
|
|
22
|
+
"""
|
|
23
|
+
Return the Storage instance for our Content file persistence.
|
|
24
|
+
|
|
25
|
+
For right now, we're still only storing inline text and not static assets in
|
|
26
|
+
production, so just return the default_storage. We're also going through a
|
|
27
|
+
transition between Django 3.2 -> 4.2, where storage configuration has moved.
|
|
28
|
+
|
|
29
|
+
Make this work properly as part of adding support for static assets.
|
|
30
|
+
"""
|
|
31
|
+
return default_storage
|
|
32
|
+
|
|
33
|
+
|
|
23
34
|
class MediaType(models.Model):
|
|
24
35
|
"""
|
|
25
|
-
Stores Media types for use by
|
|
36
|
+
Stores Media types for use by Content models.
|
|
26
37
|
|
|
27
38
|
This is the same as MIME types (the IANA renamed MIME Types to Media Types).
|
|
28
|
-
We don't pre-populate this table, so APIs that add
|
|
29
|
-
|
|
39
|
+
We don't pre-populate this table, so APIs that add Content must ensure that
|
|
40
|
+
the desired Media Type exists.
|
|
30
41
|
|
|
31
42
|
Media types are written as {type}/{sub_type}+{suffix}, where suffixes are
|
|
32
|
-
seldom used.
|
|
43
|
+
seldom used. Examples:
|
|
33
44
|
|
|
34
45
|
* application/json
|
|
35
46
|
* text/css
|
|
36
47
|
* image/svg+xml
|
|
37
48
|
* application/vnd.openedx.xblock.v1.problem+xml
|
|
38
49
|
|
|
39
|
-
We have this as a separate model (instead of a field on
|
|
50
|
+
We have this as a separate model (instead of a field on Content) because:
|
|
40
51
|
|
|
41
|
-
1. We can save a lot on storage and indexing for
|
|
52
|
+
1. We can save a lot on storage and indexing for Content if we're just
|
|
42
53
|
storing foreign key references there, rather than the entire content
|
|
43
54
|
string to be indexed. This is especially relevant for our (long) custom
|
|
44
55
|
types like "application/vnd.openedx.xblock.v1.problem+xml".
|
|
@@ -46,9 +57,9 @@ class MediaType(models.Model):
|
|
|
46
57
|
"application/javascript". Also, we will be using a fair number of "vnd."
|
|
47
58
|
style of custom content types, and we may want the flexibility of
|
|
48
59
|
changing that without having to worry about migrating millions of rows of
|
|
49
|
-
|
|
60
|
+
Content.
|
|
50
61
|
"""
|
|
51
|
-
# We're going to have many foreign key references from
|
|
62
|
+
# We're going to have many foreign key references from Content into this
|
|
52
63
|
# model, and we don't need to store those as 8-byte BigAutoField, as is the
|
|
53
64
|
# default for this app. It's likely that a SmallAutoField would work, but I
|
|
54
65
|
# can just barely imagine using more than 32K Media types if we have a bunch
|
|
@@ -69,10 +80,9 @@ class MediaType(models.Model):
|
|
|
69
80
|
# always written in lowercase.
|
|
70
81
|
sub_type = case_insensitive_char_field(max_length=127, blank=False, null=False)
|
|
71
82
|
|
|
72
|
-
# Suffix,
|
|
73
|
-
#
|
|
74
|
-
#
|
|
75
|
-
# matter.
|
|
83
|
+
# Suffix, like "xml" (e.g. "image/svg+xml"). Usually blank. I couldn't find
|
|
84
|
+
# an RFC description of the length limit, and 127 is probably excessive. But
|
|
85
|
+
# this table should be small enough where it doesn't really matter.
|
|
76
86
|
suffix = case_insensitive_char_field(max_length=127, blank=True, null=False)
|
|
77
87
|
|
|
78
88
|
class Meta:
|
|
@@ -95,92 +105,222 @@ class MediaType(models.Model):
|
|
|
95
105
|
return base
|
|
96
106
|
|
|
97
107
|
|
|
98
|
-
class
|
|
108
|
+
class Content(models.Model):
|
|
99
109
|
"""
|
|
100
|
-
This is the most
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
files
|
|
144
|
-
|
|
110
|
+
This is the most primitive piece of content data.
|
|
111
|
+
|
|
112
|
+
This model serves to lookup, de-duplicate, and store text and files. A piece
|
|
113
|
+
of Content is identified purely by its data, the media type, and the
|
|
114
|
+
LearningPackage it is associated with. It has no version or file name
|
|
115
|
+
metadata associated with it. It exists to be a dumb blob of data that higher
|
|
116
|
+
level models like ComponentVersions can assemble together.
|
|
117
|
+
|
|
118
|
+
# In-model Text vs. File
|
|
119
|
+
|
|
120
|
+
That being said, the Content model does have some complexity to accommodate
|
|
121
|
+
different access patterns that we have in our app. In particular, it can
|
|
122
|
+
store data in two ways: the ``text`` field and a file (``has_file=True``).
|
|
123
|
+
A Content object must use at least one of these methods, but can use both if
|
|
124
|
+
it's appropriate.
|
|
125
|
+
|
|
126
|
+
Use the ``text`` field when:
|
|
127
|
+
* the content is a relatively small (< 50K, usually much less) piece of text
|
|
128
|
+
* you want to be able to query or update across many rows at once
|
|
129
|
+
* low, predictable latency is important
|
|
130
|
+
|
|
131
|
+
Use file storage when:
|
|
132
|
+
* the content is large, or not text-based
|
|
133
|
+
* you want to be able to serve the file content directly to the browser
|
|
134
|
+
|
|
135
|
+
The high level tradeoff is that ``text`` will give you faster access, and
|
|
136
|
+
file storage will give you a much more affordable and scalable backend. The
|
|
137
|
+
backend used for files will also eventually allow direct browser download
|
|
138
|
+
access, whereas the ``text`` field will not. But again, you can use both at
|
|
139
|
+
the same time if needed.
|
|
140
|
+
|
|
141
|
+
# Association with a LearningPackage
|
|
142
|
+
|
|
143
|
+
Content is associated with a specific LearningPackage. Doing so allows us to
|
|
144
|
+
more easily query for how much storage space a specific LearningPackage
|
|
145
|
+
(likely a library) is using, and to clean up unused data.
|
|
146
|
+
|
|
147
|
+
When we get to borrowing Content across LearningPackages, it's likely that
|
|
148
|
+
we will want to copy them. That way, even if the originating LearningPackage
|
|
149
|
+
is deleted, it won't break other LearningPackages that are making use of it.
|
|
150
|
+
|
|
151
|
+
# Media Types, and file duplication
|
|
152
|
+
|
|
153
|
+
Content is almost 1:1 with the files that it pushes to a storage backend,
|
|
154
|
+
but not quite. The file locations are generated purely as a product of the
|
|
155
|
+
LearningPackage UUID and the Content's ``hash_digest``, but Content also
|
|
156
|
+
takes into account the ``media_type``.
|
|
157
|
+
|
|
158
|
+
For example, say we had a Content with the following data:
|
|
159
|
+
|
|
160
|
+
["hello", "world"]
|
|
161
|
+
|
|
162
|
+
That is legal syntax for both JSON and YAML. If you want to attach some
|
|
163
|
+
YAML-specific metadata in a new model, you could make it 1:1 with the
|
|
164
|
+
Content that matched the "application/yaml" media type. The YAML and JSON
|
|
165
|
+
versions of this data would be two separate Content rows that would share
|
|
166
|
+
the same ``hash_digest`` value. If they both stored a file, they would be
|
|
167
|
+
pointing to the same file location. If they only used the ``text`` field,
|
|
168
|
+
then that value would be duplicated across the two separate Content rows.
|
|
169
|
+
|
|
170
|
+
The alternative would have been to associate media types at the level where
|
|
171
|
+
this data was being added to a ComponentVersion, but that would have added
|
|
172
|
+
more complexity. Right now, you could make an ImageContent 1:1 model that
|
|
173
|
+
analyzed images and created metadata entries for them (dimensions, GPS)
|
|
174
|
+
without having to understand how ComponentVersions work.
|
|
175
|
+
|
|
176
|
+
This is definitely an edge case, and it's likely the only time collisions
|
|
177
|
+
like this will happen in practice is with blank files. It also means that
|
|
178
|
+
using this table to measure disk usage may be slightly inaccurate when used
|
|
179
|
+
in a LearningPackage with collisions–though we expect to use numbers like
|
|
180
|
+
that mostly to get a broad sense of usage and look for major outliers,
|
|
181
|
+
rather than for byte-level accuracy (it wouldn't account for the non-trivial
|
|
182
|
+
indexing storage costs either).
|
|
183
|
+
|
|
184
|
+
# Immutability
|
|
185
|
+
|
|
186
|
+
From the outside, Content should appear immutable. Since the Content is
|
|
187
|
+
looked up by a hash of its data, a change in the data means that we should
|
|
188
|
+
look up the hash value of that new data and create a new Content if we don't
|
|
189
|
+
find a match.
|
|
190
|
+
|
|
191
|
+
That being said, the Content model has different ways of storing that data,
|
|
192
|
+
and that is mutable. We could decide that a certain type of Content should
|
|
193
|
+
be optimized to store its text in the table. Or that a content type that we
|
|
194
|
+
had previously only stored as text now also needs to be stored in the
|
|
195
|
+
file storage backend so that it can be made available to be downloaded.
|
|
196
|
+
These operations would be done as data migrations.
|
|
197
|
+
|
|
198
|
+
# Extensibility
|
|
199
|
+
|
|
200
|
+
Third-party apps are encouraged to create models that have a OneToOneField
|
|
201
|
+
relationship with Content. For instance, an ImageContent model might join
|
|
202
|
+
1:1 with all Content that has image/* media types, and provide additional
|
|
203
|
+
metadata for that data.
|
|
145
204
|
"""
|
|
146
|
-
|
|
147
|
-
# 50 MB is our current limit, based on the current Open edX Studio file
|
|
148
|
-
# upload size limit.
|
|
205
|
+
# Max size of the file.
|
|
149
206
|
MAX_FILE_SIZE = 50_000_000
|
|
150
207
|
|
|
151
|
-
|
|
208
|
+
# 50K is our limit for text data, like OLX. This means 50K *characters*,
|
|
209
|
+
# not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this
|
|
210
|
+
# could be as much as 200K of data if we had nothing but emojis.
|
|
211
|
+
MAX_TEXT_LENGTH = 50_000
|
|
152
212
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
213
|
+
objects: models.Manager[Content] = WithRelationsManager('media_type')
|
|
214
|
+
|
|
215
|
+
learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE)
|
|
156
216
|
|
|
157
217
|
# What is the Media type (a.k.a. MIME type) of this data?
|
|
158
218
|
media_type = models.ForeignKey(MediaType, on_delete=models.PROTECT)
|
|
159
219
|
|
|
160
|
-
# This is the size of the
|
|
161
|
-
#
|
|
162
|
-
# bytes to represent any given character.
|
|
220
|
+
# This is the size of the file in bytes. This can be different than the
|
|
221
|
+
# character length of a text file, since UTF-8 encoding can use anywhere
|
|
222
|
+
# between 1-4 bytes to represent any given character.
|
|
163
223
|
size = models.PositiveBigIntegerField(
|
|
164
224
|
validators=[MaxValueValidator(MAX_FILE_SIZE)],
|
|
165
225
|
)
|
|
166
226
|
|
|
167
|
-
# This
|
|
168
|
-
#
|
|
169
|
-
#
|
|
170
|
-
|
|
227
|
+
# This hash value may be calculated using create_hash_digest from the
|
|
228
|
+
# openedx_learning.lib.fields module. When storing text, we hash the UTF-8
|
|
229
|
+
# encoding of that text value, regardless of whether we also write it to a
|
|
230
|
+
# file or not. When storing just a file, we hash the bytes in the file.
|
|
231
|
+
hash_digest = hash_field()
|
|
171
232
|
|
|
172
|
-
#
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
233
|
+
# Do we have file data stored for this Content in our file storage backend?
|
|
234
|
+
has_file = models.BooleanField()
|
|
235
|
+
|
|
236
|
+
# The ``text`` field contains the text representation of the Content, if
|
|
237
|
+
# it is available. A blank value means that we are storing text for
|
|
238
|
+
# this Content, and that text happens to be an empty string. A null value
|
|
239
|
+
# here means that we are not storing any text here, and the Content exists
|
|
240
|
+
# only in file form. It is an error for ``text`` to be None and ``has_file``
|
|
241
|
+
# to be False, since that would mean we haven't stored data anywhere at all.
|
|
242
|
+
#
|
|
243
|
+
# We annotate this because mypy doesn't recognize that ``text`` should be
|
|
244
|
+
# nullable when using MultiCollationTextField, but does the right thing for
|
|
245
|
+
# TextField. For more info, see:
|
|
246
|
+
# https://github.com/openedx/openedx-learning/issues/152
|
|
247
|
+
text: models.TextField[str | None, str | None] = MultiCollationTextField(
|
|
248
|
+
blank=True,
|
|
176
249
|
null=True,
|
|
177
|
-
|
|
250
|
+
max_length=MAX_TEXT_LENGTH,
|
|
251
|
+
# We don't really expect to ever sort by the text column, but we may
|
|
252
|
+
# want to do case-insensitive searches, so it's useful to have a case
|
|
253
|
+
# and accent insensitive collation.
|
|
254
|
+
db_collations={
|
|
255
|
+
"sqlite": "NOCASE",
|
|
256
|
+
"mysql": "utf8mb4_unicode_ci",
|
|
257
|
+
}
|
|
178
258
|
)
|
|
179
259
|
|
|
260
|
+
# This should be manually set so that multiple Content rows being set in
|
|
261
|
+
# the same transaction are created with the same timestamp. The timestamp
|
|
262
|
+
# should be UTC.
|
|
263
|
+
created = manual_date_time_field()
|
|
264
|
+
|
|
180
265
|
@cached_property
|
|
181
|
-
def mime_type(self):
|
|
266
|
+
def mime_type(self) -> str:
|
|
267
|
+
"""
|
|
268
|
+
The IANA media type (a.k.a. MIME type) of the Content, in string form.
|
|
269
|
+
|
|
270
|
+
MIME types reference:
|
|
271
|
+
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
|
|
272
|
+
"""
|
|
182
273
|
return str(self.media_type)
|
|
183
274
|
|
|
275
|
+
def file_path(self):
|
|
276
|
+
"""
|
|
277
|
+
Path at which this content is stored (or would be stored).
|
|
278
|
+
|
|
279
|
+
This path is relative to configured storage root.
|
|
280
|
+
"""
|
|
281
|
+
return f"{self.learning_package.uuid}/{self.hash_digest}"
|
|
282
|
+
|
|
283
|
+
def write_file(self, file: File) -> None:
|
|
284
|
+
"""
|
|
285
|
+
Write file contents to the file storage backend.
|
|
286
|
+
"""
|
|
287
|
+
storage = get_storage()
|
|
288
|
+
file_path = self.file_path()
|
|
289
|
+
|
|
290
|
+
# There are two reasons why a file might already exist even if the
|
|
291
|
+
# Content row is new:
|
|
292
|
+
#
|
|
293
|
+
# 1. We tried adding the file earlier, but an error rolled back the
|
|
294
|
+
# state of the database. The file storage system isn't covered by any
|
|
295
|
+
# sort of transaction semantics, so it won't get rolled back.
|
|
296
|
+
#
|
|
297
|
+
# 2. The Content is of a different MediaType. The same exact bytes can
|
|
298
|
+
# be two logically separate Content entries if they are different file
|
|
299
|
+
# types. This lets other models add data to Content via 1:1 relations by
|
|
300
|
+
# ContentType (e.g. all SRT files). This is definitely an edge case.
|
|
301
|
+
if not storage.exists(file_path):
|
|
302
|
+
storage.save(file_path, file)
|
|
303
|
+
|
|
304
|
+
def file_url(self) -> str:
|
|
305
|
+
"""
|
|
306
|
+
This will sometimes be a time-limited signed URL.
|
|
307
|
+
"""
|
|
308
|
+
return get_storage().url(self.file_path())
|
|
309
|
+
|
|
310
|
+
def clean(self):
|
|
311
|
+
"""
|
|
312
|
+
Make sure we're actually storing *something*.
|
|
313
|
+
|
|
314
|
+
If this Content has neither a file nor text data associated with it,
|
|
315
|
+
it's in a broken/useless state and shouldn't be saved.
|
|
316
|
+
"""
|
|
317
|
+
if (not self.has_file) and (self.text is None):
|
|
318
|
+
raise ValidationError(
|
|
319
|
+
f"Content {self.pk} with hash {self.hash_digest} must either "
|
|
320
|
+
"set a string value for 'text', or it must set has_file=True "
|
|
321
|
+
"(or both)."
|
|
322
|
+
)
|
|
323
|
+
|
|
184
324
|
class Meta:
|
|
185
325
|
constraints = [
|
|
186
326
|
# Make sure we don't store duplicates of this raw data within the
|
|
@@ -195,71 +335,12 @@ class RawContent(models.Model): # type: ignore[django-manager-missing]
|
|
|
195
335
|
),
|
|
196
336
|
]
|
|
197
337
|
indexes = [
|
|
198
|
-
# LearningPackage Media type Index:
|
|
199
|
-
# * Break down Content counts by type/subtype with in a
|
|
200
|
-
# LearningPackage.
|
|
201
|
-
# * Find all the Content in a LearningPackage that matches a
|
|
202
|
-
# certain MIME type (e.g. "image/png", "application/pdf".
|
|
203
|
-
models.Index(
|
|
204
|
-
fields=["learning_package", "media_type"],
|
|
205
|
-
name="oel_content_idx_lp_media_type",
|
|
206
|
-
),
|
|
207
338
|
# LearningPackage (reverse) Size Index:
|
|
208
|
-
# * Find largest Content
|
|
209
|
-
# * Find the sum of Content size for a given LearningPackage.
|
|
339
|
+
# * Find the largest Content entries.
|
|
210
340
|
models.Index(
|
|
211
341
|
fields=["learning_package", "-size"],
|
|
212
342
|
name="oel_content_idx_lp_rsize",
|
|
213
343
|
),
|
|
214
|
-
# LearningPackage (reverse) Created Index:
|
|
215
|
-
# * Find most recently added Content.
|
|
216
|
-
models.Index(
|
|
217
|
-
fields=["learning_package", "-created"],
|
|
218
|
-
name="oel_content_idx_lp_rcreated",
|
|
219
|
-
),
|
|
220
344
|
]
|
|
221
|
-
verbose_name = "
|
|
222
|
-
verbose_name_plural = "
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
class TextContent(models.Model):
|
|
226
|
-
"""
|
|
227
|
-
TextContent supplements RawContent to give an in-table text copy.
|
|
228
|
-
|
|
229
|
-
This model exists so that we can have lower-latency access to this data,
|
|
230
|
-
particularly if we're pulling back multiple rows at once.
|
|
231
|
-
|
|
232
|
-
Apps are encouraged to create their own data models that further extend this
|
|
233
|
-
one with a more intelligent, parsed data model. For example, individual
|
|
234
|
-
XBlocks might parse the OLX in this model into separate data models for
|
|
235
|
-
VideoBlock, ProblemBlock, etc. You can do this by making your supplementary
|
|
236
|
-
model linked to this model via OneToOneField with primary_key=True.
|
|
237
|
-
|
|
238
|
-
The reason this is built directly into the Learning Core data model is
|
|
239
|
-
because we want to be able to easily access and browse this data even if the
|
|
240
|
-
app-extended models get deleted (e.g. if they are deprecated and removed).
|
|
241
|
-
"""
|
|
242
|
-
|
|
243
|
-
# 100K is our limit for text data, like OLX. This means 100K *characters*,
|
|
244
|
-
# not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this
|
|
245
|
-
# could be as much as 400K of data if we had nothing but emojis.
|
|
246
|
-
MAX_TEXT_LENGTH = 100_000
|
|
247
|
-
|
|
248
|
-
raw_content = models.OneToOneField(
|
|
249
|
-
RawContent,
|
|
250
|
-
on_delete=models.CASCADE,
|
|
251
|
-
primary_key=True,
|
|
252
|
-
related_name="text_content",
|
|
253
|
-
)
|
|
254
|
-
text = MultiCollationTextField(
|
|
255
|
-
blank=True,
|
|
256
|
-
max_length=MAX_TEXT_LENGTH,
|
|
257
|
-
# We don't really expect to ever sort by the text column, but we may
|
|
258
|
-
# want to do case-insensitive searches, so it's useful to have a case
|
|
259
|
-
# and accent insensitive collation.
|
|
260
|
-
db_collations={
|
|
261
|
-
"sqlite": "NOCASE",
|
|
262
|
-
"mysql": "utf8mb4_unicode_ci",
|
|
263
|
-
}
|
|
264
|
-
)
|
|
265
|
-
length = models.PositiveIntegerField(null=False)
|
|
345
|
+
verbose_name = "Content"
|
|
346
|
+
verbose_name_plural = "Contents"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Generated by Django 3.2.23 on 2024-
|
|
1
|
+
# Generated by Django 3.2.23 on 2024-02-06 00:36
|
|
2
2
|
|
|
3
3
|
import uuid
|
|
4
4
|
|
|
@@ -23,7 +23,7 @@ class Migration(migrations.Migration):
|
|
|
23
23
|
migrations.CreateModel(
|
|
24
24
|
name='LearningPackage',
|
|
25
25
|
fields=[
|
|
26
|
-
('id', models.
|
|
26
|
+
('id', models.AutoField(primary_key=True, serialize=False)),
|
|
27
27
|
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')),
|
|
28
28
|
('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)),
|
|
29
29
|
('title', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=500)),
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Generated by Django 4.2.10 on 2024-02-14 22:02
|
|
2
|
+
|
|
3
|
+
from django.db import migrations
|
|
4
|
+
|
|
5
|
+
import openedx_learning.lib.fields
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Migration(migrations.Migration):
|
|
9
|
+
|
|
10
|
+
dependencies = [
|
|
11
|
+
('oel_publishing', '0001_initial'),
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
operations = [
|
|
15
|
+
migrations.AlterField(
|
|
16
|
+
model_name='learningpackage',
|
|
17
|
+
name='key',
|
|
18
|
+
field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500),
|
|
19
|
+
),
|
|
20
|
+
migrations.AlterField(
|
|
21
|
+
model_name='publishableentity',
|
|
22
|
+
name='key',
|
|
23
|
+
field=openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, db_column='_key', max_length=500),
|
|
24
|
+
),
|
|
25
|
+
]
|
|
@@ -30,9 +30,28 @@ class LearningPackage(models.Model): # type: ignore[django-manager-missing]
|
|
|
30
30
|
|
|
31
31
|
Each PublishableEntity belongs to exactly one LearningPackage.
|
|
32
32
|
"""
|
|
33
|
+
# Explicitly declare a 4-byte ID instead of using the app-default 8-byte ID.
|
|
34
|
+
# We do not expect to have more than 2 billion LearningPackages on a given
|
|
35
|
+
# site. Furthermore, many, many things have foreign keys to this model and
|
|
36
|
+
# uniqueness indexes on those foreign keys + their own fields, so the 4
|
|
37
|
+
# bytes saved will add up over time.
|
|
38
|
+
id = models.AutoField(primary_key=True)
|
|
39
|
+
|
|
33
40
|
uuid = immutable_uuid_field()
|
|
34
|
-
|
|
41
|
+
|
|
42
|
+
# "key" is a reserved word for MySQL, so we're temporarily using the column
|
|
43
|
+
# name of "_key" to avoid breaking downstream tooling. There's an open
|
|
44
|
+
# question as to whether this field needs to exist at all, or whether the
|
|
45
|
+
# top level library key it's currently used for should be entirely in the
|
|
46
|
+
# LibraryContent model.
|
|
47
|
+
key = key_field(db_column="_key")
|
|
48
|
+
|
|
35
49
|
title = case_insensitive_char_field(max_length=500, blank=False)
|
|
50
|
+
|
|
51
|
+
# TODO: We should probably defer this field, since many things pull back
|
|
52
|
+
# LearningPackage as select_related. Usually those relations only care about
|
|
53
|
+
# the UUID and key, so maybe it makes sense to separate the model at some
|
|
54
|
+
# point.
|
|
36
55
|
description = MultiCollationTextField(
|
|
37
56
|
blank=True,
|
|
38
57
|
null=False,
|
|
@@ -160,7 +179,12 @@ class PublishableEntity(models.Model):
|
|
|
160
179
|
on_delete=models.CASCADE,
|
|
161
180
|
related_name="publishable_entities",
|
|
162
181
|
)
|
|
163
|
-
|
|
182
|
+
|
|
183
|
+
# "key" is a reserved word for MySQL, so we're temporarily using the column
|
|
184
|
+
# name of "_key" to avoid breaking downstream tooling. Consider renaming
|
|
185
|
+
# this later.
|
|
186
|
+
key = key_field(db_column="_key")
|
|
187
|
+
|
|
164
188
|
created = manual_date_time_field()
|
|
165
189
|
created_by = models.ForeignKey(
|
|
166
190
|
settings.AUTH_USER_MODEL,
|
|
@@ -355,6 +379,21 @@ class PublishLog(models.Model):
|
|
|
355
379
|
Open question: Empty publishes are allowed at this time, and might be useful
|
|
356
380
|
for "fake" publishes that are necessary to invoke other post-publish
|
|
357
381
|
actions. It's not clear at this point how useful this will actually be.
|
|
382
|
+
|
|
383
|
+
The absence of a ``version_num`` field in this model is intentional, because
|
|
384
|
+
having one would potentially cause write contention/locking issues when
|
|
385
|
+
there are many people working on different entities in a very large library.
|
|
386
|
+
We already see some contention issues occurring in ModuleStore for courses,
|
|
387
|
+
and we want to support Libraries that are far larger.
|
|
388
|
+
|
|
389
|
+
If you need a LearningPackage-wide indicator for version and the only thing
|
|
390
|
+
you care about is "has *something* changed?", you can make a foreign key to
|
|
391
|
+
the most recent PublishLog, or use the most recent PublishLog's primary key.
|
|
392
|
+
This should be monotonically increasing, though there will be large gaps in
|
|
393
|
+
values, e.g. (5, 190, 1291, etc.). Be warned that this value will not port
|
|
394
|
+
across sites. If you need site-portability, the UUIDs for this model are a
|
|
395
|
+
safer bet, though there's a lot about import/export that we haven't fully
|
|
396
|
+
mapped out yet.
|
|
358
397
|
"""
|
|
359
398
|
|
|
360
399
|
uuid = immutable_uuid_field()
|
openedx_learning/lib/fields.py
CHANGED
|
@@ -20,6 +20,18 @@ from .validators import validate_utc_datetime
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def create_hash_digest(data_bytes: bytes) -> str:
|
|
23
|
+
"""
|
|
24
|
+
Create a 40-byte, lower-case hex string representation of a hash digest.
|
|
25
|
+
|
|
26
|
+
The hash digest itself is 20-bytes using BLAKE2b.
|
|
27
|
+
|
|
28
|
+
DON'T JUST MODIFY THIS HASH BEHAVIOR!!! We use hashing for de-duplication
|
|
29
|
+
purposes. If this hash function ever changes, that deduplication will fail
|
|
30
|
+
because the hashing behavior won't match what's already in the database.
|
|
31
|
+
|
|
32
|
+
If we want to change this representation one day, we should create a new
|
|
33
|
+
function for that and do the appropriate data migration.
|
|
34
|
+
"""
|
|
23
35
|
return hashlib.blake2b(data_bytes, digest_size=20).hexdigest()
|
|
24
36
|
|
|
25
37
|
|
|
@@ -97,7 +109,7 @@ def immutable_uuid_field() -> models.UUIDField:
|
|
|
97
109
|
)
|
|
98
110
|
|
|
99
111
|
|
|
100
|
-
def key_field() -> MultiCollationCharField:
|
|
112
|
+
def key_field(**kwargs) -> MultiCollationCharField:
|
|
101
113
|
"""
|
|
102
114
|
Externally created Identifier fields.
|
|
103
115
|
|
|
@@ -108,7 +120,7 @@ def key_field() -> MultiCollationCharField:
|
|
|
108
120
|
Other apps should *not* make references to these values directly, since
|
|
109
121
|
these values may in theory change (even if this is rare in practice).
|
|
110
122
|
"""
|
|
111
|
-
return case_sensitive_char_field(max_length=500, blank=False)
|
|
123
|
+
return case_sensitive_char_field(max_length=500, blank=False, **kwargs)
|
|
112
124
|
|
|
113
125
|
|
|
114
126
|
def hash_field() -> models.CharField:
|
openedx_learning/lib/managers.py
CHANGED
|
@@ -13,8 +13,12 @@ class WithRelationsManager(models.Manager):
|
|
|
13
13
|
into some of its relations and you want to avoid unnecessary extra database
|
|
14
14
|
calls.
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
You can override the default ``objects`` manager with this one if you have
|
|
17
|
+
a model that should basically always be called with a ``select_related``. For
|
|
18
|
+
example, if you have a small lookup type-model that is frequently accessed.
|
|
19
|
+
|
|
20
|
+
For more complex joins, use this class to create a distinctly named manager
|
|
21
|
+
on your model class, instead of overwriting ``objects``. So for example::
|
|
18
22
|
|
|
19
23
|
class Component(models.Model):
|
|
20
24
|
with_publishing_relations = WithRelationsManager(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: openedx-learning
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: An experiment.
|
|
5
5
|
Home-page: https://github.com/openedx/openedx-learning
|
|
6
6
|
Author: David Ormsbee
|
|
@@ -17,12 +17,12 @@ Classifier: Natural Language :: English
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.8
|
|
19
19
|
Requires-Python: >=3.8
|
|
20
|
-
Requires-Dist: djangorestframework (<4.0)
|
|
21
|
-
Requires-Dist: rules (<4.0)
|
|
22
|
-
Requires-Dist: celery
|
|
23
20
|
Requires-Dist: Django (<5.0)
|
|
21
|
+
Requires-Dist: rules (<4.0)
|
|
22
|
+
Requires-Dist: djangorestframework (<4.0)
|
|
24
23
|
Requires-Dist: attrs
|
|
25
24
|
Requires-Dist: edx-drf-extensions
|
|
25
|
+
Requires-Dist: celery
|
|
26
26
|
|
|
27
27
|
openedx-learning
|
|
28
28
|
=============================
|