openstax_content 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/openstax/content/abl.rb +74 -6
- data/lib/openstax/content/archive.rb +14 -14
- data/lib/openstax/content/book.rb +16 -31
- data/lib/openstax/content/book_part.rb +6 -0
- data/lib/openstax/content/fragment/html.rb +18 -7
- data/lib/openstax/content/fragment_splitter.rb +4 -2
- data/lib/openstax/content/s3.rb +3 -2
- data/lib/openstax/content/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ce0c0361f6d9e8205eb18893bb7d4f4bf726a2adf6fab3fe35649c56ae55876
|
4
|
+
data.tar.gz: ccec97c10020878437487261de88a742ccf16480e178227a323420ed2f9fe4fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 419d040e582db3b9233e2a6ebfcc3a6ad5bf908ed656a74aa67ecab1f111ad9278947e9bae3dab41d0479f36c7ad8750cabdac760dd6ca8ba73acacc8b94841d
|
7
|
+
data.tar.gz: aafb9064479bdaa157c557018b46c442f9c9abc4f0ce8fb48e12b7f81c8f0f09f2bba2829db6227bc7d5c36831b9909bb581839cea80da65cea87a5b807d33a9
|
data/lib/openstax/content/abl.rb
CHANGED
@@ -1,13 +1,81 @@
|
|
1
|
+
require_relative 'archive'
|
2
|
+
require_relative 'book'
|
3
|
+
|
1
4
|
class OpenStax::Content::Abl
|
2
|
-
def
|
3
|
-
@
|
5
|
+
def initialize(url: nil)
|
6
|
+
@url = url
|
7
|
+
end
|
8
|
+
|
9
|
+
def url
|
10
|
+
@url ||= OpenStax::Content.abl_url
|
11
|
+
end
|
12
|
+
|
13
|
+
def body_string
|
14
|
+
@body_string ||= Faraday.get(url).body
|
4
15
|
end
|
5
16
|
|
6
|
-
def
|
7
|
-
|
17
|
+
def body_hash
|
18
|
+
@body_hash ||= JSON.parse(body_string, symbolize_names: true)
|
8
19
|
end
|
9
20
|
|
10
|
-
def
|
11
|
-
|
21
|
+
def digest
|
22
|
+
Digest::SHA256.hexdigest body_string
|
23
|
+
end
|
24
|
+
|
25
|
+
def latest_approved_version_by_collection_id(archive: OpenStax::Content::Archive.new)
|
26
|
+
{}.tap do |hash|
|
27
|
+
body_hash[:approved_versions].each do |version|
|
28
|
+
next if version[:min_code_version] > archive.version
|
29
|
+
|
30
|
+
existing_version = hash[version[:collection_id]]
|
31
|
+
|
32
|
+
next if !existing_version.nil? &&
|
33
|
+
(existing_version[:content_version].split('.').map(&:to_i) <=>
|
34
|
+
version[:content_version].split('.').map(&:to_i)) >= 0
|
35
|
+
|
36
|
+
hash[version[:collection_id]] = version
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def approved_books(archive: OpenStax::Content::Archive.new)
|
42
|
+
# Can be removed once we have no more CNX books
|
43
|
+
version_by_collection_id = latest_approved_version_by_collection_id(archive: archive)
|
44
|
+
|
45
|
+
body_hash[:approved_books].flat_map do |approved_book|
|
46
|
+
if approved_book[:versions].nil?
|
47
|
+
# CNX-hosted book
|
48
|
+
version = version_by_collection_id[approved_book[:collection_id]]
|
49
|
+
|
50
|
+
next [] if version.nil?
|
51
|
+
|
52
|
+
approved_book[:books].map do |book|
|
53
|
+
OpenStax::Content::Book.new(
|
54
|
+
archive: archive,
|
55
|
+
uuid: book[:uuid],
|
56
|
+
version: version[:content_version].sub('1.', ''),
|
57
|
+
slug: book[:slug],
|
58
|
+
style: approved_book[:style]
|
59
|
+
)
|
60
|
+
end
|
61
|
+
else
|
62
|
+
# Git-hosted book
|
63
|
+
approved_book[:versions].flat_map do |version|
|
64
|
+
next [] if version[:min_code_version] > archive.version
|
65
|
+
|
66
|
+
commit_metadata = version[:commit_metadata]
|
67
|
+
|
68
|
+
commit_metadata[:books].map do |book|
|
69
|
+
OpenStax::Content::Book.new(
|
70
|
+
archive: archive,
|
71
|
+
uuid: book[:uuid],
|
72
|
+
version: version[:commit_sha][0..6],
|
73
|
+
slug: book[:slug],
|
74
|
+
style: book[:style]
|
75
|
+
)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
12
80
|
end
|
13
81
|
end
|
@@ -2,14 +2,22 @@ require 'addressable/uri'
|
|
2
2
|
require 'faraday'
|
3
3
|
|
4
4
|
class OpenStax::Content::Archive
|
5
|
-
def initialize(version)
|
5
|
+
def initialize(version: nil)
|
6
6
|
@version = version
|
7
7
|
@slugs = {}
|
8
8
|
end
|
9
9
|
|
10
|
+
def s3
|
11
|
+
@s3 ||= OpenStax::Content::S3.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def version
|
15
|
+
@version ||= s3.ls.last
|
16
|
+
end
|
17
|
+
|
10
18
|
def base_url
|
11
19
|
@base_url ||= "https://#{OpenStax::Content.domain}/#{
|
12
|
-
OpenStax::Content.archive_path}/#{
|
20
|
+
OpenStax::Content.archive_path}/#{version}"
|
13
21
|
end
|
14
22
|
|
15
23
|
def url_for(object)
|
@@ -28,10 +36,6 @@ class OpenStax::Content::Archive
|
|
28
36
|
end
|
29
37
|
|
30
38
|
if uri.absolute?
|
31
|
-
OpenStax::Content.logger.warn do
|
32
|
-
"#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
|
33
|
-
end
|
34
|
-
|
35
39
|
# Force absolute URLs to be https
|
36
40
|
uri.scheme = 'https'
|
37
41
|
return uri.to_s
|
@@ -76,20 +80,16 @@ class OpenStax::Content::Archive
|
|
76
80
|
end
|
77
81
|
end
|
78
82
|
|
79
|
-
def s3
|
80
|
-
@s3 ||= OpenStax::Content::S3.new
|
81
|
-
end
|
82
|
-
|
83
83
|
def add_latest_book_version_if_missing(object)
|
84
84
|
book_id, page_id = object.split(':', 2)
|
85
85
|
book_uuid, book_version = book_id.split('@', 2)
|
86
86
|
return object unless book_version.nil? && s3.bucket_configured?
|
87
87
|
|
88
|
-
s3.ls(
|
89
|
-
|
90
|
-
next unless
|
88
|
+
s3.ls(version).each do |book|
|
89
|
+
s3_uuid, s3_version = book.split('@')
|
90
|
+
next unless s3_uuid == book_uuid
|
91
91
|
|
92
|
-
book_version =
|
92
|
+
book_version = s3_version
|
93
93
|
break
|
94
94
|
end
|
95
95
|
|
@@ -1,33 +1,28 @@
|
|
1
|
-
require_relative 'archive'
|
2
1
|
require_relative 'book_part'
|
3
2
|
|
4
3
|
class OpenStax::Content::Book
|
5
|
-
|
6
|
-
archive_version:, uuid: nil, version: nil, hash: nil, title: nil, tree: nil, root_book_part: nil
|
7
|
-
)
|
8
|
-
@uuid = uuid || (hash || {})['id']
|
9
|
-
raise ArgumentError, 'Either uuid or hash with id key is required' if @uuid.nil?
|
4
|
+
extend Forwardable
|
10
5
|
|
11
|
-
|
12
|
-
raise ArgumentError, 'Either version or hash with version key is required' if @version.nil?
|
6
|
+
attr_reader :archive, :uuid, :version, :slug, :style
|
13
7
|
|
14
|
-
|
15
|
-
@
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
def archive
|
24
|
-
@archive ||= OpenStax::Content::Archive.new archive_version
|
8
|
+
def initialize(archive:, uuid:, version:, url: nil, hash: nil, slug: nil, style: nil)
|
9
|
+
@archive = archive
|
10
|
+
@uuid = uuid
|
11
|
+
@version = version
|
12
|
+
@url = url
|
13
|
+
@hash = hash
|
14
|
+
@slug = slug
|
15
|
+
@style = style
|
25
16
|
end
|
26
17
|
|
27
18
|
def url
|
28
19
|
@url ||= archive.url_for "#{uuid}@#{version}"
|
29
20
|
end
|
30
21
|
|
22
|
+
def hash
|
23
|
+
@hash ||= archive.json url
|
24
|
+
end
|
25
|
+
|
31
26
|
def url_fragment
|
32
27
|
@url_fragment ||= url.chomp('.json')
|
33
28
|
end
|
@@ -40,22 +35,10 @@ class OpenStax::Content::Book
|
|
40
35
|
@collated ||= hash.fetch('collated', false)
|
41
36
|
end
|
42
37
|
|
43
|
-
def hash
|
44
|
-
@hash ||= archive.json url
|
45
|
-
end
|
46
|
-
|
47
|
-
def uuid
|
48
|
-
@uuid ||= hash.fetch('id')
|
49
|
-
end
|
50
|
-
|
51
38
|
def short_id
|
52
39
|
@short_id ||= hash['shortId']
|
53
40
|
end
|
54
41
|
|
55
|
-
def version
|
56
|
-
@version ||= hash.fetch('version')
|
57
|
-
end
|
58
|
-
|
59
42
|
def title
|
60
43
|
@title ||= hash.fetch('title')
|
61
44
|
end
|
@@ -67,4 +50,6 @@ class OpenStax::Content::Book
|
|
67
50
|
def root_book_part
|
68
51
|
@root_book_part ||= OpenStax::Content::BookPart.new(hash: tree, is_root: true, book: self)
|
69
52
|
end
|
53
|
+
|
54
|
+
def_delegator :root_book_part, :all_pages
|
70
55
|
end
|
@@ -12,17 +12,28 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
|
|
12
12
|
@to_html = @node.to_html
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
# Serialization methods use #instance_variables to iterate through and dump all instance variables
|
16
|
+
# Nokogiri classes are not serializable, so we do not want to dump the @node variable
|
17
|
+
# Instead, we recreate it by parsing the HTML again if needed
|
18
|
+
def instance_variables
|
19
|
+
super - [ :@node ]
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
21
|
-
|
22
|
+
def blank?
|
23
|
+
return @blank unless @blank.nil?
|
24
|
+
|
25
|
+
@blank = if to_html.nil? || to_html.strip.empty?
|
26
|
+
true
|
27
|
+
else
|
28
|
+
node_without_title = node.dup
|
29
|
+
node_without_title.css('[data-type="document-title"]').remove
|
30
|
+
text = node_without_title.text
|
31
|
+
text.nil? || text.strip.empty?
|
32
|
+
end
|
22
33
|
end
|
23
34
|
|
24
|
-
def
|
25
|
-
!
|
35
|
+
def html?
|
36
|
+
!blank?
|
26
37
|
end
|
27
38
|
|
28
39
|
def node
|
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
|
|
40
40
|
# Flatten, remove empty nodes and transform remaining nodes into reading fragments
|
41
41
|
result.map do |obj|
|
42
42
|
next obj unless obj.is_a?(Nokogiri::XML::Node)
|
43
|
-
next if obj.content.nil? || obj.content.strip.empty?
|
44
43
|
|
45
|
-
OpenStax::Content::Fragment::Reading.new
|
44
|
+
fragment = OpenStax::Content::Fragment::Reading.new(
|
45
|
+
node: obj, reference_view_url: reference_view_url
|
46
|
+
)
|
47
|
+
fragment unless fragment.blank?
|
46
48
|
end.compact.tap do |result|
|
47
49
|
@media_nodes.each do |node|
|
48
50
|
# Media processing instructions
|
data/lib/openstax/content/s3.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'aws-sdk-s3'
|
2
2
|
|
3
3
|
class OpenStax::Content::S3
|
4
|
-
def initialize
|
4
|
+
def initialize(bucket_name: nil)
|
5
|
+
@bucket_name = bucket_name
|
5
6
|
@ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
|
6
7
|
end
|
7
8
|
|
8
9
|
def bucket_name
|
9
|
-
OpenStax::Content.bucket_name
|
10
|
+
@bucket_name ||= OpenStax::Content.bucket_name
|
10
11
|
end
|
11
12
|
|
12
13
|
def bucket_configured?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: openstax_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dante Soares
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -168,7 +168,7 @@ homepage: https://github.com/openstax/content-ruby
|
|
168
168
|
licenses:
|
169
169
|
- AGPL-3.0
|
170
170
|
metadata: {}
|
171
|
-
post_install_message:
|
171
|
+
post_install_message:
|
172
172
|
rdoc_options: []
|
173
173
|
require_paths:
|
174
174
|
- lib
|
@@ -183,8 +183,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
183
|
- !ruby/object:Gem::Version
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
|
-
rubygems_version: 3.
|
187
|
-
signing_key:
|
186
|
+
rubygems_version: 3.1.4
|
187
|
+
signing_key:
|
188
188
|
specification_version: 4
|
189
189
|
summary: Ruby bindings to read and parse the OpenStax ABL and the content archive
|
190
190
|
test_files: []
|