openstax_content 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/openstax/content/abl.rb +74 -6
- data/lib/openstax/content/archive.rb +14 -14
- data/lib/openstax/content/book.rb +16 -31
- data/lib/openstax/content/book_part.rb +6 -0
- data/lib/openstax/content/fragment/html.rb +18 -7
- data/lib/openstax/content/fragment_splitter.rb +4 -2
- data/lib/openstax/content/s3.rb +3 -2
- data/lib/openstax/content/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ce0c0361f6d9e8205eb18893bb7d4f4bf726a2adf6fab3fe35649c56ae55876
|
4
|
+
data.tar.gz: ccec97c10020878437487261de88a742ccf16480e178227a323420ed2f9fe4fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 419d040e582db3b9233e2a6ebfcc3a6ad5bf908ed656a74aa67ecab1f111ad9278947e9bae3dab41d0479f36c7ad8750cabdac760dd6ca8ba73acacc8b94841d
|
7
|
+
data.tar.gz: aafb9064479bdaa157c557018b46c442f9c9abc4f0ce8fb48e12b7f81c8f0f09f2bba2829db6227bc7d5c36831b9909bb581839cea80da65cea87a5b807d33a9
|
data/lib/openstax/content/abl.rb
CHANGED
@@ -1,13 +1,81 @@
|
|
1
|
+
require_relative 'archive'
|
2
|
+
require_relative 'book'
|
3
|
+
|
1
4
|
class OpenStax::Content::Abl
|
2
|
-
def
|
3
|
-
@
|
5
|
+
def initialize(url: nil)
|
6
|
+
@url = url
|
7
|
+
end
|
8
|
+
|
9
|
+
def url
|
10
|
+
@url ||= OpenStax::Content.abl_url
|
11
|
+
end
|
12
|
+
|
13
|
+
def body_string
|
14
|
+
@body_string ||= Faraday.get(url).body
|
4
15
|
end
|
5
16
|
|
6
|
-
def
|
7
|
-
|
17
|
+
def body_hash
|
18
|
+
@body_hash ||= JSON.parse(body_string, symbolize_names: true)
|
8
19
|
end
|
9
20
|
|
10
|
-
def
|
11
|
-
|
21
|
+
def digest
|
22
|
+
Digest::SHA256.hexdigest body_string
|
23
|
+
end
|
24
|
+
|
25
|
+
def latest_approved_version_by_collection_id(archive: OpenStax::Content::Archive.new)
|
26
|
+
{}.tap do |hash|
|
27
|
+
body_hash[:approved_versions].each do |version|
|
28
|
+
next if version[:min_code_version] > archive.version
|
29
|
+
|
30
|
+
existing_version = hash[version[:collection_id]]
|
31
|
+
|
32
|
+
next if !existing_version.nil? &&
|
33
|
+
(existing_version[:content_version].split('.').map(&:to_i) <=>
|
34
|
+
version[:content_version].split('.').map(&:to_i)) >= 0
|
35
|
+
|
36
|
+
hash[version[:collection_id]] = version
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def approved_books(archive: OpenStax::Content::Archive.new)
|
42
|
+
# Can be removed once we have no more CNX books
|
43
|
+
version_by_collection_id = latest_approved_version_by_collection_id(archive: archive)
|
44
|
+
|
45
|
+
body_hash[:approved_books].flat_map do |approved_book|
|
46
|
+
if approved_book[:versions].nil?
|
47
|
+
# CNX-hosted book
|
48
|
+
version = version_by_collection_id[approved_book[:collection_id]]
|
49
|
+
|
50
|
+
next [] if version.nil?
|
51
|
+
|
52
|
+
approved_book[:books].map do |book|
|
53
|
+
OpenStax::Content::Book.new(
|
54
|
+
archive: archive,
|
55
|
+
uuid: book[:uuid],
|
56
|
+
version: version[:content_version].sub('1.', ''),
|
57
|
+
slug: book[:slug],
|
58
|
+
style: approved_book[:style]
|
59
|
+
)
|
60
|
+
end
|
61
|
+
else
|
62
|
+
# Git-hosted book
|
63
|
+
approved_book[:versions].flat_map do |version|
|
64
|
+
next [] if version[:min_code_version] > archive.version
|
65
|
+
|
66
|
+
commit_metadata = version[:commit_metadata]
|
67
|
+
|
68
|
+
commit_metadata[:books].map do |book|
|
69
|
+
OpenStax::Content::Book.new(
|
70
|
+
archive: archive,
|
71
|
+
uuid: book[:uuid],
|
72
|
+
version: version[:commit_sha][0..6],
|
73
|
+
slug: book[:slug],
|
74
|
+
style: book[:style]
|
75
|
+
)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
12
80
|
end
|
13
81
|
end
|
@@ -2,14 +2,22 @@ require 'addressable/uri'
|
|
2
2
|
require 'faraday'
|
3
3
|
|
4
4
|
class OpenStax::Content::Archive
|
5
|
-
def initialize(version)
|
5
|
+
def initialize(version: nil)
|
6
6
|
@version = version
|
7
7
|
@slugs = {}
|
8
8
|
end
|
9
9
|
|
10
|
+
def s3
|
11
|
+
@s3 ||= OpenStax::Content::S3.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def version
|
15
|
+
@version ||= s3.ls.last
|
16
|
+
end
|
17
|
+
|
10
18
|
def base_url
|
11
19
|
@base_url ||= "https://#{OpenStax::Content.domain}/#{
|
12
|
-
OpenStax::Content.archive_path}/#{
|
20
|
+
OpenStax::Content.archive_path}/#{version}"
|
13
21
|
end
|
14
22
|
|
15
23
|
def url_for(object)
|
@@ -28,10 +36,6 @@ class OpenStax::Content::Archive
|
|
28
36
|
end
|
29
37
|
|
30
38
|
if uri.absolute?
|
31
|
-
OpenStax::Content.logger.warn do
|
32
|
-
"#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
|
33
|
-
end
|
34
|
-
|
35
39
|
# Force absolute URLs to be https
|
36
40
|
uri.scheme = 'https'
|
37
41
|
return uri.to_s
|
@@ -76,20 +80,16 @@ class OpenStax::Content::Archive
|
|
76
80
|
end
|
77
81
|
end
|
78
82
|
|
79
|
-
def s3
|
80
|
-
@s3 ||= OpenStax::Content::S3.new
|
81
|
-
end
|
82
|
-
|
83
83
|
def add_latest_book_version_if_missing(object)
|
84
84
|
book_id, page_id = object.split(':', 2)
|
85
85
|
book_uuid, book_version = book_id.split('@', 2)
|
86
86
|
return object unless book_version.nil? && s3.bucket_configured?
|
87
87
|
|
88
|
-
s3.ls(
|
89
|
-
|
90
|
-
next unless
|
88
|
+
s3.ls(version).each do |book|
|
89
|
+
s3_uuid, s3_version = book.split('@')
|
90
|
+
next unless s3_uuid == book_uuid
|
91
91
|
|
92
|
-
book_version =
|
92
|
+
book_version = s3_version
|
93
93
|
break
|
94
94
|
end
|
95
95
|
|
@@ -1,33 +1,28 @@
|
|
1
|
-
require_relative 'archive'
|
2
1
|
require_relative 'book_part'
|
3
2
|
|
4
3
|
class OpenStax::Content::Book
|
5
|
-
|
6
|
-
archive_version:, uuid: nil, version: nil, hash: nil, title: nil, tree: nil, root_book_part: nil
|
7
|
-
)
|
8
|
-
@uuid = uuid || (hash || {})['id']
|
9
|
-
raise ArgumentError, 'Either uuid or hash with id key is required' if @uuid.nil?
|
4
|
+
extend Forwardable
|
10
5
|
|
11
|
-
|
12
|
-
raise ArgumentError, 'Either version or hash with version key is required' if @version.nil?
|
6
|
+
attr_reader :archive, :uuid, :version, :slug, :style
|
13
7
|
|
14
|
-
|
15
|
-
@
|
16
|
-
@
|
17
|
-
@
|
18
|
-
@
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
def archive
|
24
|
-
@archive ||= OpenStax::Content::Archive.new archive_version
|
8
|
+
def initialize(archive:, uuid:, version:, url: nil, hash: nil, slug: nil, style: nil)
|
9
|
+
@archive = archive
|
10
|
+
@uuid = uuid
|
11
|
+
@version = version
|
12
|
+
@url = url
|
13
|
+
@hash = hash
|
14
|
+
@slug = slug
|
15
|
+
@style = style
|
25
16
|
end
|
26
17
|
|
27
18
|
def url
|
28
19
|
@url ||= archive.url_for "#{uuid}@#{version}"
|
29
20
|
end
|
30
21
|
|
22
|
+
def hash
|
23
|
+
@hash ||= archive.json url
|
24
|
+
end
|
25
|
+
|
31
26
|
def url_fragment
|
32
27
|
@url_fragment ||= url.chomp('.json')
|
33
28
|
end
|
@@ -40,22 +35,10 @@ class OpenStax::Content::Book
|
|
40
35
|
@collated ||= hash.fetch('collated', false)
|
41
36
|
end
|
42
37
|
|
43
|
-
def hash
|
44
|
-
@hash ||= archive.json url
|
45
|
-
end
|
46
|
-
|
47
|
-
def uuid
|
48
|
-
@uuid ||= hash.fetch('id')
|
49
|
-
end
|
50
|
-
|
51
38
|
def short_id
|
52
39
|
@short_id ||= hash['shortId']
|
53
40
|
end
|
54
41
|
|
55
|
-
def version
|
56
|
-
@version ||= hash.fetch('version')
|
57
|
-
end
|
58
|
-
|
59
42
|
def title
|
60
43
|
@title ||= hash.fetch('title')
|
61
44
|
end
|
@@ -67,4 +50,6 @@ class OpenStax::Content::Book
|
|
67
50
|
def root_book_part
|
68
51
|
@root_book_part ||= OpenStax::Content::BookPart.new(hash: tree, is_root: true, book: self)
|
69
52
|
end
|
53
|
+
|
54
|
+
def_delegator :root_book_part, :all_pages
|
70
55
|
end
|
@@ -12,17 +12,28 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
|
|
12
12
|
@to_html = @node.to_html
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
# Serialization methods use #instance_variables to iterate through and dump all instance variables
|
16
|
+
# Nokogiri classes are not serializable, so we do not want to dump the @node variable
|
17
|
+
# Instead, we recreate it by parsing the HTML again if needed
|
18
|
+
def instance_variables
|
19
|
+
super - [ :@node ]
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
21
|
-
|
22
|
+
def blank?
|
23
|
+
return @blank unless @blank.nil?
|
24
|
+
|
25
|
+
@blank = if to_html.nil? || to_html.strip.empty?
|
26
|
+
true
|
27
|
+
else
|
28
|
+
node_without_title = node.dup
|
29
|
+
node_without_title.css('[data-type="document-title"]').remove
|
30
|
+
text = node_without_title.text
|
31
|
+
text.nil? || text.strip.empty?
|
32
|
+
end
|
22
33
|
end
|
23
34
|
|
24
|
-
def
|
25
|
-
!
|
35
|
+
def html?
|
36
|
+
!blank?
|
26
37
|
end
|
27
38
|
|
28
39
|
def node
|
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
|
|
40
40
|
# Flatten, remove empty nodes and transform remaining nodes into reading fragments
|
41
41
|
result.map do |obj|
|
42
42
|
next obj unless obj.is_a?(Nokogiri::XML::Node)
|
43
|
-
next if obj.content.nil? || obj.content.strip.empty?
|
44
43
|
|
45
|
-
OpenStax::Content::Fragment::Reading.new
|
44
|
+
fragment = OpenStax::Content::Fragment::Reading.new(
|
45
|
+
node: obj, reference_view_url: reference_view_url
|
46
|
+
)
|
47
|
+
fragment unless fragment.blank?
|
46
48
|
end.compact.tap do |result|
|
47
49
|
@media_nodes.each do |node|
|
48
50
|
# Media processing instructions
|
data/lib/openstax/content/s3.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'aws-sdk-s3'
|
2
2
|
|
3
3
|
class OpenStax::Content::S3
|
4
|
-
def initialize
|
4
|
+
def initialize(bucket_name: nil)
|
5
|
+
@bucket_name = bucket_name
|
5
6
|
@ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
|
6
7
|
end
|
7
8
|
|
8
9
|
def bucket_name
|
9
|
-
OpenStax::Content.bucket_name
|
10
|
+
@bucket_name ||= OpenStax::Content.bucket_name
|
10
11
|
end
|
11
12
|
|
12
13
|
def bucket_configured?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: openstax_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dante Soares
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -168,7 +168,7 @@ homepage: https://github.com/openstax/content-ruby
|
|
168
168
|
licenses:
|
169
169
|
- AGPL-3.0
|
170
170
|
metadata: {}
|
171
|
-
post_install_message:
|
171
|
+
post_install_message:
|
172
172
|
rdoc_options: []
|
173
173
|
require_paths:
|
174
174
|
- lib
|
@@ -183,8 +183,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
183
|
- !ruby/object:Gem::Version
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
|
-
rubygems_version: 3.
|
187
|
-
signing_key:
|
186
|
+
rubygems_version: 3.1.4
|
187
|
+
signing_key:
|
188
188
|
specification_version: 4
|
189
189
|
summary: Ruby bindings to read and parse the OpenStax ABL and the content archive
|
190
190
|
test_files: []
|