openstax_content 0.0.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4aebb8f3f720b66d54a35964a2c7471a4c1791b1eddf3a79278fb42974c1b03b
|
4
|
+
data.tar.gz: 743c42fe074b7e8aabc47540785d53ff2dec03c0f3b40eb12f79f0624d1e0f9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af95d3a223033476027839a1c4d49e01a9e8d72c8be9185059d83dd13bf8544e79d291984eec3ac2473ff63850ddb45e1d5bca4a1bfc70d6e86fa99429db2c7f
|
7
|
+
data.tar.gz: 0eae65c706c57ef7a9e27125124972e62c8edaa77d22a9d16785ac2efcb6723693b1f18bc1c9ffdb82d699c6134bfa3453b1b491e6d7ccffc73df399e470aa11
|
@@ -28,10 +28,6 @@ class OpenStax::Content::Archive
|
|
28
28
|
end
|
29
29
|
|
30
30
|
if uri.absolute?
|
31
|
-
OpenStax::Content.logger.warn do
|
32
|
-
"#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
|
33
|
-
end
|
34
|
-
|
35
31
|
# Force absolute URLs to be https
|
36
32
|
uri.scheme = 'https'
|
37
33
|
return uri.to_s
|
@@ -48,6 +44,10 @@ class OpenStax::Content::Archive
|
|
48
44
|
if uri.path.start_with?('../')
|
49
45
|
uri.path = uri.path.sub('..', '')
|
50
46
|
"#{base_url}#{uri.to_s}"
|
47
|
+
elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
|
48
|
+
uri.path.start_with?("/#{OpenStax::Content.archive_path}")
|
49
|
+
uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
|
50
|
+
"https://#{OpenStax::Content.domain}/#{uri.to_s}"
|
51
51
|
else
|
52
52
|
uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
|
53
53
|
|
@@ -101,4 +101,29 @@ class OpenStax::Content::Archive
|
|
101
101
|
slug
|
102
102
|
end
|
103
103
|
end
|
104
|
+
|
105
|
+
def webview_uri_for(page)
|
106
|
+
uri = if page.is_a?(Addressable::URI)
|
107
|
+
page
|
108
|
+
else
|
109
|
+
begin
|
110
|
+
Addressable::URI.parse page
|
111
|
+
rescue Addressable::URI::InvalidURIError
|
112
|
+
begin
|
113
|
+
Addressable::URI.parse "/#{page}"
|
114
|
+
rescue Addressable::URI::InvalidURIError
|
115
|
+
OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
|
116
|
+
|
117
|
+
return page
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
object = uri.path.split('/').last
|
122
|
+
book_id, page_id = object.split(':', 2)
|
123
|
+
page_uuid = page_id.split('@', 2).first
|
124
|
+
book_slug = slug book_id
|
125
|
+
page_slug = slug object
|
126
|
+
uri.path = "books/#{book_slug}/pages/#{page_slug}"
|
127
|
+
Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
|
128
|
+
end
|
104
129
|
end
|
@@ -12,17 +12,28 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
|
|
12
12
|
@to_html = @node.to_html
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
# Serialization methods use #instance_variables to iterate through and dump all instance variables
|
16
|
+
# Nokogiri classes are not serializable, so we do not want to dump the @node variable
|
17
|
+
# Instead, we recreate it by parsing the HTML again if needed
|
18
|
+
def instance_variables
|
19
|
+
super - [ :@node ]
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
21
|
-
|
22
|
+
def blank?
|
23
|
+
return @blank unless @blank.nil?
|
24
|
+
|
25
|
+
@blank = if to_html.nil? || to_html.strip.empty?
|
26
|
+
true
|
27
|
+
else
|
28
|
+
node_without_title = node.dup
|
29
|
+
node_without_title.css('[data-type="document-title"]').remove
|
30
|
+
text = node_without_title.text
|
31
|
+
text.nil? || text.strip.empty?
|
32
|
+
end
|
22
33
|
end
|
23
34
|
|
24
|
-
def
|
25
|
-
!
|
35
|
+
def html?
|
36
|
+
!blank?
|
26
37
|
end
|
27
38
|
|
28
39
|
def node
|
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
|
|
40
40
|
# Flatten, remove empty nodes and transform remaining nodes into reading fragments
|
41
41
|
result.map do |obj|
|
42
42
|
next obj unless obj.is_a?(Nokogiri::XML::Node)
|
43
|
-
next if obj.content.nil? || obj.content.strip.empty?
|
44
43
|
|
45
|
-
OpenStax::Content::Fragment::Reading.new
|
44
|
+
fragment = OpenStax::Content::Fragment::Reading.new(
|
45
|
+
node: obj, reference_view_url: reference_view_url
|
46
|
+
)
|
47
|
+
fragment unless fragment.blank?
|
46
48
|
end.compact.tap do |result|
|
47
49
|
@media_nodes.each do |node|
|
48
50
|
# Media processing instructions
|
data/lib/openstax/content/s3.rb
CHANGED
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
|
|
2
2
|
|
3
3
|
class OpenStax::Content::S3
|
4
4
|
def initialize
|
5
|
-
@ls = {}
|
5
|
+
@ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
|
6
6
|
end
|
7
7
|
|
8
8
|
def bucket_name
|
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
|
|
21
21
|
)
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
# Returns the archive path for the given archive_version, book_id, page_uuid and extension
|
25
|
+
# If not all arguments are given, returns the prefix instead
|
26
|
+
def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
|
28
27
|
archive_path = OpenStax::Content.archive_path.chomp('/')
|
29
28
|
|
30
29
|
if archive_version.nil?
|
31
|
-
|
32
|
-
|
30
|
+
"#{archive_path}/"
|
31
|
+
elsif book_id.nil?
|
32
|
+
"#{archive_path}/#{archive_version}/contents/"
|
33
|
+
elsif page_uuid.nil?
|
34
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:"
|
35
|
+
elsif extension.nil?
|
36
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
|
37
|
+
else
|
38
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Without an archive version, returns a list of archive versions
|
43
|
+
# With an archive version, returns a list of book ids (uuid@version)
|
44
|
+
# With an archive version and a book, returns a list of page uuids
|
45
|
+
# With an archive version, book id and page uuid, returns the available extensions, if any
|
46
|
+
def ls(archive_version = nil, book_id = nil, page_uuid = nil)
|
47
|
+
return @ls[archive_version][book_id][page_uuid] \
|
48
|
+
unless @ls[archive_version][book_id][page_uuid].nil?
|
49
|
+
return unless bucket_configured?
|
50
|
+
|
51
|
+
prefix = path_for archive_version, book_id, page_uuid
|
52
|
+
|
53
|
+
delimiter = if archive_version.nil?
|
54
|
+
'/'
|
55
|
+
elsif book_id.nil?
|
56
|
+
':'
|
57
|
+
elsif page_uuid.nil?
|
58
|
+
'.'
|
59
|
+
else
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
|
63
|
+
responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
|
64
|
+
|
65
|
+
@ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
|
66
|
+
responses.flat_map(&:common_prefixes).map do |common_prefix|
|
67
|
+
common_prefix.prefix.sub(prefix, '').chomp(delimiter)
|
68
|
+
end
|
33
69
|
else
|
34
|
-
prefix
|
35
|
-
delimiter = ':'
|
70
|
+
responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
|
36
71
|
end
|
72
|
+
end
|
37
73
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
74
|
+
# Checks all books for the given page uuid and returns the path to the first one found
|
75
|
+
def find_page(page_uuid, archive_version: nil, extension: 'json')
|
76
|
+
archive_version ||= ls.last
|
77
|
+
|
78
|
+
ls(archive_version).each do |book_id|
|
79
|
+
return path_for(archive_version, book_id, page_uuid, extension) \
|
80
|
+
if ls(archive_version, book_id, page_uuid).include?(extension)
|
42
81
|
end
|
82
|
+
|
83
|
+
nil
|
43
84
|
end
|
44
85
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: openstax_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dante Soares
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -168,7 +168,7 @@ homepage: https://github.com/openstax/content-ruby
|
|
168
168
|
licenses:
|
169
169
|
- AGPL-3.0
|
170
170
|
metadata: {}
|
171
|
-
post_install_message:
|
171
|
+
post_install_message:
|
172
172
|
rdoc_options: []
|
173
173
|
require_paths:
|
174
174
|
- lib
|
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
186
|
rubygems_version: 3.2.19
|
187
|
-
signing_key:
|
187
|
+
signing_key:
|
188
188
|
specification_version: 4
|
189
189
|
summary: Ruby bindings to read and parse the OpenStax ABL and the content archive
|
190
190
|
test_files: []
|