openstax_content 0.0.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4aebb8f3f720b66d54a35964a2c7471a4c1791b1eddf3a79278fb42974c1b03b
|
4
|
+
data.tar.gz: 743c42fe074b7e8aabc47540785d53ff2dec03c0f3b40eb12f79f0624d1e0f9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af95d3a223033476027839a1c4d49e01a9e8d72c8be9185059d83dd13bf8544e79d291984eec3ac2473ff63850ddb45e1d5bca4a1bfc70d6e86fa99429db2c7f
|
7
|
+
data.tar.gz: 0eae65c706c57ef7a9e27125124972e62c8edaa77d22a9d16785ac2efcb6723693b1f18bc1c9ffdb82d699c6134bfa3453b1b491e6d7ccffc73df399e470aa11
|
@@ -28,10 +28,6 @@ class OpenStax::Content::Archive
|
|
28
28
|
end
|
29
29
|
|
30
30
|
if uri.absolute?
|
31
|
-
OpenStax::Content.logger.warn do
|
32
|
-
"#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
|
33
|
-
end
|
34
|
-
|
35
31
|
# Force absolute URLs to be https
|
36
32
|
uri.scheme = 'https'
|
37
33
|
return uri.to_s
|
@@ -48,6 +44,10 @@ class OpenStax::Content::Archive
|
|
48
44
|
if uri.path.start_with?('../')
|
49
45
|
uri.path = uri.path.sub('..', '')
|
50
46
|
"#{base_url}#{uri.to_s}"
|
47
|
+
elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
|
48
|
+
uri.path.start_with?("/#{OpenStax::Content.archive_path}")
|
49
|
+
uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
|
50
|
+
"https://#{OpenStax::Content.domain}/#{uri.to_s}"
|
51
51
|
else
|
52
52
|
uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
|
53
53
|
|
@@ -101,4 +101,29 @@ class OpenStax::Content::Archive
|
|
101
101
|
slug
|
102
102
|
end
|
103
103
|
end
|
104
|
+
|
105
|
+
def webview_uri_for(page)
|
106
|
+
uri = if page.is_a?(Addressable::URI)
|
107
|
+
page
|
108
|
+
else
|
109
|
+
begin
|
110
|
+
Addressable::URI.parse page
|
111
|
+
rescue Addressable::URI::InvalidURIError
|
112
|
+
begin
|
113
|
+
Addressable::URI.parse "/#{page}"
|
114
|
+
rescue Addressable::URI::InvalidURIError
|
115
|
+
OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
|
116
|
+
|
117
|
+
return page
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
object = uri.path.split('/').last
|
122
|
+
book_id, page_id = object.split(':', 2)
|
123
|
+
page_uuid = page_id.split('@', 2).first
|
124
|
+
book_slug = slug book_id
|
125
|
+
page_slug = slug object
|
126
|
+
uri.path = "books/#{book_slug}/pages/#{page_slug}"
|
127
|
+
Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
|
128
|
+
end
|
104
129
|
end
|
@@ -12,17 +12,28 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
|
|
12
12
|
@to_html = @node.to_html
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
# Serialization methods use #instance_variables to iterate through and dump all instance variables
|
16
|
+
# Nokogiri classes are not serializable, so we do not want to dump the @node variable
|
17
|
+
# Instead, we recreate it by parsing the HTML again if needed
|
18
|
+
def instance_variables
|
19
|
+
super - [ :@node ]
|
18
20
|
end
|
19
21
|
|
20
|
-
def
|
21
|
-
|
22
|
+
def blank?
|
23
|
+
return @blank unless @blank.nil?
|
24
|
+
|
25
|
+
@blank = if to_html.nil? || to_html.strip.empty?
|
26
|
+
true
|
27
|
+
else
|
28
|
+
node_without_title = node.dup
|
29
|
+
node_without_title.css('[data-type="document-title"]').remove
|
30
|
+
text = node_without_title.text
|
31
|
+
text.nil? || text.strip.empty?
|
32
|
+
end
|
22
33
|
end
|
23
34
|
|
24
|
-
def
|
25
|
-
!
|
35
|
+
def html?
|
36
|
+
!blank?
|
26
37
|
end
|
27
38
|
|
28
39
|
def node
|
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
|
|
40
40
|
# Flatten, remove empty nodes and transform remaining nodes into reading fragments
|
41
41
|
result.map do |obj|
|
42
42
|
next obj unless obj.is_a?(Nokogiri::XML::Node)
|
43
|
-
next if obj.content.nil? || obj.content.strip.empty?
|
44
43
|
|
45
|
-
OpenStax::Content::Fragment::Reading.new
|
44
|
+
fragment = OpenStax::Content::Fragment::Reading.new(
|
45
|
+
node: obj, reference_view_url: reference_view_url
|
46
|
+
)
|
47
|
+
fragment unless fragment.blank?
|
46
48
|
end.compact.tap do |result|
|
47
49
|
@media_nodes.each do |node|
|
48
50
|
# Media processing instructions
|
data/lib/openstax/content/s3.rb
CHANGED
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
|
|
2
2
|
|
3
3
|
class OpenStax::Content::S3
|
4
4
|
def initialize
|
5
|
-
@ls = {}
|
5
|
+
@ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
|
6
6
|
end
|
7
7
|
|
8
8
|
def bucket_name
|
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
|
|
21
21
|
)
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
# Returns the archive path for the given archive_version, book_id, page_uuid and extension
|
25
|
+
# If not all arguments are given, returns the prefix instead
|
26
|
+
def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
|
28
27
|
archive_path = OpenStax::Content.archive_path.chomp('/')
|
29
28
|
|
30
29
|
if archive_version.nil?
|
31
|
-
|
32
|
-
|
30
|
+
"#{archive_path}/"
|
31
|
+
elsif book_id.nil?
|
32
|
+
"#{archive_path}/#{archive_version}/contents/"
|
33
|
+
elsif page_uuid.nil?
|
34
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:"
|
35
|
+
elsif extension.nil?
|
36
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
|
37
|
+
else
|
38
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Without an archive version, returns a list of archive versions
|
43
|
+
# With an archive version, returns a list of book ids (uuid@version)
|
44
|
+
# With an archive version and a book, returns a list of page uuids
|
45
|
+
# With an archive version, book id and page uuid, returns the available extensions, if any
|
46
|
+
def ls(archive_version = nil, book_id = nil, page_uuid = nil)
|
47
|
+
return @ls[archive_version][book_id][page_uuid] \
|
48
|
+
unless @ls[archive_version][book_id][page_uuid].nil?
|
49
|
+
return unless bucket_configured?
|
50
|
+
|
51
|
+
prefix = path_for archive_version, book_id, page_uuid
|
52
|
+
|
53
|
+
delimiter = if archive_version.nil?
|
54
|
+
'/'
|
55
|
+
elsif book_id.nil?
|
56
|
+
':'
|
57
|
+
elsif page_uuid.nil?
|
58
|
+
'.'
|
59
|
+
else
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
|
63
|
+
responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
|
64
|
+
|
65
|
+
@ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
|
66
|
+
responses.flat_map(&:common_prefixes).map do |common_prefix|
|
67
|
+
common_prefix.prefix.sub(prefix, '').chomp(delimiter)
|
68
|
+
end
|
33
69
|
else
|
34
|
-
prefix
|
35
|
-
delimiter = ':'
|
70
|
+
responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
|
36
71
|
end
|
72
|
+
end
|
37
73
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
74
|
+
# Checks all books for the given page uuid and returns the path to the first one found
|
75
|
+
def find_page(page_uuid, archive_version: nil, extension: 'json')
|
76
|
+
archive_version ||= ls.last
|
77
|
+
|
78
|
+
ls(archive_version).each do |book_id|
|
79
|
+
return path_for(archive_version, book_id, page_uuid, extension) \
|
80
|
+
if ls(archive_version, book_id, page_uuid).include?(extension)
|
42
81
|
end
|
82
|
+
|
83
|
+
nil
|
43
84
|
end
|
44
85
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: openstax_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dante Soares
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -168,7 +168,7 @@ homepage: https://github.com/openstax/content-ruby
|
|
168
168
|
licenses:
|
169
169
|
- AGPL-3.0
|
170
170
|
metadata: {}
|
171
|
-
post_install_message:
|
171
|
+
post_install_message:
|
172
172
|
rdoc_options: []
|
173
173
|
require_paths:
|
174
174
|
- lib
|
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
186
|
rubygems_version: 3.2.19
|
187
|
-
signing_key:
|
187
|
+
signing_key:
|
188
188
|
specification_version: 4
|
189
189
|
summary: Ruby bindings to read and parse the OpenStax ABL and the content archive
|
190
190
|
test_files: []
|