openstax_content 0.0.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 578cf5b445c79d9e7dd1a904da90cb3daf527d2d8b7de128d07640e15d8bd066
4
- data.tar.gz: b2bfe3524398a047b6722a047aa245de8ed52d02e4c9c37ca28f0b36840ba081
3
+ metadata.gz: 4aebb8f3f720b66d54a35964a2c7471a4c1791b1eddf3a79278fb42974c1b03b
4
+ data.tar.gz: 743c42fe074b7e8aabc47540785d53ff2dec03c0f3b40eb12f79f0624d1e0f9c
5
5
  SHA512:
6
- metadata.gz: c2f7708cce0fb71727d985dcef91f6b0844f1b3ba2b134a55cc6f4eb3bddf0132c633cce04ed8221d16f8aac785a08f1ef16fe17e6f184c7f481c479054e496b
7
- data.tar.gz: 07bd028f00e935ef240585da56b2fd88e82959da61213787f03421e1f07c0afecb8f3257b9b269ff1affea5483a227f70d299e1782a528e7ebb1c4d96dd2dd6b
6
+ metadata.gz: af95d3a223033476027839a1c4d49e01a9e8d72c8be9185059d83dd13bf8544e79d291984eec3ac2473ff63850ddb45e1d5bca4a1bfc70d6e86fa99429db2c7f
7
+ data.tar.gz: 0eae65c706c57ef7a9e27125124972e62c8edaa77d22a9d16785ac2efcb6723693b1f18bc1c9ffdb82d699c6134bfa3453b1b491e6d7ccffc73df399e470aa11
@@ -28,10 +28,6 @@ class OpenStax::Content::Archive
28
28
  end
29
29
 
30
30
  if uri.absolute?
31
- OpenStax::Content.logger.warn do
32
- "#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
33
- end
34
-
35
31
  # Force absolute URLs to be https
36
32
  uri.scheme = 'https'
37
33
  return uri.to_s
@@ -48,6 +44,10 @@ class OpenStax::Content::Archive
48
44
  if uri.path.start_with?('../')
49
45
  uri.path = uri.path.sub('..', '')
50
46
  "#{base_url}#{uri.to_s}"
47
+ elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
48
+ uri.path.start_with?("/#{OpenStax::Content.archive_path}")
49
+ uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
50
+ "https://#{OpenStax::Content.domain}/#{uri.to_s}"
51
51
  else
52
52
  uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
53
53
 
@@ -101,4 +101,29 @@ class OpenStax::Content::Archive
101
101
  slug
102
102
  end
103
103
  end
104
+
105
+ def webview_uri_for(page)
106
+ uri = if page.is_a?(Addressable::URI)
107
+ page
108
+ else
109
+ begin
110
+ Addressable::URI.parse page
111
+ rescue Addressable::URI::InvalidURIError
112
+ begin
113
+ Addressable::URI.parse "/#{page}"
114
+ rescue Addressable::URI::InvalidURIError
115
+ OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
116
+
117
+ return page
118
+ end
119
+ end
120
+ end
121
+ object = uri.path.split('/').last
122
+ book_id, page_id = object.split(':', 2)
123
+ page_uuid = page_id.split('@', 2).first
124
+ book_slug = slug book_id
125
+ page_slug = slug object
126
+ uri.path = "books/#{book_slug}/pages/#{page_slug}"
127
+ Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
128
+ end
104
129
  end
@@ -12,17 +12,28 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
12
12
  @to_html = @node.to_html
13
13
  end
14
14
 
15
- def as_json(*args)
16
- # Don't attempt to serialize @node (it would fail)
17
- super.except('node')
15
+ # Serialization methods use #instance_variables to iterate through and dump all instance variables
16
+ # Nokogiri classes are not serializable, so we do not want to dump the @node variable
17
+ # Instead, we recreate it by parsing the HTML again if needed
18
+ def instance_variables
19
+ super - [ :@node ]
18
20
  end
19
21
 
20
- def html?
21
- !to_html.empty?
22
+ def blank?
23
+ return @blank unless @blank.nil?
24
+
25
+ @blank = if to_html.nil? || to_html.strip.empty?
26
+ true
27
+ else
28
+ node_without_title = node.dup
29
+ node_without_title.css('[data-type="document-title"]').remove
30
+ text = node_without_title.text
31
+ text.nil? || text.strip.empty?
32
+ end
22
33
  end
23
34
 
24
- def blank?
25
- !html?
35
+ def html?
36
+ !blank?
26
37
  end
27
38
 
28
39
  def node
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
40
40
  # Flatten, remove empty nodes and transform remaining nodes into reading fragments
41
41
  result.map do |obj|
42
42
  next obj unless obj.is_a?(Nokogiri::XML::Node)
43
- next if obj.content.nil? || obj.content.strip.empty?
44
43
 
45
- OpenStax::Content::Fragment::Reading.new node: obj, reference_view_url: reference_view_url
44
+ fragment = OpenStax::Content::Fragment::Reading.new(
45
+ node: obj, reference_view_url: reference_view_url
46
+ )
47
+ fragment unless fragment.blank?
46
48
  end.compact.tap do |result|
47
49
  @media_nodes.each do |node|
48
50
  # Media processing instructions
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
2
2
 
3
3
  class OpenStax::Content::S3
4
4
  def initialize
5
- @ls = {}
5
+ @ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
6
6
  end
7
7
 
8
8
  def bucket_name
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
21
21
  )
22
22
  end
23
23
 
24
- def ls(archive_version = nil)
25
- return @ls[archive_version] unless @ls[archive_version].nil?
26
- return unless bucket_configured?
27
-
24
+ # Returns the archive path for the given archive_version, book_id, page_uuid and extension
25
+ # If not all arguments are given, returns the prefix instead
26
+ def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
28
27
  archive_path = OpenStax::Content.archive_path.chomp('/')
29
28
 
30
29
  if archive_version.nil?
31
- prefix = "#{archive_path}/"
32
- delimiter = '/'
30
+ "#{archive_path}/"
31
+ elsif book_id.nil?
32
+ "#{archive_path}/#{archive_version}/contents/"
33
+ elsif page_uuid.nil?
34
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:"
35
+ elsif extension.nil?
36
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
37
+ else
38
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
39
+ end
40
+ end
41
+
42
+ # Without an archive version, returns a list of archive versions
43
+ # With an archive version, returns a list of book ids (uuid@version)
44
+ # With an archive version and a book, returns a list of page uuids
45
+ # With an archive version, book id and page uuid, returns the available extensions, if any
46
+ def ls(archive_version = nil, book_id = nil, page_uuid = nil)
47
+ return @ls[archive_version][book_id][page_uuid] \
48
+ unless @ls[archive_version][book_id][page_uuid].nil?
49
+ return unless bucket_configured?
50
+
51
+ prefix = path_for archive_version, book_id, page_uuid
52
+
53
+ delimiter = if archive_version.nil?
54
+ '/'
55
+ elsif book_id.nil?
56
+ ':'
57
+ elsif page_uuid.nil?
58
+ '.'
59
+ else
60
+ nil
61
+ end
62
+
63
+ responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
64
+
65
+ @ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
66
+ responses.flat_map(&:common_prefixes).map do |common_prefix|
67
+ common_prefix.prefix.sub(prefix, '').chomp(delimiter)
68
+ end
33
69
  else
34
- prefix = "#{archive_path}/#{archive_version}/contents/"
35
- delimiter = ':'
70
+ responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
36
71
  end
72
+ end
37
73
 
38
- @ls[archive_version] = client.list_objects_v2(
39
- bucket: bucket_name, prefix: prefix, delimiter: delimiter
40
- ).flat_map(&:common_prefixes).map do |common_prefix|
41
- common_prefix.prefix.sub(prefix, '').chomp(delimiter)
74
+ # Checks all books for the given page uuid and returns the path to the first one found
75
+ def find_page(page_uuid, archive_version: nil, extension: 'json')
76
+ archive_version ||= ls.last
77
+
78
+ ls(archive_version).each do |book_id|
79
+ return path_for(archive_version, book_id, page_uuid, extension) \
80
+ if ls(archive_version, book_id, page_uuid).include?(extension)
42
81
  end
82
+
83
+ nil
43
84
  end
44
85
  end
@@ -1,5 +1,5 @@
1
1
  module OpenStax
2
2
  module Content
3
- VERSION = '0.0.2'
3
+ VERSION = '0.3.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: openstax_content
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dante Soares
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-11 00:00:00.000000000 Z
11
+ date: 2021-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -168,7 +168,7 @@ homepage: https://github.com/openstax/content-ruby
168
168
  licenses:
169
169
  - AGPL-3.0
170
170
  metadata: {}
171
- post_install_message:
171
+ post_install_message:
172
172
  rdoc_options: []
173
173
  require_paths:
174
174
  - lib
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  requirements: []
186
186
  rubygems_version: 3.2.19
187
- signing_key:
187
+ signing_key:
188
188
  specification_version: 4
189
189
  summary: Ruby bindings to read and parse the OpenStax ABL and the content archive
190
190
  test_files: []