openstax_content 0.0.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 578cf5b445c79d9e7dd1a904da90cb3daf527d2d8b7de128d07640e15d8bd066
4
- data.tar.gz: b2bfe3524398a047b6722a047aa245de8ed52d02e4c9c37ca28f0b36840ba081
3
+ metadata.gz: 4aebb8f3f720b66d54a35964a2c7471a4c1791b1eddf3a79278fb42974c1b03b
4
+ data.tar.gz: 743c42fe074b7e8aabc47540785d53ff2dec03c0f3b40eb12f79f0624d1e0f9c
5
5
  SHA512:
6
- metadata.gz: c2f7708cce0fb71727d985dcef91f6b0844f1b3ba2b134a55cc6f4eb3bddf0132c633cce04ed8221d16f8aac785a08f1ef16fe17e6f184c7f481c479054e496b
7
- data.tar.gz: 07bd028f00e935ef240585da56b2fd88e82959da61213787f03421e1f07c0afecb8f3257b9b269ff1affea5483a227f70d299e1782a528e7ebb1c4d96dd2dd6b
6
+ metadata.gz: af95d3a223033476027839a1c4d49e01a9e8d72c8be9185059d83dd13bf8544e79d291984eec3ac2473ff63850ddb45e1d5bca4a1bfc70d6e86fa99429db2c7f
7
+ data.tar.gz: 0eae65c706c57ef7a9e27125124972e62c8edaa77d22a9d16785ac2efcb6723693b1f18bc1c9ffdb82d699c6134bfa3453b1b491e6d7ccffc73df399e470aa11
@@ -28,10 +28,6 @@ class OpenStax::Content::Archive
28
28
  end
29
29
 
30
30
  if uri.absolute?
31
- OpenStax::Content.logger.warn do
32
- "#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
33
- end
34
-
35
31
  # Force absolute URLs to be https
36
32
  uri.scheme = 'https'
37
33
  return uri.to_s
@@ -48,6 +44,10 @@ class OpenStax::Content::Archive
48
44
  if uri.path.start_with?('../')
49
45
  uri.path = uri.path.sub('..', '')
50
46
  "#{base_url}#{uri.to_s}"
47
+ elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
48
+ uri.path.start_with?("/#{OpenStax::Content.archive_path}")
49
+ uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
50
+ "https://#{OpenStax::Content.domain}/#{uri.to_s}"
51
51
  else
52
52
  uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
53
53
 
@@ -101,4 +101,29 @@ class OpenStax::Content::Archive
101
101
  slug
102
102
  end
103
103
  end
104
+
105
+ def webview_uri_for(page)
106
+ uri = if page.is_a?(Addressable::URI)
107
+ page
108
+ else
109
+ begin
110
+ Addressable::URI.parse page
111
+ rescue Addressable::URI::InvalidURIError
112
+ begin
113
+ Addressable::URI.parse "/#{page}"
114
+ rescue Addressable::URI::InvalidURIError
115
+ OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
116
+
117
+ return page
118
+ end
119
+ end
120
+ end
121
+ object = uri.path.split('/').last
122
+ book_id, page_id = object.split(':', 2)
123
+ page_uuid = page_id.split('@', 2).first
124
+ book_slug = slug book_id
125
+ page_slug = slug object
126
+ uri.path = "books/#{book_slug}/pages/#{page_slug}"
127
+ Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
128
+ end
104
129
  end
@@ -12,17 +12,28 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
12
12
  @to_html = @node.to_html
13
13
  end
14
14
 
15
- def as_json(*args)
16
- # Don't attempt to serialize @node (it would fail)
17
- super.except('node')
15
+ # Serialization methods use #instance_variables to iterate through and dump all instance variables
16
+ # Nokogiri classes are not serializable, so we do not want to dump the @node variable
17
+ # Instead, we recreate it by parsing the HTML again if needed
18
+ def instance_variables
19
+ super - [ :@node ]
18
20
  end
19
21
 
20
- def html?
21
- !to_html.empty?
22
+ def blank?
23
+ return @blank unless @blank.nil?
24
+
25
+ @blank = if to_html.nil? || to_html.strip.empty?
26
+ true
27
+ else
28
+ node_without_title = node.dup
29
+ node_without_title.css('[data-type="document-title"]').remove
30
+ text = node_without_title.text
31
+ text.nil? || text.strip.empty?
32
+ end
22
33
  end
23
34
 
24
- def blank?
25
- !html?
35
+ def html?
36
+ !blank?
26
37
  end
27
38
 
28
39
  def node
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
40
40
  # Flatten, remove empty nodes and transform remaining nodes into reading fragments
41
41
  result.map do |obj|
42
42
  next obj unless obj.is_a?(Nokogiri::XML::Node)
43
- next if obj.content.nil? || obj.content.strip.empty?
44
43
 
45
- OpenStax::Content::Fragment::Reading.new node: obj, reference_view_url: reference_view_url
44
+ fragment = OpenStax::Content::Fragment::Reading.new(
45
+ node: obj, reference_view_url: reference_view_url
46
+ )
47
+ fragment unless fragment.blank?
46
48
  end.compact.tap do |result|
47
49
  @media_nodes.each do |node|
48
50
  # Media processing instructions
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
2
2
 
3
3
  class OpenStax::Content::S3
4
4
  def initialize
5
- @ls = {}
5
+ @ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
6
6
  end
7
7
 
8
8
  def bucket_name
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
21
21
  )
22
22
  end
23
23
 
24
- def ls(archive_version = nil)
25
- return @ls[archive_version] unless @ls[archive_version].nil?
26
- return unless bucket_configured?
27
-
24
+ # Returns the archive path for the given archive_version, book_id, page_uuid and extension
25
+ # If not all arguments are given, returns the prefix instead
26
+ def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
28
27
  archive_path = OpenStax::Content.archive_path.chomp('/')
29
28
 
30
29
  if archive_version.nil?
31
- prefix = "#{archive_path}/"
32
- delimiter = '/'
30
+ "#{archive_path}/"
31
+ elsif book_id.nil?
32
+ "#{archive_path}/#{archive_version}/contents/"
33
+ elsif page_uuid.nil?
34
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:"
35
+ elsif extension.nil?
36
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
37
+ else
38
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
39
+ end
40
+ end
41
+
42
+ # Without an archive version, returns a list of archive versions
43
+ # With an archive version, returns a list of book ids (uuid@version)
44
+ # With an archive version and a book, returns a list of page uuids
45
+ # With an archive version, book id and page uuid, returns the available extensions, if any
46
+ def ls(archive_version = nil, book_id = nil, page_uuid = nil)
47
+ return @ls[archive_version][book_id][page_uuid] \
48
+ unless @ls[archive_version][book_id][page_uuid].nil?
49
+ return unless bucket_configured?
50
+
51
+ prefix = path_for archive_version, book_id, page_uuid
52
+
53
+ delimiter = if archive_version.nil?
54
+ '/'
55
+ elsif book_id.nil?
56
+ ':'
57
+ elsif page_uuid.nil?
58
+ '.'
59
+ else
60
+ nil
61
+ end
62
+
63
+ responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
64
+
65
+ @ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
66
+ responses.flat_map(&:common_prefixes).map do |common_prefix|
67
+ common_prefix.prefix.sub(prefix, '').chomp(delimiter)
68
+ end
33
69
  else
34
- prefix = "#{archive_path}/#{archive_version}/contents/"
35
- delimiter = ':'
70
+ responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
36
71
  end
72
+ end
37
73
 
38
- @ls[archive_version] = client.list_objects_v2(
39
- bucket: bucket_name, prefix: prefix, delimiter: delimiter
40
- ).flat_map(&:common_prefixes).map do |common_prefix|
41
- common_prefix.prefix.sub(prefix, '').chomp(delimiter)
74
+ # Checks all books for the given page uuid and returns the path to the first one found
75
+ def find_page(page_uuid, archive_version: nil, extension: 'json')
76
+ archive_version ||= ls.last
77
+
78
+ ls(archive_version).each do |book_id|
79
+ return path_for(archive_version, book_id, page_uuid, extension) \
80
+ if ls(archive_version, book_id, page_uuid).include?(extension)
42
81
  end
82
+
83
+ nil
43
84
  end
44
85
  end
@@ -1,5 +1,5 @@
1
1
  module OpenStax
2
2
  module Content
3
- VERSION = '0.0.2'
3
+ VERSION = '0.3.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: openstax_content
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dante Soares
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-11 00:00:00.000000000 Z
11
+ date: 2021-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -168,7 +168,7 @@ homepage: https://github.com/openstax/content-ruby
168
168
  licenses:
169
169
  - AGPL-3.0
170
170
  metadata: {}
171
- post_install_message:
171
+ post_install_message:
172
172
  rdoc_options: []
173
173
  require_paths:
174
174
  - lib
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  requirements: []
186
186
  rubygems_version: 3.2.19
187
- signing_key:
187
+ signing_key:
188
188
  specification_version: 4
189
189
  summary: Ruby bindings to read and parse the OpenStax ABL and the content archive
190
190
  test_files: []