openstax_content 0.0.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 438e6037471accd85a41ff64543ef625e22d670121b1491eaa6c4d3827f4228b
4
- data.tar.gz: 91b8fb852646355aad1c892e9a1fbfd0d54f3fd03ac5c4a286239ae10f51a710
3
+ metadata.gz: 760f80ecbe0193cdb9597862fd3d62e2bd687affa9c392ca07cd41440f5e7a62
4
+ data.tar.gz: f357ee9efc8c537f6c84c8e02fc15e0919691be79be677e413ba7542220a1ab0
5
5
  SHA512:
6
- metadata.gz: 5c73997fedc8f642bbcda603fac97e1c071710464117f2de9ad4d2ef7cb648df4edcd0a71da2895442f196ad214dbdd3c7bc8a43e4aba2549bf2d97832a59a8a
7
- data.tar.gz: e73e524ac9ea295f635b9388aae5e73ec06a7e514ee74bf555f46df4d9a45af3d38baa1be12fdcef7c5c9c7ad77d5d036e09a289bb62c08ada89c5575a12419b
6
+ metadata.gz: 78bf8346586fea75e4cc1d5eed2f5e083a6feb75974f71838b83c90383f9df1f6742a47203a60d9e3e293b855b5bf30bbe7eb78e5536f1f6b4847757fc24b3a0
7
+ data.tar.gz: 162cf2c108e2c94e1c11dea32baf337c82ec0965783aa6358209b58888cc389ecc7953e637c802511ce167603f1b9081fb346e3e32b787304f9a4c6740fbe75b
data/README.md CHANGED
@@ -1,2 +1,100 @@
1
+ [![Tests](https://github.com/openstax/content-ruby/workflows/Tests/badge.svg)](https://github.com/openstax/content-ruby/actions/workflows/tests.yml)
2
+
1
3
  # content-ruby
2
4
  Ruby bindings to read and parse the OpenStax ABL and the content archive
5
+
6
+ ## Installation
7
+ Add this gem to your Gemfile and then add the following configuration to your application's boot process
8
+ (for example, in a Rails initializer):
9
+
10
+ ```rb
11
+ OpenStax::Content.configure do |config|
12
+ config.abl_url = ENV['OPENSTAX_CONTENT_ABL_URL']
13
+ config.archive_path = ENV['OPENSTAX_CONTENT_ARCHIVE_PATH']
14
+ config.bucket_name = ENV['OPENSTAX_CONTENT_BUCKET_NAME']
15
+ config.domain = ENV['OPENSTAX_CONTENT_DOMAIN']
16
+ config.exercises_search_api_url = ENV['OPENSTAX_CONTENT_EXERCISES_SEARCH_API_URL']
17
+ config.logger = defined?(Rails) ? Rails.logger : Logger.new(STDOUT)
18
+ config.s3_region = ENV['OPENSTAX_CONTENT_S3_REGION']
19
+ config.s3_access_key_id = ENV['OPENSTAX_CONTENT_S3_ACCESS_KEY_ID']
20
+ config.s3_secret_access_key = ENV['OPENSTAX_CONTENT_S3_SECRET_ACCESS_KEY']
21
+ end
22
+ ```
23
+
24
+ It's probably a good idea to read these values from environment variables.
25
+ `s3_access_key_id` and `s3_secret_access_key` are optional (you can use AWS instance roles instead).
26
+
27
+ ## Usage
28
+
29
+ ### Approved Book List (to get approved books and approved versions)
30
+ ```rb
31
+ abl = OpenStax::Content::Abl.new
32
+ approved_books = abl.approved_books
33
+ approved_versions = abl.approved_versions
34
+ ```
35
+
36
+ ### S3 Bucket Listing (to get latest archive and book versions)
37
+ ```rb
38
+ s3 = OpenStax::Content::S3.new
39
+ if s3.bucket_configured?
40
+ latest_archive_version = s3.ls.last
41
+ latest_book_ids = s3.ls latest_archive_version
42
+ chosen_book = latest_book_ids.sample
43
+ book_uuid, book_version = chosen_book.split('@', 2)
44
+ book = OpenStax::Content::Book.new(
45
+ archive_version: latest_archive_version, uuid: book_uuid, version: book_version
46
+ )
47
+ end
48
+ ```
49
+
50
+ ### Archive (to create archive links, load content and get book and page slugs)
51
+ ```rb
52
+ archive = OpenStax::Content::Archive.new latest_archive_version
53
+
54
+ book_id = "#{book_uuid}@#{book_version}"
55
+ page_id = "#{book_id}:#{page_uuid}"
56
+
57
+ book_url = archive.url_for book_id
58
+ page_url = archive.url_for page_id
59
+
60
+ book_json = archive.fetch book_id
61
+ page_json = archive.fetch page_id
62
+
63
+ book_hash = archive.json book_id
64
+ page_hash = archive.json page_id
65
+
66
+ book_slug = archive.slug book_id # or book_uuid
67
+ page_slug = archive.slug page_id # or "#{book_uuid}:#{page_uuid}"
68
+ ```
69
+
70
+ ### Fragment Splitter (to split pages and create interactive readings)
71
+ ```rb
72
+ fragment_splitter = OpenStax::Content::FragmentSplitter.new(
73
+ book.reading_processing_instructions, reference_view_url
74
+ )
75
+ fragment_splitter.split_into_fragments page.root
76
+ ```
77
+
78
+ ## Testing
79
+
80
+ To run all existing tests for this gem, simply execute the following from the main folder:
81
+
82
+ ```sh
83
+ $ rake
84
+ ```
85
+
86
+ ## Contributing
87
+
88
+ 1. Fork the openstax/content-ruby repo on GitHub
89
+ 2. Create a feature or bugfix branch (`git checkout -b my-new-feature`)
90
+ 3. Write tests for the feature/bugfix
91
+ 4. Implement the new feature/bugfix
92
+ 5. Make sure both new and old tests pass (`rake`)
93
+ 6. Commit your changes (`git commit -am 'Added some feature'`)
94
+ 7. Push the branch (`git push origin my-new-feature`)
95
+ 8. Create a new Pull Request to openstax/content-ruby on GitHub
96
+
97
+ ## License
98
+
99
+ This gem is distributed under the terms of the AGPLv3 license.
100
+ See the LICENSE file for details.
@@ -48,6 +48,10 @@ class OpenStax::Content::Archive
48
48
  if uri.path.start_with?('../')
49
49
  uri.path = uri.path.sub('..', '')
50
50
  "#{base_url}#{uri.to_s}"
51
+ elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
52
+ uri.path.start_with?("/#{OpenStax::Content.archive_path}")
53
+ uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
54
+ "https://#{OpenStax::Content.domain}/#{uri.to_s}"
51
55
  else
52
56
  uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
53
57
 
@@ -101,4 +105,29 @@ class OpenStax::Content::Archive
101
105
  slug
102
106
  end
103
107
  end
108
+
109
+ def webview_uri_for(page)
110
+ uri = if page.is_a?(Addressable::URI)
111
+ page
112
+ else
113
+ begin
114
+ Addressable::URI.parse page
115
+ rescue Addressable::URI::InvalidURIError
116
+ begin
117
+ Addressable::URI.parse "/#{page}"
118
+ rescue Addressable::URI::InvalidURIError
119
+ OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
120
+
121
+ return page
122
+ end
123
+ end
124
+ end
125
+ object = uri.path.split('/').last
126
+ book_id, page_id = object.split(':', 2)
127
+ page_uuid = page_id.split('@', 2).first
128
+ book_slug = slug book_id
129
+ page_slug = slug object
130
+ uri.path = "books/#{book_slug}/pages/#{page_slug}"
131
+ Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
132
+ end
104
133
  end
@@ -17,12 +17,21 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
17
17
  super.except('node')
18
18
  end
19
19
 
20
- def html?
21
- !to_html.empty?
20
+ def blank?
21
+ return @blank unless @blank.nil?
22
+
23
+ @blank = if to_html.nil? || to_html.strip.empty?
24
+ true
25
+ else
26
+ node_without_title = node.dup
27
+ node_without_title.css('[data-type="document-title"]').remove
28
+ text = node_without_title.text
29
+ text.nil? || text.strip.empty?
30
+ end
22
31
  end
23
32
 
24
- def blank?
25
- !html?
33
+ def html?
34
+ !blank?
26
35
  end
27
36
 
28
37
  def node
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
40
40
  # Flatten, remove empty nodes and transform remaining nodes into reading fragments
41
41
  result.map do |obj|
42
42
  next obj unless obj.is_a?(Nokogiri::XML::Node)
43
- next if obj.content.nil? || obj.content.strip.empty?
44
43
 
45
- OpenStax::Content::Fragment::Reading.new node: obj, reference_view_url: reference_view_url
44
+ fragment = OpenStax::Content::Fragment::Reading.new(
45
+ node: obj, reference_view_url: reference_view_url
46
+ )
47
+ fragment unless fragment.blank?
46
48
  end.compact.tap do |result|
47
49
  @media_nodes.each do |node|
48
50
  # Media processing instructions
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
2
2
 
3
3
  class OpenStax::Content::S3
4
4
  def initialize
5
- @ls = {}
5
+ @ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
6
6
  end
7
7
 
8
8
  def bucket_name
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
21
21
  )
22
22
  end
23
23
 
24
- def ls(archive_version = nil)
25
- return @ls[archive_version] unless @ls[archive_version].nil?
26
- return unless bucket_configured?
27
-
24
+ # Returns the archive path for the given archive_version, book_id, page_uuid and extension
25
+ # If not all arguments are given, returns the prefix instead
26
+ def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
28
27
  archive_path = OpenStax::Content.archive_path.chomp('/')
29
28
 
30
29
  if archive_version.nil?
31
- prefix = "#{archive_path}/"
32
- delimiter = '/'
30
+ "#{archive_path}/"
31
+ elsif book_id.nil?
32
+ "#{archive_path}/#{archive_version}/contents/"
33
+ elsif page_uuid.nil?
34
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:"
35
+ elsif extension.nil?
36
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
37
+ else
38
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
39
+ end
40
+ end
41
+
42
+ # Without an archive version, returns a list of archive versions
43
+ # With an archive version, returns a list of book ids (uuid@version)
44
+ # With an archive version and a book id, returns a list of page uuids
45
+ # With an archive version, book id and page uuid, returns the available extensions, if any
46
+ def ls(archive_version = nil, book_id = nil, page_uuid = nil)
47
+ return @ls[archive_version][book_id][page_uuid] \
48
+ unless @ls[archive_version][book_id][page_uuid].nil?
49
+ return unless bucket_configured?
50
+
51
+ prefix = path_for archive_version, book_id, page_uuid
52
+
53
+ delimiter = if archive_version.nil?
54
+ '/'
55
+ elsif book_id.nil?
56
+ ':'
57
+ elsif page_uuid.nil?
58
+ '.'
59
+ else
60
+ nil
61
+ end
62
+
63
+ responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
64
+
65
+ @ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
66
+ responses.flat_map(&:common_prefixes).map do |common_prefix|
67
+ common_prefix.prefix.sub(prefix, '').chomp(delimiter)
68
+ end
33
69
  else
34
- prefix = "#{archive_path}/#{archive_version}/contents/"
35
- delimiter = ':'
70
+ responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
36
71
  end
72
+ end
37
73
 
38
- @ls[archive_version] = client.list_objects_v2(
39
- bucket: bucket_name, prefix: prefix, delimiter: delimiter
40
- ).flat_map(&:common_prefixes).map do |common_prefix|
41
- common_prefix.prefix.sub(prefix, '').chomp(delimiter)
74
+ # Checks all books for the given page uuid and returns the path to the first one found
75
+ def find_page(page_uuid, archive_version: nil, extension: 'json')
76
+ archive_version ||= ls.last
77
+
78
+ ls(archive_version).each do |book_id|
79
+ return path_for(archive_version, book_id, page_uuid, extension) \
80
+ if ls(archive_version, book_id, page_uuid).include?(extension)
42
81
  end
82
+
83
+ nil
43
84
  end
44
85
  end
@@ -1,5 +1,5 @@
1
1
  module OpenStax
2
2
  module Content
3
- VERSION = '0.0.1'
3
+ VERSION = '0.2.1'
4
4
  end
5
5
  end
@@ -11,4 +11,4 @@ module OpenStax
11
11
  end
12
12
  end
13
13
 
14
- Dir["#{__dir__}/content/**/*.rb"].each { |file| require file }
14
+ Dir["#{__dir__}/openstax/content/**/*.rb"].each { |file| require file }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: openstax_content
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dante Soares
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-10 00:00:00.000000000 Z
11
+ date: 2021-08-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rspec
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +122,20 @@ dependencies:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: webmock
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
111
139
  description: Ruby bindings to read and parse the OpenStax ABL and the content archive
112
140
  email:
113
141
  - dante.m.soares@rice.edu
@@ -117,7 +145,6 @@ extra_rdoc_files: []
117
145
  files:
118
146
  - LICENSE
119
147
  - README.md
120
- - lib/openstax/content.rb
121
148
  - lib/openstax/content/abl.rb
122
149
  - lib/openstax/content/archive.rb
123
150
  - lib/openstax/content/book.rb
@@ -136,6 +163,7 @@ files:
136
163
  - lib/openstax/content/s3.rb
137
164
  - lib/openstax/content/title.rb
138
165
  - lib/openstax/content/version.rb
166
+ - lib/openstax_content.rb
139
167
  homepage: https://github.com/openstax/content-ruby
140
168
  licenses:
141
169
  - AGPL-3.0