openstax_content 0.0.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 438e6037471accd85a41ff64543ef625e22d670121b1491eaa6c4d3827f4228b
4
- data.tar.gz: 91b8fb852646355aad1c892e9a1fbfd0d54f3fd03ac5c4a286239ae10f51a710
3
+ metadata.gz: 760f80ecbe0193cdb9597862fd3d62e2bd687affa9c392ca07cd41440f5e7a62
4
+ data.tar.gz: f357ee9efc8c537f6c84c8e02fc15e0919691be79be677e413ba7542220a1ab0
5
5
  SHA512:
6
- metadata.gz: 5c73997fedc8f642bbcda603fac97e1c071710464117f2de9ad4d2ef7cb648df4edcd0a71da2895442f196ad214dbdd3c7bc8a43e4aba2549bf2d97832a59a8a
7
- data.tar.gz: e73e524ac9ea295f635b9388aae5e73ec06a7e514ee74bf555f46df4d9a45af3d38baa1be12fdcef7c5c9c7ad77d5d036e09a289bb62c08ada89c5575a12419b
6
+ metadata.gz: 78bf8346586fea75e4cc1d5eed2f5e083a6feb75974f71838b83c90383f9df1f6742a47203a60d9e3e293b855b5bf30bbe7eb78e5536f1f6b4847757fc24b3a0
7
+ data.tar.gz: 162cf2c108e2c94e1c11dea32baf337c82ec0965783aa6358209b58888cc389ecc7953e637c802511ce167603f1b9081fb346e3e32b787304f9a4c6740fbe75b
data/README.md CHANGED
@@ -1,2 +1,100 @@
1
+ [![Tests](https://github.com/openstax/content-ruby/workflows/Tests/badge.svg)](https://github.com/openstax/content-ruby/actions/workflows/tests.yml)
2
+
1
3
  # content-ruby
2
4
  Ruby bindings to read and parse the OpenStax ABL and the content archive
5
+
6
+ ## Installation
7
+ Add this gem to your Gemfile, then add the following configuration to your application's boot code
8
+ (for example, in a Rails initializer):
9
+
10
+ ```rb
11
+ OpenStax::Content.configure do |config|
12
+ config.abl_url = ENV['OPENSTAX_CONTENT_ABL_URL']
13
+ config.archive_path = ENV['OPENSTAX_CONTENT_ARCHIVE_PATH']
14
+ config.bucket_name = ENV['OPENSTAX_CONTENT_BUCKET_NAME']
15
+ config.domain = ENV['OPENSTAX_CONTENT_DOMAIN']
16
+ config.exercises_search_api_url = ENV['OPENSTAX_CONTENT_EXERCISES_SEARCH_API_URL']
17
+ config.logger = defined?(Rails) ? Rails.logger : Logger.new(STDOUT)
18
+ config.s3_region = ENV['OPENSTAX_CONTENT_S3_REGION']
19
+ config.s3_access_key_id = ENV['OPENSTAX_CONTENT_S3_ACCESS_KEY_ID']
20
+ config.s3_secret_access_key = ENV['OPENSTAX_CONTENT_S3_SECRET_ACCESS_KEY']
21
+ end
22
+ ```
23
+
24
+ It's probably a good idea to read these values from environment variables.
25
+ `s3_access_key_id` and `s3_secret_access_key` are optional (you can use AWS instance roles instead).
26
+
27
+ ## Usage
28
+
29
+ ### Approved Book List (to get approved books and approved versions)
30
+ ```rb
31
+ abl = OpenStax::Content::Abl.new
32
+ approved_books = abl.approved_books
33
+ approved_versions = abl.approved_versions
34
+ ```
35
+
36
+ ### S3 Bucket Listing (to get latest archive and book versions)
37
+ ```rb
38
+ s3 = OpenStax::Content::S3.new
39
+ if s3.bucket_configured?
40
+ latest_archive_version = s3.ls.last
41
+ latest_book_ids = s3.ls latest_archive_version
42
+ chosen_book = latest_book_ids.sample
43
+ book_uuid, book_version = chosen_book.split('@', 2)
44
+ book = OpenStax::Content::Book.new(
45
+ archive_version: latest_archive_version, uuid: book_uuid, version: book_version
46
+ )
47
+ end
48
+ ```
49
+
50
+ ### Archive (to create archive links, load content and get book and page slugs)
51
+ ```rb
52
+ archive = OpenStax::Content::Archive.new latest_archive_version
53
+
54
+ book_id = "#{book_uuid}@#{book_version}"
55
+ page_id = "#{book_id}:#{page_uuid}"
56
+
57
+ book_url = archive.url_for book_id
58
+ page_url = archive.url_for page_id
59
+
60
+ book_json = archive.fetch book_id
61
+ page_json = archive.fetch page_id
62
+
63
+ book_hash = archive.json book_id
64
+ page_hash = archive.json page_id
65
+
66
+ book_slug = archive.slug book_id # or book_uuid
67
+ page_slug = archive.slug page_id # or "#{book_uuid}:#{page_uuid}"
68
+ ```
69
+
70
+ ### Fragment Splitter (to split pages and create interactive readings)
71
+ ```rb
72
+ fragment_splitter = OpenStax::Content::FragmentSplitter.new(
73
+ book.reading_processing_instructions, reference_view_url
74
+ )
75
+ fragment_splitter.split_into_fragments page.root
76
+ ```
77
+
78
+ ## Testing
79
+
80
+ To run all existing tests for this gem, simply execute the following from the main folder:
81
+
82
+ ```sh
83
+ $ rake
84
+ ```
85
+
86
+ ## Contributing
87
+
88
+ 1. Fork the openstax/content-ruby repo on GitHub
89
+ 2. Create a feature or bugfix branch (`git checkout -b my-new-feature`)
90
+ 3. Write tests for the feature/bugfix
91
+ 4. Implement the new feature/bugfix
92
+ 5. Make sure both new and old tests pass (`rake`)
93
+ 6. Commit your changes (`git commit -am 'Added some feature'`)
94
+ 7. Push the branch (`git push origin my-new-feature`)
95
+ 8. Create a new Pull Request to openstax/content-ruby on GitHub
96
+
97
+ ## License
98
+
99
+ This gem is distributed under the terms of the AGPLv3 license.
100
+ See the LICENSE file for details.
@@ -48,6 +48,10 @@ class OpenStax::Content::Archive
48
48
  if uri.path.start_with?('../')
49
49
  uri.path = uri.path.sub('..', '')
50
50
  "#{base_url}#{uri.to_s}"
51
+ elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
52
+ uri.path.start_with?("/#{OpenStax::Content.archive_path}")
53
+ uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
54
+ "https://#{OpenStax::Content.domain}/#{uri.to_s}"
51
55
  else
52
56
  uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
53
57
 
@@ -101,4 +105,29 @@ class OpenStax::Content::Archive
101
105
  slug
102
106
  end
103
107
  end
108
+
109
+ def webview_uri_for(page)
110
+ uri = if page.is_a?(Addressable::URI)
111
+ page
112
+ else
113
+ begin
114
+ Addressable::URI.parse page
115
+ rescue Addressable::URI::InvalidURIError
116
+ begin
117
+ Addressable::URI.parse "/#{page}"
118
+ rescue Addressable::URI::InvalidURIError
119
+ OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
120
+
121
+ return page
122
+ end
123
+ end
124
+ end
125
+ object = uri.path.split('/').last
126
+ book_id, page_id = object.split(':', 2)
127
+ page_uuid = page_id.split('@', 2).first
128
+ book_slug = slug book_id
129
+ page_slug = slug object
130
+ uri.path = "books/#{book_slug}/pages/#{page_slug}"
131
+ Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
132
+ end
104
133
  end
@@ -17,12 +17,21 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
17
17
  super.except('node')
18
18
  end
19
19
 
20
- def html?
21
- !to_html.empty?
20
+ def blank?
21
+ return @blank unless @blank.nil?
22
+
23
+ @blank = if to_html.nil? || to_html.strip.empty?
24
+ true
25
+ else
26
+ node_without_title = node.dup
27
+ node_without_title.css('[data-type="document-title"]').remove
28
+ text = node_without_title.text
29
+ text.nil? || text.strip.empty?
30
+ end
22
31
  end
23
32
 
24
- def blank?
25
- !html?
33
+ def html?
34
+ !blank?
26
35
  end
27
36
 
28
37
  def node
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
40
40
  # Flatten, remove empty nodes and transform remaining nodes into reading fragments
41
41
  result.map do |obj|
42
42
  next obj unless obj.is_a?(Nokogiri::XML::Node)
43
- next if obj.content.nil? || obj.content.strip.empty?
44
43
 
45
- OpenStax::Content::Fragment::Reading.new node: obj, reference_view_url: reference_view_url
44
+ fragment = OpenStax::Content::Fragment::Reading.new(
45
+ node: obj, reference_view_url: reference_view_url
46
+ )
47
+ fragment unless fragment.blank?
46
48
  end.compact.tap do |result|
47
49
  @media_nodes.each do |node|
48
50
  # Media processing instructions
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
2
2
 
3
3
  class OpenStax::Content::S3
4
4
  def initialize
5
- @ls = {}
5
+ @ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
6
6
  end
7
7
 
8
8
  def bucket_name
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
21
21
  )
22
22
  end
23
23
 
24
- def ls(archive_version = nil)
25
- return @ls[archive_version] unless @ls[archive_version].nil?
26
- return unless bucket_configured?
27
-
24
+ # Returns the archive path for the given archive_version, book_id, page_uuid and extension
25
+ # If not all arguments are given, returns the prefix instead
26
+ def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
28
27
  archive_path = OpenStax::Content.archive_path.chomp('/')
29
28
 
30
29
  if archive_version.nil?
31
- prefix = "#{archive_path}/"
32
- delimiter = '/'
30
+ "#{archive_path}/"
31
+ elsif book_id.nil?
32
+ "#{archive_path}/#{archive_version}/contents/"
33
+ elsif page_uuid.nil?
34
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:"
35
+ elsif extension.nil?
36
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
37
+ else
38
+ "#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
39
+ end
40
+ end
41
+
42
+ # Without an archive version, returns a list of archive versions
43
+ # With an archive version, returns a list of book ids (uuid@version)
44
+ # With an archive version and a book id, returns a list of page uuids
45
+ # With an archive version, book id and page uuid, returns the available extensions, if any
46
+ def ls(archive_version = nil, book_id = nil, page_uuid = nil)
47
+ return @ls[archive_version][book_id][page_uuid] \
48
+ unless @ls[archive_version][book_id][page_uuid].nil?
49
+ return unless bucket_configured?
50
+
51
+ prefix = path_for archive_version, book_id, page_uuid
52
+
53
+ delimiter = if archive_version.nil?
54
+ '/'
55
+ elsif book_id.nil?
56
+ ':'
57
+ elsif page_uuid.nil?
58
+ '.'
59
+ else
60
+ nil
61
+ end
62
+
63
+ responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
64
+
65
+ @ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
66
+ responses.flat_map(&:common_prefixes).map do |common_prefix|
67
+ common_prefix.prefix.sub(prefix, '').chomp(delimiter)
68
+ end
33
69
  else
34
- prefix = "#{archive_path}/#{archive_version}/contents/"
35
- delimiter = ':'
70
+ responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
36
71
  end
72
+ end
37
73
 
38
- @ls[archive_version] = client.list_objects_v2(
39
- bucket: bucket_name, prefix: prefix, delimiter: delimiter
40
- ).flat_map(&:common_prefixes).map do |common_prefix|
41
- common_prefix.prefix.sub(prefix, '').chomp(delimiter)
74
+ # Checks all books for the given page uuid and returns the path to the first one found
75
+ def find_page(page_uuid, archive_version: nil, extension: 'json')
76
+ archive_version ||= ls.last
77
+
78
+ ls(archive_version).each do |book_id|
79
+ return path_for(archive_version, book_id, page_uuid, extension) \
80
+ if ls(archive_version, book_id, page_uuid).include?(extension)
42
81
  end
82
+
83
+ nil
43
84
  end
44
85
  end
@@ -1,5 +1,5 @@
1
1
  module OpenStax
2
2
  module Content
3
- VERSION = '0.0.1'
3
+ VERSION = '0.2.1'
4
4
  end
5
5
  end
@@ -11,4 +11,4 @@ module OpenStax
11
11
  end
12
12
  end
13
13
 
14
- Dir["#{__dir__}/content/**/*.rb"].each { |file| require file }
14
+ Dir["#{__dir__}/openstax/content/**/*.rb"].each { |file| require file }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: openstax_content
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dante Soares
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-10 00:00:00.000000000 Z
11
+ date: 2021-08-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rspec
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +122,20 @@ dependencies:
108
122
  - - ">="
109
123
  - !ruby/object:Gem::Version
110
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: webmock
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
111
139
  description: Ruby bindings to read and parse the OpenStax ABL and the content archive
112
140
  email:
113
141
  - dante.m.soares@rice.edu
@@ -117,7 +145,6 @@ extra_rdoc_files: []
117
145
  files:
118
146
  - LICENSE
119
147
  - README.md
120
- - lib/openstax/content.rb
121
148
  - lib/openstax/content/abl.rb
122
149
  - lib/openstax/content/archive.rb
123
150
  - lib/openstax/content/book.rb
@@ -136,6 +163,7 @@ files:
136
163
  - lib/openstax/content/s3.rb
137
164
  - lib/openstax/content/title.rb
138
165
  - lib/openstax/content/version.rb
166
+ - lib/openstax_content.rb
139
167
  homepage: https://github.com/openstax/content-ruby
140
168
  licenses:
141
169
  - AGPL-3.0