openstax_content 0.0.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +98 -0
- data/lib/openstax/content/archive.rb +29 -0
- data/lib/openstax/content/fragment/html.rb +13 -4
- data/lib/openstax/content/fragment_splitter.rb +4 -2
- data/lib/openstax/content/s3.rb +54 -13
- data/lib/openstax/content/version.rb +1 -1
- data/lib/{openstax/content.rb → openstax_content.rb} +1 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 760f80ecbe0193cdb9597862fd3d62e2bd687affa9c392ca07cd41440f5e7a62
|
4
|
+
data.tar.gz: f357ee9efc8c537f6c84c8e02fc15e0919691be79be677e413ba7542220a1ab0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78bf8346586fea75e4cc1d5eed2f5e083a6feb75974f71838b83c90383f9df1f6742a47203a60d9e3e293b855b5bf30bbe7eb78e5536f1f6b4847757fc24b3a0
|
7
|
+
data.tar.gz: 162cf2c108e2c94e1c11dea32baf337c82ec0965783aa6358209b58888cc389ecc7953e637c802511ce167603f1b9081fb346e3e32b787304f9a4c6740fbe75b
|
data/README.md
CHANGED
@@ -1,2 +1,100 @@
|
|
1
|
+
[![Tests](https://github.com/openstax/content-ruby/workflows/Tests/badge.svg)](https://github.com/openstax/content-ruby/actions/workflows/tests.yml)
|
2
|
+
|
1
3
|
# content-ruby
|
2
4
|
Ruby bindings to read and parse the OpenStax ABL and the content archive
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
Add this gem to your Gemfile and then add the following configuration to your application's boot code
|
8
|
+
(for example, in a Rails initializer):
|
9
|
+
|
10
|
+
```rb
|
11
|
+
OpenStax::Content.configure do |config|
|
12
|
+
config.abl_url = ENV['OPENSTAX_CONTENT_ABL_URL']
|
13
|
+
config.archive_path = ENV['OPENSTAX_CONTENT_ARCHIVE_PATH']
|
14
|
+
config.bucket_name = ENV['OPENSTAX_CONTENT_BUCKET_NAME']
|
15
|
+
config.domain = ENV['OPENSTAX_CONTENT_DOMAIN']
|
16
|
+
config.exercises_search_api_url = ENV['OPENSTAX_CONTENT_EXERCISES_SEARCH_API_URL']
|
17
|
+
config.logger = defined?(Rails) ? Rails.logger : Logger.new(STDOUT)
|
18
|
+
config.s3_region = ENV['OPENSTAX_CONTENT_S3_REGION']
|
19
|
+
config.s3_access_key_id = ENV['OPENSTAX_CONTENT_S3_ACCESS_KEY_ID']
|
20
|
+
config.s3_secret_access_key = ENV['OPENSTAX_CONTENT_S3_SECRET_ACCESS_KEY']
|
21
|
+
end
|
22
|
+
```
|
23
|
+
|
24
|
+
It's probably a good idea to read these values from environment variables.
|
25
|
+
`s3_access_key_id` and `s3_secret_access_key` are optional (you can use AWS instance roles instead).
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
### Approved Book List (to get approved books and approved versions)
|
30
|
+
```rb
|
31
|
+
abl = OpenStax::Content::Abl.new
|
32
|
+
approved_books = abl.approved_books
|
33
|
+
approved_versions = abl.approved_versions
|
34
|
+
```
|
35
|
+
|
36
|
+
### S3 Bucket Listing (to get latest archive and book versions)
|
37
|
+
```rb
|
38
|
+
s3 = OpenStax::Content::S3.new
|
39
|
+
if s3.bucket_configured?
|
40
|
+
latest_archive_version = s3.ls.last
|
41
|
+
latest_book_ids = s3.ls latest_archive_version
|
42
|
+
chosen_book = latest_book_ids.sample
|
43
|
+
book_uuid, book_version = chosen_book.split('@', 2)
|
44
|
+
book = OpenStax::Content::Book.new(
|
45
|
+
archive_version: latest_archive_version, uuid: book_uuid, version: book_version
|
46
|
+
)
|
47
|
+
end
|
48
|
+
```
|
49
|
+
|
50
|
+
### Archive (to create archive links, load content and get book and page slugs)
|
51
|
+
```rb
|
52
|
+
archive = OpenStax::Content::Archive.new latest_archive_version
|
53
|
+
|
54
|
+
book_id = "#{book_uuid}@#{book_version}"
|
55
|
+
page_id = "#{book_id}:#{page_uuid}"
|
56
|
+
|
57
|
+
book_url = archive.url_for book_id
|
58
|
+
page_url = archive.url_for page_id
|
59
|
+
|
60
|
+
book_json = archive.fetch book_id
|
61
|
+
page_json = archive.fetch page_id
|
62
|
+
|
63
|
+
book_hash = archive.json book_id
|
64
|
+
page_hash = archive.json page_id
|
65
|
+
|
66
|
+
book_slug = archive.slug book_id # or book_uuid
|
67
|
+
page_slug = archive.slug page_id # or "#{book_uuid}:#{page_uuid}"
|
68
|
+
```
|
69
|
+
|
70
|
+
### Fragment Splitter (to split pages and create interactive readings)
|
71
|
+
```rb
|
72
|
+
fragment_splitter = OpenStax::Content::FragmentSplitter.new(
|
73
|
+
book.reading_processing_instructions, reference_view_url
|
74
|
+
)
|
75
|
+
fragment_splitter.split_into_fragments page.root
|
76
|
+
```
|
77
|
+
|
78
|
+
## Testing
|
79
|
+
|
80
|
+
To run all existing tests for this gem, simply execute the following from the main folder:
|
81
|
+
|
82
|
+
```sh
|
83
|
+
$ rake
|
84
|
+
```
|
85
|
+
|
86
|
+
## Contributing
|
87
|
+
|
88
|
+
1. Fork the openstax/content-ruby repo on GitHub
|
89
|
+
2. Create a feature or bugfix branch (`git checkout -b my-new-feature`)
|
90
|
+
3. Write tests for the feature/bugfix
|
91
|
+
4. Implement the new feature/bugfix
|
92
|
+
5. Make sure both new and old tests pass (`rake`)
|
93
|
+
6. Commit your changes (`git commit -am 'Added some feature'`)
|
94
|
+
7. Push the branch (`git push origin my-new-feature`)
|
95
|
+
8. Create a new Pull Request to openstax/content-ruby on GitHub
|
96
|
+
|
97
|
+
## License
|
98
|
+
|
99
|
+
This gem is distributed under the terms of the AGPLv3 license.
|
100
|
+
See the LICENSE file for details.
|
data/lib/openstax/content/archive.rb
CHANGED
@@ -48,6 +48,10 @@ class OpenStax::Content::Archive
|
|
48
48
|
if uri.path.start_with?('../')
|
49
49
|
uri.path = uri.path.sub('..', '')
|
50
50
|
"#{base_url}#{uri.to_s}"
|
51
|
+
elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
|
52
|
+
uri.path.start_with?("/#{OpenStax::Content.archive_path}")
|
53
|
+
uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
|
54
|
+
"https://#{OpenStax::Content.domain}/#{uri.to_s}"
|
51
55
|
else
|
52
56
|
uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
|
53
57
|
|
@@ -101,4 +105,29 @@ class OpenStax::Content::Archive
|
|
101
105
|
slug
|
102
106
|
end
|
103
107
|
end
|
108
|
+
|
109
|
+
def webview_uri_for(page)
|
110
|
+
uri = if page.is_a?(Addressable::URI)
|
111
|
+
page
|
112
|
+
else
|
113
|
+
begin
|
114
|
+
Addressable::URI.parse page
|
115
|
+
rescue Addressable::URI::InvalidURIError
|
116
|
+
begin
|
117
|
+
Addressable::URI.parse "/#{page}"
|
118
|
+
rescue Addressable::URI::InvalidURIError
|
119
|
+
OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
|
120
|
+
|
121
|
+
return page
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
object = uri.path.split('/').last
|
126
|
+
book_id, page_id = object.split(':', 2)
|
127
|
+
page_uuid = page_id.split('@', 2).first
|
128
|
+
book_slug = slug book_id
|
129
|
+
page_slug = slug object
|
130
|
+
uri.path = "books/#{book_slug}/pages/#{page_slug}"
|
131
|
+
Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
|
132
|
+
end
|
104
133
|
end
|
data/lib/openstax/content/fragment/html.rb
CHANGED
@@ -17,12 +17,21 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
|
|
17
17
|
super.except('node')
|
18
18
|
end
|
19
19
|
|
20
|
-
def
|
21
|
-
|
20
|
+
def blank?
|
21
|
+
return @blank unless @blank.nil?
|
22
|
+
|
23
|
+
@blank = if to_html.nil? || to_html.strip.empty?
|
24
|
+
true
|
25
|
+
else
|
26
|
+
node_without_title = node.dup
|
27
|
+
node_without_title.css('[data-type="document-title"]').remove
|
28
|
+
text = node_without_title.text
|
29
|
+
text.nil? || text.strip.empty?
|
30
|
+
end
|
22
31
|
end
|
23
32
|
|
24
|
-
def
|
25
|
-
!
|
33
|
+
def html?
|
34
|
+
!blank?
|
26
35
|
end
|
27
36
|
|
28
37
|
def node
|
data/lib/openstax/content/fragment_splitter.rb
CHANGED
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
|
|
40
40
|
# Flatten, remove empty nodes and transform remaining nodes into reading fragments
|
41
41
|
result.map do |obj|
|
42
42
|
next obj unless obj.is_a?(Nokogiri::XML::Node)
|
43
|
-
next if obj.content.nil? || obj.content.strip.empty?
|
44
43
|
|
45
|
-
OpenStax::Content::Fragment::Reading.new
|
44
|
+
fragment = OpenStax::Content::Fragment::Reading.new(
|
45
|
+
node: obj, reference_view_url: reference_view_url
|
46
|
+
)
|
47
|
+
fragment unless fragment.blank?
|
46
48
|
end.compact.tap do |result|
|
47
49
|
@media_nodes.each do |node|
|
48
50
|
# Media processing instructions
|
data/lib/openstax/content/s3.rb
CHANGED
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
|
|
2
2
|
|
3
3
|
class OpenStax::Content::S3
|
4
4
|
def initialize
|
5
|
-
@ls = {}
|
5
|
+
@ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
|
6
6
|
end
|
7
7
|
|
8
8
|
def bucket_name
|
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
|
|
21
21
|
)
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
# Returns the archive path for the given archive_version, book_id, page_uuid and extension
|
25
|
+
# If not all arguments are given, returns the prefix instead
|
26
|
+
def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
|
28
27
|
archive_path = OpenStax::Content.archive_path.chomp('/')
|
29
28
|
|
30
29
|
if archive_version.nil?
|
31
|
-
|
32
|
-
|
30
|
+
"#{archive_path}/"
|
31
|
+
elsif book_id.nil?
|
32
|
+
"#{archive_path}/#{archive_version}/contents/"
|
33
|
+
elsif page_uuid.nil?
|
34
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:"
|
35
|
+
elsif extension.nil?
|
36
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
|
37
|
+
else
|
38
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Without an archive version, returns a list of archive versions
|
43
|
+
# With an archive version, returns a list of book ids (uuid@version)
|
44
|
+
# With an archive version and a book, returns a list of page uuids
|
45
|
+
# With an archive version, book id and page uuid, returns the available extensions, if any
|
46
|
+
def ls(archive_version = nil, book_id = nil, page_uuid = nil)
|
47
|
+
return @ls[archive_version][book_id][page_uuid] \
|
48
|
+
unless @ls[archive_version][book_id][page_uuid].nil?
|
49
|
+
return unless bucket_configured?
|
50
|
+
|
51
|
+
prefix = path_for archive_version, book_id, page_uuid
|
52
|
+
|
53
|
+
delimiter = if archive_version.nil?
|
54
|
+
'/'
|
55
|
+
elsif book_id.nil?
|
56
|
+
':'
|
57
|
+
elsif page_uuid.nil?
|
58
|
+
'.'
|
59
|
+
else
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
|
63
|
+
responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
|
64
|
+
|
65
|
+
@ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
|
66
|
+
responses.flat_map(&:common_prefixes).map do |common_prefix|
|
67
|
+
common_prefix.prefix.sub(prefix, '').chomp(delimiter)
|
68
|
+
end
|
33
69
|
else
|
34
|
-
prefix
|
35
|
-
delimiter = ':'
|
70
|
+
responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
|
36
71
|
end
|
72
|
+
end
|
37
73
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
74
|
+
# Checks all books for the given page uuid and returns the path to the first one found
|
75
|
+
def find_page(page_uuid, archive_version: nil, extension: 'json')
|
76
|
+
archive_version ||= ls.last
|
77
|
+
|
78
|
+
ls(archive_version).each do |book_id|
|
79
|
+
return path_for(archive_version, book_id, page_uuid, extension) \
|
80
|
+
if ls(archive_version, book_id, page_uuid).include?(extension)
|
42
81
|
end
|
82
|
+
|
83
|
+
nil
|
43
84
|
end
|
44
85
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: openstax_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dante Soares
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rspec
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,20 @@ dependencies:
|
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: webmock
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
111
139
|
description: Ruby bindings to read and parse the OpenStax ABL and the content archive
|
112
140
|
email:
|
113
141
|
- dante.m.soares@rice.edu
|
@@ -117,7 +145,6 @@ extra_rdoc_files: []
|
|
117
145
|
files:
|
118
146
|
- LICENSE
|
119
147
|
- README.md
|
120
|
-
- lib/openstax/content.rb
|
121
148
|
- lib/openstax/content/abl.rb
|
122
149
|
- lib/openstax/content/archive.rb
|
123
150
|
- lib/openstax/content/book.rb
|
@@ -136,6 +163,7 @@ files:
|
|
136
163
|
- lib/openstax/content/s3.rb
|
137
164
|
- lib/openstax/content/title.rb
|
138
165
|
- lib/openstax/content/version.rb
|
166
|
+
- lib/openstax_content.rb
|
139
167
|
homepage: https://github.com/openstax/content-ruby
|
140
168
|
licenses:
|
141
169
|
- AGPL-3.0
|