openstax_content 0.0.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +98 -0
- data/lib/openstax/content/archive.rb +29 -0
- data/lib/openstax/content/fragment/html.rb +13 -4
- data/lib/openstax/content/fragment_splitter.rb +4 -2
- data/lib/openstax/content/s3.rb +54 -13
- data/lib/openstax/content/version.rb +1 -1
- data/lib/{openstax/content.rb → openstax_content.rb} +1 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 760f80ecbe0193cdb9597862fd3d62e2bd687affa9c392ca07cd41440f5e7a62
|
4
|
+
data.tar.gz: f357ee9efc8c537f6c84c8e02fc15e0919691be79be677e413ba7542220a1ab0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78bf8346586fea75e4cc1d5eed2f5e083a6feb75974f71838b83c90383f9df1f6742a47203a60d9e3e293b855b5bf30bbe7eb78e5536f1f6b4847757fc24b3a0
|
7
|
+
data.tar.gz: 162cf2c108e2c94e1c11dea32baf337c82ec0965783aa6358209b58888cc389ecc7953e637c802511ce167603f1b9081fb346e3e32b787304f9a4c6740fbe75b
|
data/README.md
CHANGED
@@ -1,2 +1,100 @@
|
|
1
|
+
[![Tests](https://github.com/openstax/content-ruby/actions/workflows/tests.yml/badge.svg)](https://github.com/openstax/content-ruby/actions/workflows/tests.yml)
|
2
|
+
|
1
3
|
# content-ruby
|
2
4
|
Ruby bindings to read and parse the OpenStax ABL and the content archive
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
Add this gem to your Gemfile and then add the following configuration to your boot
|
8
|
+
(for example, in a Rails initializer):
|
9
|
+
|
10
|
+
```rb
|
11
|
+
OpenStax::Content.configure do |config|
|
12
|
+
config.abl_url = ENV['OPENSTAX_CONTENT_ABL_URL']
|
13
|
+
config.archive_path = ENV['OPENSTAX_CONTENT_ARCHIVE_PATH']
|
14
|
+
config.bucket_name = ENV['OPENSTAX_CONTENT_BUCKET_NAME']
|
15
|
+
config.domain = ENV['OPENSTAX_CONTENT_DOMAIN']
|
16
|
+
config.exercises_search_api_url = ENV['OPENSTAX_CONTENT_EXERCISES_SEARCH_API_URL']
|
17
|
+
config.logger = defined?(Rails) ? Rails.logger : Logger.new(STDOUT)
|
18
|
+
config.s3_region = ENV['OPENSTAX_CONTENT_S3_REGION']
|
19
|
+
config.s3_access_key_id = ENV['OPENSTAX_CONTENT_S3_ACCESS_KEY_ID']
|
20
|
+
config.s3_secret_access_key = ENV['OPENSTAX_CONTENT_S3_SECRET_ACCESS_KEY']
|
21
|
+
end
|
22
|
+
```
|
23
|
+
|
24
|
+
It's probably a good idea to read these values from environment variables.
|
25
|
+
`s3_access_key_id` and `s3_secret_access_key` are optional (you can use AWS instance roles instead).
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
### Approved Book List (to get approved books and approved versions)
|
30
|
+
```rb
|
31
|
+
abl = OpenStax::Content::Abl.new
|
32
|
+
approved_books = abl.approved_books
|
33
|
+
approved_versions = abl.approved_versions
|
34
|
+
```
|
35
|
+
|
36
|
+
### S3 Bucket Listing (to get latest archive and book versions)
|
37
|
+
```rb
|
38
|
+
s3 = OpenStax::Content::S3.new
|
39
|
+
if s3.bucket_configured?
|
40
|
+
latest_archive_version = s3.ls.last
|
41
|
+
latest_book_ids = s3.ls latest_archive_version
|
42
|
+
chosen_book = latest_book_ids.sample
|
43
|
+
book_uuid, book_version = chosen_book.split('@', 2)
|
44
|
+
book = OpenStax::Content::Book.new(
|
45
|
+
archive_version: latest_archive_version, uuid: book_uuid, version: book_version
|
46
|
+
)
|
47
|
+
end
|
48
|
+
```
|
49
|
+
|
50
|
+
### Archive (to create archive links, load content and get book and page slugs)
|
51
|
+
```rb
|
52
|
+
archive = OpenStax::Content::Archive.new latest_archive_version
|
53
|
+
|
54
|
+
book_id = "#{book_uuid}@#{book_version}"
|
55
|
+
page_id = "#{book_id}:#{page_uuid}"
|
56
|
+
|
57
|
+
book_url = archive.url_for book_id
|
58
|
+
page_url = archive.url_for page_id
|
59
|
+
|
60
|
+
book_json = archive.fetch book_id
|
61
|
+
page_json = archive.fetch page_id
|
62
|
+
|
63
|
+
book_hash = archive.json book_id
|
64
|
+
page_hash = archive.json page_id
|
65
|
+
|
66
|
+
book_slug = archive.slug book_id # or book_uuid
|
67
|
+
page_slug = archive.slug page_id # or "#{book_uuid}:#{page_uuid}"
|
68
|
+
```
|
69
|
+
|
70
|
+
### Fragment Splitter (to split pages and create interactive readings)
|
71
|
+
```rb
|
72
|
+
fragment_splitter = OpenStax::Content::FragmentSplitter.new(
|
73
|
+
book.reading_processing_instructions, reference_view_url
|
74
|
+
)
|
75
|
+
fragment_splitter.split_into_fragments page.root
|
76
|
+
```
|
77
|
+
|
78
|
+
## Testing
|
79
|
+
|
80
|
+
To run all existing tests for this gem, simply execute the following from the main folder:
|
81
|
+
|
82
|
+
```sh
|
83
|
+
$ rake
|
84
|
+
```
|
85
|
+
|
86
|
+
## Contributing
|
87
|
+
|
88
|
+
1. Fork the openstax/content-ruby repo on GitHub
|
89
|
+
2. Create a feature or bugfix branch (`git checkout -b my-new-feature`)
|
90
|
+
3. Write tests for the feature/bugfix
|
91
|
+
4. Implement the new feature/bugfix
|
92
|
+
5. Make sure both new and old tests pass (`rake`)
|
93
|
+
6. Commit your changes (`git commit -am 'Added some feature'`)
|
94
|
+
7. Push the branch (`git push origin my-new-feature`)
|
95
|
+
8. Create a new Pull Request to openstax/content-ruby on GitHub
|
96
|
+
|
97
|
+
## License
|
98
|
+
|
99
|
+
This gem is distributed under the terms of the AGPLv3 license.
|
100
|
+
See the LICENSE file for details.
|
@@ -48,6 +48,10 @@ class OpenStax::Content::Archive
|
|
48
48
|
if uri.path.start_with?('../')
|
49
49
|
uri.path = uri.path.sub('..', '')
|
50
50
|
"#{base_url}#{uri.to_s}"
|
51
|
+
elsif uri.path.start_with?(OpenStax::Content.archive_path) ||
|
52
|
+
uri.path.start_with?("/#{OpenStax::Content.archive_path}")
|
53
|
+
uri.path.start_with?('/') ? "https://#{OpenStax::Content.domain}#{uri.to_s}" :
|
54
|
+
"https://#{OpenStax::Content.domain}/#{uri.to_s}"
|
51
55
|
else
|
52
56
|
uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"
|
53
57
|
|
@@ -101,4 +105,29 @@ class OpenStax::Content::Archive
|
|
101
105
|
slug
|
102
106
|
end
|
103
107
|
end
|
108
|
+
|
109
|
+
def webview_uri_for(page)
|
110
|
+
uri = if page.is_a?(Addressable::URI)
|
111
|
+
page
|
112
|
+
else
|
113
|
+
begin
|
114
|
+
Addressable::URI.parse page
|
115
|
+
rescue Addressable::URI::InvalidURIError
|
116
|
+
begin
|
117
|
+
Addressable::URI.parse "/#{page}"
|
118
|
+
rescue Addressable::URI::InvalidURIError
|
119
|
+
OpenStax::Content.logger.warn { "Invalid page url: \"#{page}\"" }
|
120
|
+
|
121
|
+
return page
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
object = uri.path.split('/').last
|
126
|
+
book_id, page_id = object.split(':', 2)
|
127
|
+
page_uuid = page_id.split('@', 2).first
|
128
|
+
book_slug = slug book_id
|
129
|
+
page_slug = slug object
|
130
|
+
uri.path = "books/#{book_slug}/pages/#{page_slug}"
|
131
|
+
Addressable::URI.join "https://#{OpenStax::Content.domain}", uri
|
132
|
+
end
|
104
133
|
end
|
@@ -17,12 +17,21 @@ class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
|
|
17
17
|
super.except('node')
|
18
18
|
end
|
19
19
|
|
20
|
-
def
|
21
|
-
|
20
|
+
def blank?
|
21
|
+
return @blank unless @blank.nil?
|
22
|
+
|
23
|
+
@blank = if to_html.nil? || to_html.strip.empty?
|
24
|
+
true
|
25
|
+
else
|
26
|
+
node_without_title = node.dup
|
27
|
+
node_without_title.css('[data-type="document-title"]').remove
|
28
|
+
text = node_without_title.text
|
29
|
+
text.nil? || text.strip.empty?
|
30
|
+
end
|
22
31
|
end
|
23
32
|
|
24
|
-
def
|
25
|
-
!
|
33
|
+
def html?
|
34
|
+
!blank?
|
26
35
|
end
|
27
36
|
|
28
37
|
def node
|
@@ -40,9 +40,11 @@ class OpenStax::Content::FragmentSplitter
|
|
40
40
|
# Flatten, remove empty nodes and transform remaining nodes into reading fragments
|
41
41
|
result.map do |obj|
|
42
42
|
next obj unless obj.is_a?(Nokogiri::XML::Node)
|
43
|
-
next if obj.content.nil? || obj.content.strip.empty?
|
44
43
|
|
45
|
-
OpenStax::Content::Fragment::Reading.new
|
44
|
+
fragment = OpenStax::Content::Fragment::Reading.new(
|
45
|
+
node: obj, reference_view_url: reference_view_url
|
46
|
+
)
|
47
|
+
fragment unless fragment.blank?
|
46
48
|
end.compact.tap do |result|
|
47
49
|
@media_nodes.each do |node|
|
48
50
|
# Media processing instructions
|
data/lib/openstax/content/s3.rb
CHANGED
@@ -2,7 +2,7 @@ require 'aws-sdk-s3'
|
|
2
2
|
|
3
3
|
class OpenStax::Content::S3
|
4
4
|
def initialize
|
5
|
-
@ls = {}
|
5
|
+
@ls = Hash.new { |hash, key| hash[key] = Hash.new { |hash, key| hash[key] = {} } }
|
6
6
|
end
|
7
7
|
|
8
8
|
def bucket_name
|
@@ -21,24 +21,65 @@ class OpenStax::Content::S3
|
|
21
21
|
)
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
# Returns the archive path for the given archive_version, book_id, page_uuid and extension
|
25
|
+
# If not all arguments are given, returns the prefix instead
|
26
|
+
def path_for(archive_version = nil, book_id = nil, page_uuid = nil, extension = nil)
|
28
27
|
archive_path = OpenStax::Content.archive_path.chomp('/')
|
29
28
|
|
30
29
|
if archive_version.nil?
|
31
|
-
|
32
|
-
|
30
|
+
"#{archive_path}/"
|
31
|
+
elsif book_id.nil?
|
32
|
+
"#{archive_path}/#{archive_version}/contents/"
|
33
|
+
elsif page_uuid.nil?
|
34
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:"
|
35
|
+
elsif extension.nil?
|
36
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}."
|
37
|
+
else
|
38
|
+
"#{archive_path}/#{archive_version}/contents/#{book_id}:#{page_uuid}.#{extension}"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Without an archive version, returns a list of archive versions
|
43
|
+
# With an archive version, returns a list of book ids (uuid@version)
|
44
|
+
# With an archive version and a book, returns a list of page uuids
|
45
|
+
# With an archive version, book id and page uuid, returns the available extensions, if any
|
46
|
+
def ls(archive_version = nil, book_id = nil, page_uuid = nil)
|
47
|
+
return @ls[archive_version][book_id][page_uuid] \
|
48
|
+
unless @ls[archive_version][book_id][page_uuid].nil?
|
49
|
+
return unless bucket_configured?
|
50
|
+
|
51
|
+
prefix = path_for archive_version, book_id, page_uuid
|
52
|
+
|
53
|
+
delimiter = if archive_version.nil?
|
54
|
+
'/'
|
55
|
+
elsif book_id.nil?
|
56
|
+
':'
|
57
|
+
elsif page_uuid.nil?
|
58
|
+
'.'
|
59
|
+
else
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
|
63
|
+
responses = client.list_objects_v2 bucket: bucket_name, prefix: prefix, delimiter: delimiter
|
64
|
+
|
65
|
+
@ls[archive_version][book_id][page_uuid] = if page_uuid.nil?
|
66
|
+
responses.flat_map(&:common_prefixes).map do |common_prefix|
|
67
|
+
common_prefix.prefix.sub(prefix, '').chomp(delimiter)
|
68
|
+
end
|
33
69
|
else
|
34
|
-
prefix
|
35
|
-
delimiter = ':'
|
70
|
+
responses.flat_map(&:contents).map { |content| content.key.sub(prefix, '') }
|
36
71
|
end
|
72
|
+
end
|
37
73
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
74
|
+
# Checks all books for the given page uuid and returns the path to the first one found
|
75
|
+
def find_page(page_uuid, archive_version: nil, extension: 'json')
|
76
|
+
archive_version ||= ls.last
|
77
|
+
|
78
|
+
ls(archive_version).each do |book_id|
|
79
|
+
return path_for(archive_version, book_id, page_uuid, extension) \
|
80
|
+
if ls(archive_version, book_id, page_uuid).include?(extension)
|
42
81
|
end
|
82
|
+
|
83
|
+
nil
|
43
84
|
end
|
44
85
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: openstax_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dante Soares
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-s3
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rspec
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,20 @@ dependencies:
|
|
108
122
|
- - ">="
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: webmock
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
111
139
|
description: Ruby bindings to read and parse the OpenStax ABL and the content archive
|
112
140
|
email:
|
113
141
|
- dante.m.soares@rice.edu
|
@@ -117,7 +145,6 @@ extra_rdoc_files: []
|
|
117
145
|
files:
|
118
146
|
- LICENSE
|
119
147
|
- README.md
|
120
|
-
- lib/openstax/content.rb
|
121
148
|
- lib/openstax/content/abl.rb
|
122
149
|
- lib/openstax/content/archive.rb
|
123
150
|
- lib/openstax/content/book.rb
|
@@ -136,6 +163,7 @@ files:
|
|
136
163
|
- lib/openstax/content/s3.rb
|
137
164
|
- lib/openstax/content/title.rb
|
138
165
|
- lib/openstax/content/version.rb
|
166
|
+
- lib/openstax_content.rb
|
139
167
|
homepage: https://github.com/openstax/content-ruby
|
140
168
|
licenses:
|
141
169
|
- AGPL-3.0
|