openstax_content 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +2 -0
- data/lib/openstax/content.rb +14 -0
- data/lib/openstax/content/abl.rb +13 -0
- data/lib/openstax/content/archive.rb +104 -0
- data/lib/openstax/content/book.rb +70 -0
- data/lib/openstax/content/book_part.rb +47 -0
- data/lib/openstax/content/custom_css.rb +9 -0
- data/lib/openstax/content/fragment.rb +19 -0
- data/lib/openstax/content/fragment/embedded.rb +67 -0
- data/lib/openstax/content/fragment/exercise.rb +56 -0
- data/lib/openstax/content/fragment/html.rb +62 -0
- data/lib/openstax/content/fragment/interactive.rb +36 -0
- data/lib/openstax/content/fragment/optional_exercise.rb +4 -0
- data/lib/openstax/content/fragment/reading.rb +9 -0
- data/lib/openstax/content/fragment/video.rb +8 -0
- data/lib/openstax/content/fragment_splitter.rb +193 -0
- data/lib/openstax/content/page.rb +201 -0
- data/lib/openstax/content/s3.rb +44 -0
- data/lib/openstax/content/title.rb +18 -0
- data/lib/openstax/content/version.rb +5 -0
- metadata +162 -0
data/README.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'logger'

# Top-level namespace of the gem. OpenStax::Content itself holds the
# configuration values that the classes defined under it read at runtime.
module OpenStax
  module Content
    class << self
      # Plain configuration settings; each one is nil until assigned
      attr_accessor :abl_url, :archive_path, :bucket_name, :domain, :exercises_search_api_url,
                    :s3_region, :s3_access_key_id, :s3_secret_access_key

      attr_writer :logger

      # Logger used for warning and debug messages.
      # Falls back to a logger writing to $stderr when none was configured,
      # so code that logs unconditionally does not fail on a nil logger.
      #
      # @return [Logger, #warn, #debug] the configured or default logger
      def logger
        @logger ||= Logger.new($stderr)
      end

      # Yields this module so settings can be assigned inside a block, e.g.
      #   OpenStax::Content.configure { |config| config.domain = 'example.org' }
      #
      # @return whatever the given block returns
      def configure
        yield self
      end
    end
  end
end
|
13
|
+
|
14
|
+
# Load every Ruby file below the content/ directory next to this file.
# The list is sorted explicitly so the load order is identical on every platform
# and Ruby version (glob results were not guaranteed to be sorted before Ruby 3.0).
Dir["#{__dir__}/content/**/*.rb"].sort.each { |file| require file }
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'json'

# Reads the JSON document located at OpenStax::Content.abl_url
# (presumably the "approved book list") and exposes its top-level sections.
class OpenStax::Content::Abl
  # Downloads and parses the document on first use, then returns the cached result.
  # Keys are symbolized at every nesting level by the JSON parser itself,
  # so no core extension is needed for that.
  #
  # @return [Hash] the parsed document with symbol keys
  # @raise [JSON::ParserError] if the response body is not valid JSON
  def body
    @body ||= JSON.parse(Faraday.get(OpenStax::Content.abl_url).body, symbolize_names: true)
  end

  # @return the value stored under :approved_books in the document (nil if absent)
  def approved_books
    body[:approved_books]
  end

  # @return the value stored under :approved_versions in the document (nil if absent)
  def approved_versions
    body[:approved_versions]
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
require 'faraday'
|
3
|
+
|
4
|
+
require 'json'

# Builds URLs for, and fetches content from, one version of the content archive
# served under https://<OpenStax::Content.domain>/<OpenStax::Content.archive_path>.
class OpenStax::Content::Archive
  # @param version the archive version; used as the last segment of #base_url
  def initialize(version)
    @version = version
    # Cache of object identifier => slug, filled by #json and #slug
    @slugs = {}
  end

  # @return [String] the root URL of this archive version
  def base_url
    @base_url ||=
      "https://#{OpenStax::Content.domain}/#{OpenStax::Content.archive_path}/#{@version}"
  end

  # Converts an archive-relative reference into a full URL.
  #
  # * nil is returned as nil
  # * references that cannot be parsed and fragment-only references are
  #   returned unchanged (after logging a warning)
  # * absolute URLs are forced to https and returned (after logging a warning)
  # * paths starting with "../" are resolved directly under #base_url
  # * everything else is treated as content: a trailing ".json" or ".xhtml" is
  #   replaced by ".json" and the path is placed under "#{base_url}/contents"
  #
  # @param object [String, nil] the reference to convert
  # @return [String, nil] the resulting URL, or the input when it could not be converted
  def url_for(object)
    return if object.nil?

    uri = parse_uri(object)
    if uri.nil?
      OpenStax::Content.logger.warn { "Invalid url: \"#{object}\" in archive link" }

      return object
    end

    if uri.absolute?
      OpenStax::Content.logger.warn do
        "#{self.class.name} received an unexpected absolute URL in url_for: \"#{object}\""
      end

      # Force absolute URLs to be https
      uri.scheme = 'https'
      return uri.to_s
    end

    if uri.path.empty?
      OpenStax::Content.logger.warn do
        "#{self.class.name} received an unexpected fragment-only URL in url_for: \"#{object}\""
      end

      return object
    end

    if uri.path.start_with?('../')
      # Drop the leading ".." so the remaining "/..." hangs directly off the base URL
      uri.path = uri.path.sub('..', '')
      "#{base_url}#{uri}"
    else
      uri.path = "#{uri.path.chomp('.json').chomp('.xhtml')}.json"

      separator = uri.path.start_with?('/') ? '' : '/'
      "#{base_url}/contents#{separator}#{uri}"
    end
  end

  # Downloads the given reference.
  #
  # @param object [String] a reference understood by #url_for
  # @return [String] the response body
  def fetch(object)
    url = url_for object
    OpenStax::Content.logger.debug { "Fetching #{url}" }
    Faraday.get(url).body
  end

  # Downloads the given reference and parses it as JSON.
  # The "slug" entry of the result is recorded in the slug cache under +object+.
  #
  # @param object [String] a reference understood by #url_for
  # @return the parsed JSON
  # @raise [RuntimeError] if the response body is not valid JSON
  def json(object)
    JSON.parse(fetch(object)).tap { |parsed| @slugs[object] = parsed['slug'] }
  rescue JSON::ParserError => err
    raise "OpenStax Content Archive returned invalid JSON for #{url_for object}: #{err.message}"
  end

  # @return [OpenStax::Content::S3] a memoized S3 helper
  def s3
    @s3 ||= OpenStax::Content::S3.new
  end

  # Expands "book_uuid[:page_id]" to "book_uuid@version[:page_id]" using the first
  # entry of the S3 listing for this archive version whose uuid matches.
  # The input is returned unchanged when it already contains a book version
  # or when no S3 bucket is configured.
  #
  # @param object [String] "book_uuid[@book_version][:page_id]"
  # @return [String] the reference, with a book version added when one was found
  def add_latest_book_version_if_missing(object)
    book_id, page_id = object.split(':', 2)
    book_uuid, book_version = book_id.split('@', 2)
    return object unless book_version.nil? && s3.bucket_configured?

    listed_book = s3.ls(@version).find { |book| book.split('@').first == book_uuid }
    book_version = listed_book.split('@')[1] unless listed_book.nil?

    # chomp removes the separators again when there is no version / no page id
    book_id = "#{book_uuid}@#{book_version}".chomp('@')
    "#{book_id}:#{page_id}".chomp(':')
  end

  # Returns the slug of the given book or page, fetching its JSON if it is not cached yet.
  # #json also caches the slug under the version-qualified reference it was called with.
  #
  # @param object [String] "book_uuid[@book_version][:page_id]"
  # @return the "slug" entry of the fetched JSON
  def slug(object)
    @slugs[object] ||= json(add_latest_book_version_if_missing(object))['slug']
  end

  private

  # Parses +object+ as a URI, retrying with a leading "/" if the first attempt fails.
  #
  # @return [Addressable::URI, nil] nil when both attempts fail
  def parse_uri(object)
    Addressable::URI.parse object
  rescue Addressable::URI::InvalidURIError
    begin
      Addressable::URI.parse "/#{object}"
    rescue Addressable::URI::InvalidURIError
      nil
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative 'archive'
|
2
|
+
require_relative 'book_part'
|
3
|
+
|
4
|
+
# A single book in a content archive. Attributes that were not passed to the
# constructor are read lazily from the book's JSON, which is downloaded on first use.
class OpenStax::Content::Book
  # uuid and version have explicit reader methods below, so only
  # archive_version is declared here (avoids redefining generated readers)
  attr_reader :archive_version

  # @param archive_version the archive version passed on to OpenStax::Content::Archive
  # @param uuid [String, nil] book id; defaults to hash['id']
  # @param version [String, nil] book version; defaults to hash['version']
  # @param hash [Hash, nil] already downloaded book JSON, if available
  # @param title [String, nil] pre-set title (otherwise read from the JSON)
  # @param tree [Hash, nil] pre-set table of contents (otherwise read from the JSON)
  # @param root_book_part [OpenStax::Content::BookPart, nil] pre-built root part
  # @raise [ArgumentError] if neither uuid nor version can be determined
  def initialize(
    archive_version:, uuid: nil, version: nil, hash: nil, title: nil, tree: nil, root_book_part: nil
  )
    @uuid = uuid || (hash || {})['id']
    raise ArgumentError, 'Either uuid or hash with id key is required' if @uuid.nil?

    @version = version || (hash || {})['version']
    raise ArgumentError, 'Either version or hash with version key is required' if @version.nil?

    @archive_version = archive_version
    @hash = hash
    @title = title
    @tree = tree
    @root_book_part = root_book_part
  end

  # @return [OpenStax::Content::Archive] the archive this book is fetched from
  def archive
    @archive ||= OpenStax::Content::Archive.new archive_version
  end

  # @return [String] the URL of this book's JSON in the archive
  def url
    @url ||= archive.url_for "#{uuid}@#{version}"
  end

  # @return [String] #url without the trailing ".json"
  def url_fragment
    @url_fragment ||= url.chomp('.json')
  end

  # @return the "baked" entry of the book JSON (nil if absent)
  def baked
    @baked ||= hash['baked']
  end

  # @return the "collated" entry of the book JSON, false if absent
  def collated
    @collated ||= hash.fetch('collated', false)
  end

  # The book JSON, downloaded from the archive unless it was given to the constructor.
  # The archive is handed the relative "uuid@version" reference rather than the
  # absolute #url: it resolves to the same URL, but passing an absolute URL makes
  # Archive#url_for log an "unexpected absolute URL" warning on every load.
  #
  # NOTE(review): this shadows Object#hash, so instances should not be used as Hash keys.
  #
  # @return [Hash] the parsed book JSON
  def hash
    @hash ||= archive.json "#{uuid}@#{version}"
  end

  # @return [String] the book id
  def uuid
    @uuid ||= hash.fetch('id')
  end

  # @return the "shortId" entry of the book JSON (nil if absent)
  def short_id
    @short_id ||= hash['shortId']
  end

  # @return [String] the book version
  def version
    @version ||= hash.fetch('version')
  end

  # @return the book title
  # @raise [KeyError] if no title was given and the JSON has no "title" key
  def title
    @title ||= hash.fetch('title')
  end

  # @return the "tree" entry of the book JSON
  # @raise [KeyError] if no tree was given and the JSON has no "tree" key
  def tree
    @tree ||= hash.fetch('tree')
  end

  # @return [OpenStax::Content::BookPart] the root part built from #tree
  def root_book_part
    @root_book_part ||= OpenStax::Content::BookPart.new(hash: tree, is_root: true, book: self)
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative 'title'
|
2
|
+
require_relative 'page'
|
3
|
+
|
4
|
+
require 'securerandom'

# A node of a book's table of contents that has child entries.
# Children are either nested BookParts or Pages.
class OpenStax::Content::BookPart
  # NOTE(review): the :hash reader shadows Object#hash, so instances should not
  # be used as Hash keys.
  attr_reader :hash, :is_root, :book

  # @param hash [Hash] the tree entry for this part
  # @param is_root [Boolean] whether this is the root part of the book
  # @param book [OpenStax::Content::Book, nil] the book this part belongs to
  def initialize(hash: {}, is_root: false, book: nil)
    @hash = hash
    @is_root = is_root
    @book = book
  end

  # @return [OpenStax::Content::Title] wrapper around this part's raw "title" entry
  # @raise [KeyError] if the hash has no "title" key
  def parsed_title
    @parsed_title ||= OpenStax::Content::Title.new hash.fetch('title')
  end

  # @return the book location reported by the parsed title
  def book_location
    @book_location ||= parsed_title.book_location
  end

  # @return the text reported by the parsed title
  def title
    @title ||= parsed_title.text
  end

  # Old content used to have id == "subcol" for units and chapters
  # If we encounter that, just assign a random UUID to them
  #
  # @return [String] the id without its "@version" suffix, or a random UUID
  def uuid
    @uuid ||= begin
      raw_id = hash['id']
      raw_id.nil? || raw_id == 'subcol' ? SecureRandom.uuid : raw_id.split('@').first
    end
  end

  # @return [Array<Hash>] the raw child entries of this part
  # @raise [KeyError] if the hash has no "contents" key
  def contents
    @contents ||= hash.fetch('contents')
  end

  # Builds the child objects: entries that have their own "contents" become
  # nested BookParts, all other entries become Pages.
  #
  # @return [Array<OpenStax::Content::BookPart, OpenStax::Content::Page>]
  def parts
    @parts ||= contents.map do |part_hash|
      if part_hash.key? 'contents'
        self.class.new book: book, hash: part_hash
      else
        OpenStax::Content::Page.new book: book, hash: part_hash
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# All fragment subclasses must be serializable with to_yaml and YAML.load
|
2
|
+
# Nokogiri nodes are not serializable, so they must be processed in the initialize method
|
3
|
+
class OpenStax::Content::Fragment
  attr_reader :title, :labels, :node_id

  # Only plain values are kept: the id is copied out of the node here,
  # the node itself is not stored.
  #
  # @param node the source node; only node[:id] is read
  # @param title the fragment title, if any
  # @param labels the fragment labels; an empty Array is used when none are given
  def initialize(node:, title: nil, labels: nil)
    @node_id = node[:id]
    @labels = labels || []
    @title = title
  end

  # The base fragment never reports itself as HTML
  def html?
    false
  end

  # The base fragment never reports itself as blank
  def blank?
    false
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require_relative 'html'
|
2
|
+
|
3
|
+
# An HTML fragment built around an embedded resource (an iframe or a link).
# Extracts the resource URL and dimensions and normalizes iframe attributes.
class OpenStax::Content::Fragment::Embedded < OpenStax::Content::Fragment::Html
  # Used to get the title
  TITLE_CSS = '[data-type="title"]'.freeze

  # Used to get the title if there are no title nodes
  LABEL_ATTRIBUTE = '[data-label]'.freeze

  # CSS to find embedded content urls
  TAGGED_URL_CSS = 'iframe.os-embed, a.os-embed, .os-embed iframe, .os-embed a'.freeze
  UNTAGGED_URL_CSS = 'iframe, a'.freeze

  # Per-class settings; subclasses start out with their parent's values
  CLASS_ATTRIBUTES = %i[iframe_classes iframe_title default_width default_height].freeze

  class << self
    attr_accessor(*CLASS_ATTRIBUTES)

    # Copies the current class-level settings to every new subclass.
    # super is called first so inherited hooks further up the chain still run.
    def inherited(subclass)
      super

      CLASS_ATTRIBUTES.each do |class_attribute|
        subclass.public_send "#{class_attribute}=", public_send(class_attribute)
      end
    end
  end

  self.iframe_classes = ['os-embed']
  self.iframe_title = ''

  # Instance-level readers that delegate to the class-level settings
  CLASS_ATTRIBUTES.each do |class_attribute|
    define_method(class_attribute) { self.class.public_send class_attribute }
  end

  attr_reader :url, :width, :height

  # @param node the source node (see the parent class)
  # @param title [String, nil] explicit title; when nil it is derived from the content
  # @param labels [Array, nil] fragment labels
  def initialize(node:, title: nil, labels: nil)
    super

    # Derive the title from title nodes, or from data-label attributes if there are none
    @title ||= begin
      title_nodes = @node.css(TITLE_CSS)
      titles = if title_nodes.empty?
        @node.css(LABEL_ATTRIBUTE).map { |label| label.attr('data-label') }
      else
        title_nodes.map { |title_node| title_node.content.strip }
      end
      titles.uniq.join('; ')
    end

    # Prefer explicitly tagged embeds; otherwise use the last iframe/link in the fragment
    url_node = @node.at_css(TAGGED_URL_CSS) || @node.css(UNTAGGED_URL_CSS).last

    @width = url_node&.[]('width') || default_width
    @height = url_node&.[]('height') || default_height
    @url = url_node&.[]('src') || url_node&.[]('href')

    if url_node&.name == 'iframe'
      node_classes = url_node['class'].to_s.split(' ') + iframe_classes
      url_node['class'] = node_classes.uniq.join(' ')
      url_node['title'] ||= iframe_title
      # To always force the default iframe size, change ||= to =
      url_node['width'] ||= default_width
      url_node['height'] ||= default_height
    end

    # Re-serialize, since the iframe attributes may have been modified above
    @to_html = @node.to_html
  end

  # @return [Boolean] true when no embed URL was found
  def blank?
    url.nil? || url.empty?
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require_relative '../fragment'
|
2
|
+
|
3
|
+
require 'uri'

# A fragment that references exercises through search-API links in its content.
class OpenStax::Content::Fragment::Exercise < OpenStax::Content::Fragment
  # CSS to find the embed code attributes
  EXERCISE_EMBED_URL_CSS = 'a[href^="#"]'.freeze

  # Regex to extract the appropriate tag from the embed code(s)
  EXERCISE_EMBED_URL_REGEXES = {
    tag: /\A#ost\/api\/ex\/([\w\s-]+)\z/,
    nickname: /\A#exercises?\/([\w\s-]+)\z/
  }.freeze

  # CSS to find the exercise embed queries after the urls are absolutized
  ABSOLUTIZED_EMBED_URL_CSS = 'a[href*="/api/exercises"]'.freeze

  # Regex to extract the appropriate embed queries from the absolutized urls
  ABSOLUTIZED_EMBED_URL_REGEX = \
    /\/api\/exercises\/?\?q=(tag|nickname)(?::|%3A)(?:"|%22)?([\w\s%-]+?)(?:"|%22)?\z/

  attr_reader :embed_queries

  # Rewrites exercise embed links ("#ost/api/ex/<tag>" and "#exercise(s)/<nickname>")
  # inside +node+ into search URLs based on OpenStax::Content.exercises_search_api_url
  # and marks the rewritten anchors with data-type="exercise".
  # The node is modified in place; it has to run before #initialize can find
  # any embed queries in the same content.
  #
  # @param node the node whose anchors are rewritten
  def self.absolutize_exercise_urls!(node)
    uri = Addressable::URI.parse OpenStax::Content.exercises_search_api_url

    node.css(EXERCISE_EMBED_URL_CSS).each do |anchor|
      href = anchor.attribute('href')

      EXERCISE_EMBED_URL_REGEXES.each do |field, regex|
        embed_match = regex.match(href.value)
        next if embed_match.nil?

        # The same uri object is reused; its query is overwritten for each match
        uri.query_values = { q: "#{field}:\"#{embed_match[1]}\"" }
        href.value = uri.to_s
        anchor['data-type'] = 'exercise'
        # Only the first matching pattern applies to an anchor
        break
      end
    end
  end

  # @param node the source node, with exercise urls already absolutized
  # @param title [String, nil] the fragment title
  # @param labels [Array, nil] fragment labels (nil is treated as no labels)
  def initialize(node:, title: nil, labels: nil)
    super

    # Collect [field, value] pairs, e.g. [:tag, "some-tag"], skipping links that do not match
    @embed_queries = node.css(ABSOLUTIZED_EMBED_URL_CSS).map do |anchor|
      url = anchor.attribute('href').value
      match = ABSOLUTIZED_EMBED_URL_REGEX.match(url)
      next if match.nil?

      [ match[1].to_sym, URI.decode_www_form_component(match[2]) ]
    end.compact
  end

  # @return [Boolean] true when there are no embed queries and the node had no id
  def blank?
    embed_queries.empty? && (node_id.nil? || node_id.empty?)
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
|
3
|
+
require_relative '../fragment'
|
4
|
+
|
5
|
+
# A fragment that carries HTML content.
# The serialized HTML (#to_html) is the source of truth; the parsed node is
# rebuilt from it on demand.
class OpenStax::Content::Fragment::Html < OpenStax::Content::Fragment
  attr_reader :to_html

  # @param node the source node; it is re-parsed into a private copy
  # @param title [String, nil] the fragment title
  # @param labels [Array, nil] fragment labels
  def initialize(node:, title: nil, labels: nil)
    super

    @node = Nokogiri::HTML.fragment node.to_html
    @to_html = @node.to_html
  end

  def as_json(*args)
    # Don't attempt to serialize @node (it would fail)
    # NOTE(review): super is presumably ActiveSupport's Object#as_json - confirm
    super.except('node')
  end

  # @return [Boolean] true when there is any HTML content
  def html?
    !to_html.empty?
  end

  # @return [Boolean] true when there is no HTML content
  def blank?
    !html?
  end

  # @return the parsed content, rebuilt from #to_html when it is not set
  def node
    @node ||= Nokogiri::HTML.fragment to_html
  end

  # @return [Boolean] whether the content has a node matching +css+
  #   (+custom_css+ is passed along to at_css)
  def has_css?(css, custom_css)
    !node.at_css(css, custom_css).nil?
  end

  # Appends +new_node+ to the content and refreshes the serialized HTML.
  #
  # @return [String] the updated HTML
  def append(new_node)
    (node.at_css('body') || node.root) << new_node

    @to_html = node.to_html
  end

  # Points fragment-only links whose target is not part of this fragment at
  # @reference_view_url. Nothing is rewritten while @reference_view_url is nil.
  # The serialized HTML is refreshed in either case.
  #
  # @return [String] the updated HTML
  def transform_links!
    unless @reference_view_url.nil?
      node.css('[href]').each do |link|
        href = link.attributes['href']
        uri = begin
          Addressable::URI.parse(href.value)
        rescue Addressable::URI::InvalidURIError
          # Unparseable links are left untouched
          nil
        end

        # Modify only fragment-only links
        next if uri.nil? || uri.absolute? || !uri.path.empty?

        # Abort if there is no target or it contains double quotes
        # or it's still present in this fragment
        target = uri.fragment
        next if target.nil? || target.empty? || target.include?('"') ||
                node.at_css("[id=\"#{target}\"], [name=\"#{target}\"]")

        # Change the link to point to the reference view
        href.value = "#{@reference_view_url}##{target}"
      end
    end

    @to_html = node.to_html
  end
end
|