openstax_content 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +2 -0
- data/lib/openstax/content.rb +14 -0
- data/lib/openstax/content/abl.rb +13 -0
- data/lib/openstax/content/archive.rb +104 -0
- data/lib/openstax/content/book.rb +70 -0
- data/lib/openstax/content/book_part.rb +47 -0
- data/lib/openstax/content/custom_css.rb +9 -0
- data/lib/openstax/content/fragment.rb +19 -0
- data/lib/openstax/content/fragment/embedded.rb +67 -0
- data/lib/openstax/content/fragment/exercise.rb +56 -0
- data/lib/openstax/content/fragment/html.rb +62 -0
- data/lib/openstax/content/fragment/interactive.rb +36 -0
- data/lib/openstax/content/fragment/optional_exercise.rb +4 -0
- data/lib/openstax/content/fragment/reading.rb +9 -0
- data/lib/openstax/content/fragment/video.rb +8 -0
- data/lib/openstax/content/fragment_splitter.rb +193 -0
- data/lib/openstax/content/page.rb +201 -0
- data/lib/openstax/content/s3.rb +44 -0
- data/lib/openstax/content/title.rb +18 -0
- data/lib/openstax/content/version.rb +5 -0
- metadata +162 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require_relative 'embedded'
|
2
|
+
|
3
|
+
# Embedded fragment for interactive simulations. Also provides the import-time
# conversion that turns interactive link containers in page content into iframes.
# NOTE(review): default_width=, default_height= and iframe_classes are presumably
# class-level settings declared by Fragment::Embedded - confirm there.
class OpenStax::Content::Fragment::Interactive < OpenStax::Content::Fragment::Embedded
  # CSS to find interactive containers (anything inside may be replaced with an iframe)
  CONTAINER_CSS = 'figure.ost-embed-container, .os-figure:has-descendants("a.os-interactive-link"), figure:has-descendants("a.os-interactive-link")'

  # CSS to find links to be embedded inside containers
  TAGGED_LINK_CSS = 'a.os-embed, a.os-interactive-link'
  UNTAGGED_LINK_CSS = 'a'

  self.default_width = 960
  self.default_height = 560
  self.iframe_classes += ['interactive']

  # Replaces every interactive container found under +node+ with an iframe that
  # points at the link inside that container.
  # Called from OpenStax::Content::Page#convert_content!
  #
  # node - a Nokogiri node (or document); it is modified in place.
  #
  # Containers that hold no link at all are left untouched.
  def self.replace_interactive_links_with_iframes!(node)
    containers = node.css(CONTAINER_CSS, OpenStax::Content::CustomCss.instance)

    containers.each do |container|
      # Look for the link inside this container only. Searching the whole
      # document here would hand a container some unrelated link (for the
      # untagged fallback: the last link on the page).
      # Prefer an explicitly tagged link; otherwise use the container's last link.
      link_node = container.at_css(TAGGED_LINK_CSS) || container.css(UNTAGGED_LINK_CSS).last

      next if link_node.nil?

      # Build iframe based on the link's URL
      iframe = Nokogiri::XML::Node.new('iframe', node.document)
      iframe['title'] = 'Interactive Simulation'
      iframe['src'] = link_node['href']
      iframe['class'] = iframe_classes.join(' ')
      iframe['width'] = default_width
      iframe['height'] = default_height

      # Replace the container with the new iframe
      container.replace(iframe)
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require_relative 'html'
|
2
|
+
|
3
|
+
# Html fragment that additionally remembers the reference view URL it was built with.
class OpenStax::Content::Fragment::Reading < OpenStax::Content::Fragment::Html
  # node, title and labels are forwarded unchanged to the Html initializer;
  # reference_view_url is only stored on the instance.
  def initialize(node:, title: nil, labels: nil, reference_view_url: nil)
    super(node: node, title: title, labels: labels)

    @reference_view_url = reference_view_url
  end
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'nokogiri'
require 'ostruct'
require_relative 'fragment/reading'
require_relative 'custom_css'
|
4
|
+
|
5
|
+
class OpenStax::Content::FragmentSplitter
|
6
|
+
attr_reader :processing_instructions, :reference_view_url
|
7
|
+
|
8
|
+
# Normalizes the given processing instructions and stores them on the splitter.
#
# processing_instructions - an enumerable of objects responding to #to_h. Each
#                           one becomes an OpenStruct (requires 'ostruct') where:
#                           * fragments is a flat array of CamelCase names,
#                             e.g. :optional_exercise => "OptionalExercise"
#                           * only / except are flat arrays of strings
#                           Keys that are absent (nil) are left as nil.
# reference_view_url      - stored as-is and exposed through the reader.
def initialize(processing_instructions, reference_view_url)
  @processing_instructions = processing_instructions.map do |processing_instruction|
    pi_struct = OpenStruct.new(processing_instruction.to_h)

    unless pi_struct.fragments.nil?
      pi_struct.fragments = [pi_struct.fragments].flatten.map do |fragment|
        # snake_case (symbol or string) => CamelCase constant name
        fragment.to_s.split('_').map(&:capitalize).join
      end
    end

    pi_struct.only = [pi_struct.only].flatten.map(&:to_s) unless pi_struct.only.nil?
    pi_struct.except = [pi_struct.except].flatten.map(&:to_s) unless pi_struct.except.nil?

    pi_struct
  end

  @reference_view_url = reference_view_url
end
|
21
|
+
|
22
|
+
# Splits a copy of the given root node into fragments according to the
# processing instructions.
#
# root - the node to split; it is duplicated first, the argument is not modified here.
# type - matched (as a string) against each instruction's only / except lists.
#
# Returns an array of fragments. Leftover nodes with text become Reading
# fragments, leftover nodes without text are dropped.
def split_into_fragments(root, type = nil)
  type_string = type.to_s

  # Keep only instructions that have a selector, produce something other than
  # the bare node, and apply to this type
  applicable = processing_instructions.select do |instruction|
    selector = instruction.css
    names = instruction.fragments

    next false if selector.nil? || selector.empty?
    next false if names.nil? || names == ['Node']
    next false unless instruction.only.nil? || instruction.only.include?(type_string)
    next false unless instruction.except.nil? || !instruction.except.include?(type_string)

    true
  end

  # Filled in by process_node whenever an instruction asks for a Media fragment
  @media_nodes = []

  pieces = applicable.reduce([root.dup]) do |current, instruction|
    process_array(current, instruction)
  end

  # Remove empty nodes and transform remaining nodes into reading fragments
  fragments = []
  pieces.each do |piece|
    if piece.is_a?(Nokogiri::XML::Node)
      text = piece.content
      next if text.nil? || text.strip.empty?

      fragments << OpenStax::Content::Fragment::Reading.new(
        node: piece, reference_view_url: reference_view_url
      )
    elsif !piece.nil?
      fragments << piece
    end
  end

  # Media processing instructions: append a copy of each media node to every
  # html fragment that links to one of the ids/names inside that media node
  @media_nodes.each do |media_node|
    media_node.css('[id], [name]', custom_css).each do |linkable|
      anchors = [linkable[:id], linkable[:name]].compact
      link_css = anchors.map { |anchor| "[href$=\"##{anchor}\"]" }.join(', ')

      linking = fragments.select(&:html?).select do |fragment|
        fragment.has_css? link_css, custom_css
      end
      linking.each { |fragment| fragment.append media_node.dup }
    end
  end

  fragments.select(&:html?).each(&:transform_links!)

  fragments
end
|
64
|
+
|
65
|
+
protected
|
66
|
+
|
67
|
+
# The CustomCss singleton that this class passes as the extra argument to its
# Nokogiri css / at_css / has_css? calls.
# NOTE(review): presumably it implements the custom pseudo-classes used in the
# selectors - confirm in custom_css.rb.
def custom_css
  ::OpenStax::Content::CustomCss.instance
end
|
70
|
+
|
71
|
+
# Returns an instance of the given fragment class, or nil when the new
# fragment reports itself as blank.
#
# fragment_name - name of a constant resolvable from OpenStax::Content::Fragment
# node          - the matched node handed to the fragment
# labels        - labels from the processing instruction
#
# Reading fragments (and subclasses) also receive the splitter's reference_view_url.
def get_fragment_instance(fragment_name, node, labels)
  fragment_class = OpenStax::Content::Fragment.const_get fragment_name
  args = { node: node, labels: labels }

  # fragment_class is a Class, so it has to be compared with <= ;
  # is_a? would test whether the Class object is itself a Reading (never true)
  args[:reference_view_url] = reference_view_url \
    if fragment_class <= OpenStax::Content::Fragment::Reading

  # Fragment initializers take keyword arguments; an explicit double splat
  # works on every Ruby version, a positional Hash raises on Ruby 3+
  fragment = fragment_class.new(**args)
  fragment unless fragment.blank?
end
|
80
|
+
|
81
|
+
# Removes a node and then every ancestor that has been left without content,
# walking upwards. The walk stops at root (which is never removed), at the
# first ancestor that still has content, or when there is no parent.
# Always returns nil.
def recursive_compact(node, root)
  current = node

  until current == root
    parent = current.parent
    current.remove

    return unless parent

    remaining = parent.content
    return unless remaining.nil? || remaining.empty?

    current = parent
  end

  nil
end
|
90
|
+
|
91
|
+
# Drops every sibling that precedes the node, then does the same for each of
# its ancestors up to (but not including) root. The node itself is kept.
# Always returns nil.
def remove_before(node, root)
  current = node

  until current == root
    parent = current.parent
    siblings = parent.children
    position = siblings.index(current)
    parent.children = siblings.slice(position..-1)

    current = parent
  end

  nil
end
|
101
|
+
|
102
|
+
# Drops every sibling that follows the node, then does the same for each of
# its ancestors up to (but not including) root. The node itself is kept.
# Always returns nil.
def remove_after(node, root)
  current = node

  until current == root
    parent = current.parent
    siblings = parent.children
    position = siblings.index(current)
    parent.children = siblings.slice(0..position)

    current = parent
  end

  nil
end
|
112
|
+
|
113
|
+
# Process a single Nokogiri::XML::Node
#
# Finds the first node under root that matches the instruction's css and
# splits root around it, then repeats on the remainder until nothing matches.
#
# Returns an array that starts with root (now holding only the content up to
# the match), followed by whatever the instruction's fragment names produced,
# followed by the result of processing the content after the match.
# root is modified in place.
def process_node(root, processing_instruction)
  selector = processing_instruction.css

  # Find first match
  match = root.at_css(selector, custom_css)

  # Base case
  return [root] if match.nil?

  fragment_names = processing_instruction.fragments

  if fragment_names.size == 0
    # No splitting needed:
    # remove the match node and any empty parents from the tree,
    # then repeat the processing until no more matches
    recursive_compact(match, root)
    return process_node(root, processing_instruction)
  end

  last_position = fragment_names.size - 1
  compact_before = true
  compact_after = true
  produced = []

  fragment_names.each_with_index do |fragment_name, position|
    case fragment_name
    when 'Node'
      if position == 0
        # fragments: [node, anything] - keep the match at the end of the "before" tree
        compact_before = false
      elsif position == last_position
        # fragments: [anything, node] - keep the match at the start of the "after" tree
        compact_after = false
      else
        # General case: a copy of the tree holding only the match and its ancestors
        isolated_root = root.dup
        isolated_match = isolated_root.at_css(selector, custom_css)

        remove_before(isolated_match, isolated_root)
        remove_after(isolated_match, isolated_root)

        produced << isolated_root
      end
    when 'Media'
      # Collected here; split_into_fragments attaches these to fragments later
      @media_nodes << match
    else
      produced << get_fragment_instance(fragment_name, match, processing_instruction.labels)
    end
  end

  # Split the tree: root keeps the content before the match,
  # a copy keeps the content after the match
  rest_root = root.dup
  rest_match = rest_root.at_css(selector, custom_css)

  remove_after(match, root)
  remove_before(rest_match, rest_root)

  # Remove the match, its copy and any empty parents from the 2 trees
  recursive_compact(match, root) if compact_before
  recursive_compact(rest_match, rest_root) if compact_after

  # Repeat the processing until no more matches
  [root, *produced, *process_node(rest_root, processing_instruction)]
end
|
179
|
+
|
180
|
+
# Recursively process an array of Nodes and Fragments
#
# Nested arrays are processed recursively and Nokogiri nodes are split with
# process_node; their results are spliced into the returned array.
# Anything else is passed through as it is.
def process_array(array, processing_instruction)
  array.flat_map do |item|
    if item.is_a?(Array)
      process_array(item, processing_instruction)
    elsif item.is_a?(Nokogiri::XML::Node)
      process_node(item, processing_instruction)
    else
      item
    end
  end
end
|
193
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
require_relative 'title'
|
2
|
+
require_relative 'fragment/interactive'
|
3
|
+
require_relative 'fragment/exercise'
|
4
|
+
|
5
|
+
class OpenStax::Content::Page
|
6
|
+
# Start parsing here
|
7
|
+
ROOT_CSS = 'html > body'
|
8
|
+
|
9
|
+
# Find snap lab notes
|
10
|
+
SNAP_LAB_CSS = '.snap-lab'
|
11
|
+
SNAP_LAB_TITLE_CSS = '[data-type="title"]'
|
12
|
+
|
13
|
+
# Find nodes that define relevant tags
|
14
|
+
LO_DEF_NODE_CSS = '.ost-learning-objective-def'
|
15
|
+
STD_DEF_NODE_CSS = '.ost-standards-def'
|
16
|
+
TEKS_DEF_NODE_CSS = '.ost-standards-teks'
|
17
|
+
APBIO_DEF_NODE_CSS = '.ost-standards-apbio'
|
18
|
+
|
19
|
+
STD_NAME_NODE_CSS = '.ost-standards-name'
|
20
|
+
STD_DESC_NODE_CSS = '.ost-standards-description'
|
21
|
+
|
22
|
+
# Find specific tags and extract the relevant parts
|
23
|
+
LO_REGEX = /ost-tag-lo-([\w+-]+)/
|
24
|
+
STD_REGEX = /ost-tag-std-([\w+-]+)/
|
25
|
+
TEKS_REGEX = /ost-tag-(teks-[\w+-]+)/
|
26
|
+
|
27
|
+
# Returns the first element under node whose id is one of feature_ids.
#
# feature_ids - a single id or a (possibly nested) array of ids
#
# Returns nil when no ids are given; otherwise whatever node.at_css returns.
def self.feature_node(node, feature_ids)
  ids = [feature_ids].flatten
  return if ids.empty?

  id_selectors = ids.map { |id| "##{id}" }
  node.at_css(id_selectors.join(', '))
end
|
34
|
+
|
35
|
+
# Builds a page from explicit values and/or a hash describing the page.
#
# book    - the owning book (optional here; #book raises when it is missing)
# hash    - page description; 'id' (the part before '@') and 'title' are used
#           as fallbacks for uuid and title
# uuid    - overrides the uuid taken from hash['id']
# url     - overrides the url otherwise derived from the book
# title   - overrides hash['title']
# content - page content, when it is already known
#
# Raises ArgumentError when neither uuid nor hash['id'] is given.
#
# NOTE(review): the class also declares `attr_reader :hash`, which replaces
# Object#hash for Page instances - worth confirming pages are never used as
# Hash keys.
def initialize(book: nil, hash: {}, uuid: nil, url: nil, title: nil, content: nil)
  @uuid = uuid
  @uuid ||= hash['id']&.split('@', 2)&.first
  raise ArgumentError, 'Either uuid or hash with id key is required' if @uuid.nil?

  @book, @hash, @url = book, hash, url
  @title = title || hash['title']
  @content = content
end
|
45
|
+
|
46
|
+
attr_accessor :chapter_section
|
47
|
+
attr_reader :uuid, :hash
|
48
|
+
|
49
|
+
# The book this page belongs to.
# Raises ArgumentError when the page was created without one.
def book
  return @book unless @book.nil?

  raise ArgumentError, 'Book was not specified'
end
|
54
|
+
|
55
|
+
# The page's JSON url: the one given at construction, otherwise built (and
# memoized) from the book's url_fragment and the page uuid.
def url
  return @url if @url

  @url = "#{book.url_fragment}:#{uuid}.json"
end
|
58
|
+
|
59
|
+
# The raw title wrapped in an OpenStax::Content::Title (memoized).
def parsed_title
  return @parsed_title if @parsed_title

  @parsed_title = OpenStax::Content::Title.new(@title)
end
|
62
|
+
|
63
|
+
# The book location reported by the parsed title.
def book_location
  parsed = parsed_title
  parsed.book_location
end
|
66
|
+
|
67
|
+
# The text of the parsed title.
def title
  parsed = parsed_title
  parsed.text
end
|
70
|
+
|
71
|
+
# The page JSON, obtained by passing the page url to the book archive's
# #json method (memoized).
def full_hash
  return @full_hash if @full_hash

  @full_hash = book.archive.json(url)
end
|
74
|
+
|
75
|
+
# The 'shortId' entry of the page JSON, or nil when there is none.
def short_id
  return @short_id if @short_id

  @short_id = full_hash.fetch('shortId', nil)
end
|
78
|
+
|
79
|
+
# The page content: the value given at construction (or set by
# convert_content!), otherwise the 'content' entry of the page JSON.
# Raises KeyError when the JSON has no 'content' key.
def content
  return @content if @content

  @content = full_hash.fetch('content')
end
|
82
|
+
|
83
|
+
# The page content parsed with Nokogiri::HTML (memoized).
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML(content)
end
|
86
|
+
|
87
|
+
# The first node of the document matching ROOT_CSS (memoized while non-nil).
def root
  return @root if @root

  @root = doc.at_css(ROOT_CSS)
end
|
90
|
+
|
91
|
+
# All nodes in the document carrying role=doc-footnote (memoized).
def footnotes
  return @footnotes if @footnotes

  @footnotes = doc.css('[role=doc-footnote]')
end
|
94
|
+
|
95
|
+
# Rewrites the parsed document in place and stores the result as the page content:
# - replaces links to embeddable sims (and maybe videos in the future) with iframes
# - changes exercise urls in the doc to be absolute
# - wraps note content in a container div (see map_note_format!)
# The memoized root is cleared afterwards. Returns nil.
def convert_content!
  document = doc

  OpenStax::Content::Fragment::Interactive.replace_interactive_links_with_iframes!(document)
  OpenStax::Content::Fragment::Exercise.absolutize_exercise_urls!(document)
  map_note_format!(document)

  @content = document.to_html
  @root = nil
end
|
104
|
+
|
105
|
+
# All nodes under the page root that match SNAP_LAB_CSS.
def snap_lab_nodes
  page_root = root
  page_root.css(SNAP_LAB_CSS)
end
|
108
|
+
|
109
|
+
# The text of the first SNAP_LAB_TITLE_CSS node inside the given snap lab
# node, or nil when the snap lab has no such node.
def snap_lab_title(snap_lab)
  # Safe navigation instead of Object#try: try is an ActiveSupport extension
  # that this file never requires
  snap_lab.at_css(SNAP_LAB_TITLE_CSS)&.text
end
|
112
|
+
|
113
|
+
# Values of all tags of type :lo (memoized).
def los
  return @los if @los

  lo_tags = tags.select { |tag| tag[:type] == :lo }
  @los = lo_tags.map { |tag| tag[:value] }
end
|
116
|
+
|
117
|
+
# Values of all tags of type :aplo (memoized).
def aplos
  return @aplos if @aplos

  aplo_tags = tags.select { |tag| tag[:type] == :aplo }
  @aplos = aplo_tags.map { |tag| tag[:value] }
end
|
120
|
+
|
121
|
+
# Returns the tags defined by this page as an array of hashes, one per
# distinct tag value, in the order they were first found:
# - always a :cnxmod tag "context-cnxmod:<uuid>"
# - one :lo tag per LO_DEF_NODE_CSS node whose class matches LO_REGEX
#   (with its description and, if present in the class, its TEKS value)
# - one :teks or :aplo tag per STD_DEF_NODE_CSS node, depending on whether the
#   node matches TEKS_DEF_NODE_CSS or APBIO_DEF_NODE_CSS (with name and description)
# Nodes whose class yields no value are skipped. The result is memoized.
def tags
  return @tags.values unless @tags.nil?

  # Start with default cnxmod tag
  cnxmod_value = "context-cnxmod:#{uuid}"
  # Collected in a local and memoized only once parsing has finished, so a
  # failure half way through never leaves a partial tag list cached
  found = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

  # Extract tag name and description from .ost-standards-def and .ost-learning-objective-def.

  # LO tags
  root.css(LO_DEF_NODE_CSS).each do |node|
    # to_s keeps a missing class attribute from raising; no match yields nil
    klass = node.attr('class').to_s
    lo_value = klass[LO_REGEX, 1]
    next if lo_value.nil?

    found[lo_value] = {
      value: lo_value,
      description: node.content.strip,
      teks: klass[TEKS_REGEX, 1],
      type: :lo
    }
  end

  # Other standards
  root.css(STD_DEF_NODE_CSS).each do |node|
    klass = node.attr('class').to_s
    name = node.at_css(STD_NAME_NODE_CSS)&.content&.strip
    description = node.at_css(STD_DESC_NODE_CSS)&.content&.strip

    if node.matches?(TEKS_DEF_NODE_CSS)
      value = klass[TEKS_REGEX, 1]
      type = :teks
    elsif node.matches?(APBIO_DEF_NODE_CSS)
      value = klass[LO_REGEX, 1]
      type = :aplo
    else
      next
    end

    next if value.nil?

    found[value] = {
      value: value,
      name: name,
      description: description,
      type: type
    }
  end

  @tags = found
  @tags.values
end
|
174
|
+
|
175
|
+
protected
|
176
|
+
|
177
|
+
# Adds a container div around note content for styling
#
# Every note-like element under node is marked with data-tutor-transform and
# all of its direct children except the title are moved into a new
# <div data-type="content"> appended to the note.
# Returns whatever node.css(...).each returns (the matched notes).
def map_note_format!(node)
  note_selector = [
    '.note:not(.learning-objectives)',
    '.example',
    '.grasp-check',
    '[data-type="note"]',
    '[data-element-type="check-understanding"]'
  ].join(',')

  node.css(note_selector).each do |note|
    note.set_attribute('data-tutor-transform', true)

    wrapper = Nokogiri::XML::Node.new('div', doc)
    wrapper.set_attribute('data-type', 'content')

    # Everything directly inside the note, apart from its title
    wrapped = note.css('>*:not([data-type=title])')
    wrapped.unlink

    wrapper.children = wrapped
    note.add_child(wrapper)
  end
end
|
201
|
+
end
|