openstax_content 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +2 -0
- data/lib/openstax/content.rb +14 -0
- data/lib/openstax/content/abl.rb +13 -0
- data/lib/openstax/content/archive.rb +104 -0
- data/lib/openstax/content/book.rb +70 -0
- data/lib/openstax/content/book_part.rb +47 -0
- data/lib/openstax/content/custom_css.rb +9 -0
- data/lib/openstax/content/fragment.rb +19 -0
- data/lib/openstax/content/fragment/embedded.rb +67 -0
- data/lib/openstax/content/fragment/exercise.rb +56 -0
- data/lib/openstax/content/fragment/html.rb +62 -0
- data/lib/openstax/content/fragment/interactive.rb +36 -0
- data/lib/openstax/content/fragment/optional_exercise.rb +4 -0
- data/lib/openstax/content/fragment/reading.rb +9 -0
- data/lib/openstax/content/fragment/video.rb +8 -0
- data/lib/openstax/content/fragment_splitter.rb +193 -0
- data/lib/openstax/content/page.rb +201 -0
- data/lib/openstax/content/s3.rb +44 -0
- data/lib/openstax/content/title.rb +18 -0
- data/lib/openstax/content/version.rb +5 -0
- metadata +162 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require_relative 'embedded'
|
2
|
+
|
3
|
+
class OpenStax::Content::Fragment::Interactive < OpenStax::Content::Fragment::Embedded
|
4
|
+
# CSS to find interactive containers (anything inside may be replaced with an iframe)
|
5
|
+
CONTAINER_CSS = 'figure.ost-embed-container, .os-figure:has-descendants("a.os-interactive-link"), figure:has-descendants("a.os-interactive-link")'
|
6
|
+
|
7
|
+
# CSS to find links to be embedded inside containers
|
8
|
+
TAGGED_LINK_CSS = 'a.os-embed, a.os-interactive-link'
|
9
|
+
UNTAGGED_LINK_CSS = 'a'
|
10
|
+
|
11
|
+
self.default_width = 960
|
12
|
+
self.default_height = 560
|
13
|
+
self.iframe_classes += ['interactive']
|
14
|
+
|
15
|
+
# Replaces every interactive container found under +node+ with an iframe that
# points at the link embedded in that container.
#
# Called by OpenStax::Content::Page#convert_content! with the page document.
#
# Containers are located with CONTAINER_CSS. For each container the link is
# looked up INSIDE that container: the first element matching TAGGED_LINK_CSS
# wins, otherwise the last element matching UNTAGGED_LINK_CSS is used.
# Containers that hold no link at all are left untouched.
#
# node - the document or subtree to modify in place; it must respond to
#        +css+ and +document+
def self.replace_interactive_links_with_iframes!(node)
  containers = node.css(CONTAINER_CSS, OpenStax::Content::CustomCss.instance)

  containers.each do |container|
    # Search the container, not the whole node: a node-wide search would hand
    # every container the same (first) tagged link in the document.
    link_node = container.at_css(TAGGED_LINK_CSS) || container.css(UNTAGGED_LINK_CSS).last

    next if link_node.nil?

    # Build iframe based on the link's URL
    iframe = Nokogiri::XML::Node.new('iframe', node.document)
    iframe['title'] = 'Interactive Simulation'
    iframe['src'] = link_node['href']
    iframe['class'] = iframe_classes.join(' ')
    iframe['width'] = default_width
    iframe['height'] = default_height

    # Replace the container with the new iframe
    container.replace(iframe)
  end
end
|
36
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require_relative 'html'
|
2
|
+
|
3
|
+
# Html fragment that additionally remembers the reference view URL it was
# created with.
class OpenStax::Content::Fragment::Reading < OpenStax::Content::Fragment::Html
  # node, title and labels are forwarded unchanged to the Html initializer;
  # reference_view_url is stored in @reference_view_url afterwards.
  def initialize(node:, title: nil, labels: nil, reference_view_url: nil)
    super(node: node, title: title, labels: labels)

    @reference_view_url = reference_view_url
  end
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'nokogiri'
require 'ostruct'
require_relative 'fragment/reading'
require_relative 'custom_css'
|
4
|
+
|
5
|
+
class OpenStax::Content::FragmentSplitter
|
6
|
+
attr_reader :processing_instructions, :reference_view_url
|
7
|
+
|
8
|
+
# Normalizes the given processing instructions and stores them together with
# the reference view URL.
#
# Each instruction is converted (via +to_h+) into an OpenStruct whose
#   * fragments - when present, becomes an array of CamelCase names
#                 ('optional_exercise' => 'OptionalExercise')
#   * only      - when present, becomes an array of strings
#   * except    - when present, becomes an array of strings
# Absent (nil) values are left as nil.
def initialize(processing_instructions, reference_view_url)
  @processing_instructions = processing_instructions.map do |raw_instruction|
    instruction = OpenStruct.new(raw_instruction.to_h)

    unless instruction.fragments.nil?
      instruction.fragments = [instruction.fragments].flatten.map do |name|
        name.to_s.split('_').map(&:capitalize).join
      end
    end

    [:only, :except].each do |filter|
      value = instruction[filter]
      instruction[filter] = [value].flatten.map(&:to_s) unless value.nil?
    end

    instruction
  end

  @reference_view_url = reference_view_url
end
|
21
|
+
|
22
|
+
# Splits a copy of +root+ into fragments according to the processing
# instructions and returns the resulting array of fragments.
#
# An instruction is skipped when its css is nil or empty, when its fragments
# are nil or exactly ['Node'], when it has an +only+ list that does not
# include +type+, or when it has an +except+ list that does include +type+.
#
# Whatever Nokogiri nodes remain after processing are wrapped in Reading
# fragments (nodes without non-blank text content are dropped). Nodes collected
# for 'Media' instructions are then appended to every html fragment that links
# to one of their ids/names, and finally links are transformed in every html
# fragment.
def split_into_fragments(root, type = nil)
  type_name = type.to_s

  applicable = processing_instructions.select do |instruction|
    next false if instruction.css.nil? || instruction.css.empty?
    next false if instruction.fragments.nil? || instruction.fragments == ['Node']
    next false unless instruction.only.nil? || instruction.only.include?(type_name)

    instruction.except.nil? || !instruction.except.include?(type_name)
  end

  # Filled by process_node whenever it meets a 'Media' fragment
  @media_nodes = []

  pieces = applicable.reduce([root.dup]) do |current, instruction|
    process_array(current, instruction)
  end

  # Remove empty nodes and transform remaining nodes into reading fragments
  fragments = pieces.map do |piece|
    next piece unless piece.is_a?(Nokogiri::XML::Node)
    next if piece.content.nil? || piece.content.strip.empty?

    OpenStax::Content::Fragment::Reading.new(node: piece, reference_view_url: reference_view_url)
  end.compact

  # Media processing instructions
  @media_nodes.each do |media_node|
    media_node.css('[id], [name]', custom_css).each do |linkable|
      anchors = [linkable[:id], linkable[:name]].compact
      link_css = anchors.map { |anchor| "[href$=\"##{anchor}\"]" }.join(', ')

      linking = fragments.select(&:html?).select do |fragment|
        fragment.has_css?(link_css, custom_css)
      end
      linking.each { |fragment| fragment.append(media_node.dup) }
    end
  end

  fragments.select(&:html?).each(&:transform_links!)

  fragments
end
|
64
|
+
|
65
|
+
protected
|
66
|
+
|
67
|
+
# Shortcut for the shared CustomCss instance that is passed to every
# css/at_css/has_css? call in this class.
def custom_css
  OpenStax::Content::CustomCss.instance
end
|
70
|
+
|
71
|
+
# Builds the fragment named +fragment_name+ for +node+.
#
# fragment_name - name of a constant under OpenStax::Content::Fragment
# node          - passed to the fragment as node:
# labels        - passed to the fragment as labels:
#
# Reading (and any subclass of it) additionally receives reference_view_url.
#
# Returns the new fragment, or nil when the fragment reports itself blank.
def get_fragment_instance(fragment_name, node, labels)
  fragment_class = OpenStax::Content::Fragment.const_get(fragment_name)

  args = { node: node, labels: labels }

  # fragment_class is a Class, so compare class hierarchies with <=;
  # is_a? would test the Class object itself and never match.
  args[:reference_view_url] = reference_view_url \
    if fragment_class <= OpenStax::Content::Fragment::Reading

  # Fragment initializers take keyword arguments, so the hash must be splatted
  fragment = fragment_class.new(**args)
  fragment unless fragment.blank?
end
|
80
|
+
|
81
|
+
# Removes +node+ from its tree and then walks upwards, also removing every
# ancestor whose content has become nil or empty. The walk never removes
# +root+ itself and stops at the first ancestor that still has content.
def recursive_compact(node, root)
  current = node

  until current == root
    parent = current.parent
    current.remove

    break unless parent
    break unless parent.content.nil? || parent.content.empty?

    current = parent
  end
end
|
90
|
+
|
91
|
+
# Drops every sibling that precedes +node+, then does the same for each of its
# ancestors in turn, stopping once +root+ is reached. +node+ itself is kept.
def remove_before(node, root)
  current = node

  until current == root
    parent = current.parent
    siblings = parent.children
    position = siblings.index(current)

    # Keep the current node and everything after it
    parent.children = siblings.slice(position..-1)

    current = parent
  end
end
|
101
|
+
|
102
|
+
# Drops every sibling that follows +node+, then does the same for each of its
# ancestors in turn, stopping once +root+ is reached. +node+ itself is kept.
def remove_after(node, root)
  current = node

  until current == root
    parent = current.parent
    siblings = parent.children
    position = siblings.index(current)

    # Keep everything up to and including the current node
    parent.children = siblings.slice(0..position)

    current = parent
  end
end
|
112
|
+
|
113
|
+
# Processes a single Nokogiri::XML::Node: splits +root+ around the first node
# matching the instruction's CSS, then recurses into what followed the match.
#
# Returns [root] when nothing matches. Otherwise returns the (mutated) root,
# followed by the pieces produced for the match, followed by the result of
# processing the content after the match.
#
# The names in processing_instruction.fragments are handled as follows:
#   * no names at all - the match is deleted and the same root is processed again
#   * 'Node' first    - the match is not removed from the content before it
#   * 'Node' last     - the match is not removed from the content after it
#   * 'Node' between  - a copy of root, stripped of everything before and
#                       after the match, becomes a piece of its own
#   * 'Media'         - the match is recorded in @media_nodes
#   * anything else   - the result of get_fragment_instance is added
def process_node(root, processing_instruction)
  selector = processing_instruction.css

  # Find first match
  match = root.at_css(selector, custom_css)

  # Base case
  return [root] if match.nil?

  fragment_names = processing_instruction.fragments
  last_position = fragment_names.size - 1

  if fragment_names.size.zero?
    # No splitting needed: remove the match and any empty parents from the
    # tree, then repeat the processing until no more matches
    recursive_compact(match, root)
    return process_node(root, processing_instruction)
  end

  strip_from_head = true
  strip_from_tail = true
  produced = []

  fragment_names.each_with_index do |fragment_name, position|
    case fragment_name
    when 'Node'
      if position.zero?
        # fragments: [node, anything] - keep the match in the leading piece
        strip_from_head = false
      elsif position == last_position
        # fragments: [anything, node] - keep the match in the trailing piece
        strip_from_tail = false
      else
        # General case: copy the tree and keep only the match and its parents
        isolated_root = root.dup
        isolated_match = isolated_root.at_css(selector, custom_css)

        remove_before(isolated_match, isolated_root)
        remove_after(isolated_match, isolated_root)

        produced << isolated_root
      end
    when 'Media'
      @media_nodes << match
    else
      produced << get_fragment_instance(fragment_name, match, processing_instruction.labels)
    end
  end

  # Split the tree: copy it and find the same match in the copy
  tail_root = root.dup
  tail_match = tail_root.at_css(selector, custom_css)

  # root retains the content before the match; the copy retains what follows
  remove_after(match, root)
  remove_before(tail_match, tail_root)

  # Remove the match, its copy and any empty parents from the 2 trees
  recursive_compact(match, root) if strip_from_head
  recursive_compact(tail_match, tail_root) if strip_from_tail

  # Repeat the processing until no more matches
  [root] + produced + process_node(tail_root, processing_instruction)
end
|
179
|
+
|
180
|
+
# Applies one processing instruction to every element of +array+ and returns
# the results as an array flattened by one level: nested arrays are processed
# recursively, Nokogiri nodes go through process_node, and anything else
# (e.g. an already built fragment) is passed through untouched.
def process_array(array, processing_instruction)
  array.flat_map do |element|
    if element.is_a?(Array)
      process_array(element, processing_instruction)
    elsif element.is_a?(Nokogiri::XML::Node)
      process_node(element, processing_instruction)
    else
      element
    end
  end
end
|
193
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
require_relative 'title'
|
2
|
+
require_relative 'fragment/interactive'
|
3
|
+
require_relative 'fragment/exercise'
|
4
|
+
|
5
|
+
class OpenStax::Content::Page
|
6
|
+
# Start parsing here
|
7
|
+
ROOT_CSS = 'html > body'
|
8
|
+
|
9
|
+
# Find snap lab notes
|
10
|
+
SNAP_LAB_CSS = '.snap-lab'
|
11
|
+
SNAP_LAB_TITLE_CSS = '[data-type="title"]'
|
12
|
+
|
13
|
+
# Find nodes that define relevant tags
|
14
|
+
LO_DEF_NODE_CSS = '.ost-learning-objective-def'
|
15
|
+
STD_DEF_NODE_CSS = '.ost-standards-def'
|
16
|
+
TEKS_DEF_NODE_CSS = '.ost-standards-teks'
|
17
|
+
APBIO_DEF_NODE_CSS = '.ost-standards-apbio'
|
18
|
+
|
19
|
+
STD_NAME_NODE_CSS = '.ost-standards-name'
|
20
|
+
STD_DESC_NODE_CSS = '.ost-standards-description'
|
21
|
+
|
22
|
+
# Find specific tags and extract the relevant parts
|
23
|
+
LO_REGEX = /ost-tag-lo-([\w+-]+)/
|
24
|
+
STD_REGEX = /ost-tag-std-([\w+-]+)/
|
25
|
+
TEKS_REGEX = /ost-tag-(teks-[\w+-]+)/
|
26
|
+
|
27
|
+
# Finds the first element under +node+ whose id is one of +feature_ids+.
#
# feature_ids may be a single id or a (nested) array of ids; each one is
# turned into a "#id" selector and the selectors are joined with ", ".
# Returns nil without searching when no ids are given.
def self.feature_node(node, feature_ids)
  ids = [feature_ids].flatten
  return if ids.empty?

  selector = ids.map { |id| "##{id}" }.join(', ')
  node.at_css(selector)
end
|
34
|
+
|
35
|
+
# All arguments are optional keywords, but a uuid must be derivable:
# either +uuid+ is given, or hash['id'] is present, in which case the part
# before the first '@' is used.
#
# title falls back to hash['title'] when not given.
#
# Raises ArgumentError when no uuid can be determined.
def initialize(book: nil, hash: {}, uuid: nil, url: nil, title: nil, content: nil)
  @uuid = uuid || hash['id']&.split('@', 2)&.first
  raise(ArgumentError, 'Either uuid or hash with id key is required') if @uuid.nil?

  @book, @hash, @url, @content = book, hash, url, content
  @title = title || hash['title']
end
|
45
|
+
|
46
|
+
attr_accessor :chapter_section
|
47
|
+
attr_reader :uuid, :hash
|
48
|
+
|
49
|
+
# Returns the book this page was created with.
# Raises ArgumentError when the page was created without one.
def book
  return @book unless @book.nil?

  raise ArgumentError, 'Book was not specified'
end
|
54
|
+
|
55
|
+
# Returns the url given to the constructor or, when there was none, builds
# (and remembers) "<book url fragment>:<uuid>.json".
def url
  return @url if @url

  @url = "#{book.url_fragment}:#{uuid}.json"
end
|
58
|
+
|
59
|
+
# Memoized OpenStax::Content::Title built from the raw title.
def parsed_title
  @parsed_title ||= OpenStax::Content::Title.new(@title)
end
|
62
|
+
|
63
|
+
# Book location as reported by the parsed title.
def book_location
  parsed_title.book_location
end
|
66
|
+
|
67
|
+
# Title text as reported by the parsed title.
def title
  parsed_title.text
end
|
70
|
+
|
71
|
+
# Memoized JSON for this page, requested from the book's archive by url.
def full_hash
  @full_hash ||= book.archive.json(url)
end
|
74
|
+
|
75
|
+
# The 'shortId' entry of the full page hash, or nil when it has none.
def short_id
  return @short_id if @short_id

  @short_id = full_hash.fetch('shortId', nil)
end
|
78
|
+
|
79
|
+
# Content given to the constructor (or produced by convert_content!);
# otherwise the 'content' entry of the full page hash, which must exist.
def content
  return @content if @content

  @content = full_hash.fetch('content')
end
|
82
|
+
|
83
|
+
# Memoized Nokogiri HTML document parsed from the page content.
def doc
  @doc ||= Nokogiri::HTML(content)
end
|
86
|
+
|
87
|
+
# Memoized first node matching ROOT_CSS in the document; parsing starts here.
def root
  @root ||= doc.at_css(ROOT_CSS)
end
|
90
|
+
|
91
|
+
# Memoized set of all nodes in the document that carry role=doc-footnote.
def footnotes
  @footnotes ||= doc.css('[role=doc-footnote]')
end
|
94
|
+
|
95
|
+
# Rewrites the parsed document in place and stores the result as the content:
#   1. links to embeddable sims are replaced with iframes
#   2. exercise urls in the document are made absolute
#   3. note content is wrapped for styling (see map_note_format!)
# The cached root is cleared afterwards so that it is looked up again on next
# use. Returns nil.
def convert_content!
  document = doc

  OpenStax::Content::Fragment::Interactive.replace_interactive_links_with_iframes!(document)
  OpenStax::Content::Fragment::Exercise.absolutize_exercise_urls!(document)
  map_note_format!(document)

  @content = document.to_html
  @root = nil
end
|
104
|
+
|
105
|
+
# All nodes under the root that match SNAP_LAB_CSS.
def snap_lab_nodes
  root.css(SNAP_LAB_CSS)
end
|
108
|
+
|
109
|
+
# Returns the text of the first node matching SNAP_LAB_TITLE_CSS inside
# +snap_lab+, or nil when the snap lab has no such node.
#
# Uses Ruby's safe navigation operator rather than ActiveSupport's #try, so
# the method works whether or not ActiveSupport is loaded.
def snap_lab_title(snap_lab)
  snap_lab.at_css(SNAP_LAB_TITLE_CSS)&.text
end
|
112
|
+
|
113
|
+
# Memoized values of all tags whose type is :lo.
def los
  @los ||= tags.each_with_object([]) do |tag, values|
    values << tag[:value] if tag[:type] == :lo
  end
end
|
116
|
+
|
117
|
+
# Memoized values of all tags whose type is :aplo.
def aplos
  @aplos ||= tags.each_with_object([]) do |tag, values|
    values << tag[:value] if tag[:type] == :aplo
  end
end
|
120
|
+
|
121
|
+
# Returns the tags defined on this page as an array of hashes. The tags are
# collected once and kept in @tags (keyed by tag value, so a later definition
# of the same value replaces an earlier one).
#
# The list always starts with the default tag
#   { value: "context-cnxmod:<uuid>", type: :cnxmod }
# followed by
#   * one :lo tag per LO_DEF_NODE_CSS node whose class matches LO_REGEX, with
#     the node's stripped content as description and, when the class also
#     matches TEKS_REGEX, the TEKS value under :teks
#   * one :teks or :aplo tag per STD_DEF_NODE_CSS node that also matches
#     TEKS_DEF_NODE_CSS or APBIO_DEF_NODE_CSS, with name and description read
#     from its STD_NAME_NODE_CSS / STD_DESC_NODE_CSS children (nil when absent)
# Nodes whose class does not contain the expected tag are skipped.
#
# Only core Ruby is used for the nil handling (no ActiveSupport #try).
def tags
  return @tags.values unless @tags.nil?

  # Start with default cnxmod tag
  cnxmod_value = "context-cnxmod:#{uuid}"
  @tags = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

  # Extract tag name and description from .ost-standards-def and
  # .ost-learning-objective-def.

  # LO tags
  root.css(LO_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    lo_value = klass.to_s[LO_REGEX, 1]
    next if lo_value.nil?

    @tags[lo_value] = {
      value: lo_value,
      description: node.content.strip,
      teks: klass.to_s[TEKS_REGEX, 1],
      type: :lo
    }
  end

  # Other standards
  root.css(STD_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    name = node.at_css(STD_NAME_NODE_CSS)&.content&.strip
    description = node.at_css(STD_DESC_NODE_CSS)&.content&.strip

    # Both stay nil when the node is neither a TEKS nor an AP Bio definition
    value, type =
      if node.matches?(TEKS_DEF_NODE_CSS)
        [klass.to_s[TEKS_REGEX, 1], :teks]
      elsif node.matches?(APBIO_DEF_NODE_CSS)
        [klass.to_s[LO_REGEX, 1], :aplo]
      end

    next if value.nil?

    @tags[value] = {
      value: value,
      name: name,
      description: description,
      type: type
    }
  end

  @tags.values
end
|
174
|
+
|
175
|
+
protected
|
176
|
+
|
177
|
+
# Adds a container div around note content for styling.
#
# Every note-like element under +node+ (notes other than learning objectives,
# examples, grasp checks, check-understanding elements) is marked with
# data-tutor-transform, and all of its direct children except the title are
# moved into a new <div data-type="content"> appended to the note.
def map_note_format!(node)
  note_selector = [
    '.note:not(.learning-objectives)',
    '.example',
    '.grasp-check',
    '[data-type="note"]',
    '[data-element-type="check-understanding"]'
  ].join(',')

  node.css(note_selector).each do |note|
    note.set_attribute('data-tutor-transform', true)

    wrapper = Nokogiri::XML::Node.new('div', doc)
    wrapper.set_attribute('data-type', 'content')

    # Everything directly inside the note except its title
    movable = note.css('>*:not([data-type=title])')
    movable.unlink

    wrapper.children = movable
    note.add_child(wrapper)
  end
end
|
201
|
+
end
|