openstax_content 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,36 @@
1
+ require_relative 'embedded'
2
+
3
+ class OpenStax::Content::Fragment::Interactive < OpenStax::Content::Fragment::Embedded
4
+ # CSS to find interactive containers (anything inside may be replaced with an iframe)
5
+ CONTAINER_CSS = 'figure.ost-embed-container, .os-figure:has-descendants("a.os-interactive-link"), figure:has-descendants("a.os-interactive-link")'
6
+
7
+ # CSS to find links to be embedded inside containers
8
+ TAGGED_LINK_CSS = 'a.os-embed, a.os-interactive-link'
9
+ UNTAGGED_LINK_CSS = 'a'
10
+
11
+ self.default_width = 960
12
+ self.default_height = 560
13
+ self.iframe_classes += ['interactive']
14
+
15
+ # This code is run from OpenStax::Content::Page#convert_content! during import
16
# Replaces each interactive container found under +node+ with an iframe whose
# +src+ is the href of the link embedded in that container.
#
# Containers are located with CONTAINER_CSS. Inside each container the first
# link matching TAGGED_LINK_CSS is preferred; otherwise the last link matching
# UNTAGGED_LINK_CSS is used. Containers that hold no link are left untouched.
#
# @param node the node (or document) to search; matched containers are replaced in place
# @return the collection of matched containers (the result of +each+)
def self.replace_interactive_links_with_iframes!(node)
  containers = node.css(CONTAINER_CSS, OpenStax::Content::CustomCss.instance)

  containers.each do |container|
    # Look for the link inside this container only. Searching +node+ here would hand
    # every container the same (first) link in the document and would also replace
    # containers that have no link of their own.
    link_node = container.at_css(TAGGED_LINK_CSS) || container.css(UNTAGGED_LINK_CSS).last

    next if link_node.nil?

    # Build iframe based on the link's URL
    iframe = Nokogiri::XML::Node.new('iframe', node.document)
    iframe['title'] = 'Interactive Simulation'
    iframe['src'] = link_node['href']
    iframe['class'] = iframe_classes.join(' ')
    iframe['width'] = default_width
    iframe['height'] = default_height

    # Replace the container with the new iframe
    container.replace(iframe)
  end
end
36
+ end
@@ -0,0 +1,4 @@
1
+ require_relative 'exercise'
2
+
3
# Exercise fragment subclass that adds no behavior of its own; it exists as a distinct
# fragment type.
class OpenStax::Content::Fragment::OptionalExercise < OpenStax::Content::Fragment::Exercise
end
@@ -0,0 +1,9 @@
1
+ require_relative 'html'
2
+
3
# Html fragment that additionally stores the URL of its reference view.
class OpenStax::Content::Fragment::Reading < OpenStax::Content::Fragment::Html
  # @param node forwarded to the Html initializer
  # @param title forwarded to the Html initializer
  # @param labels forwarded to the Html initializer
  # @param reference_view_url stored in @reference_view_url
  def initialize(node:, title: nil, labels: nil, reference_view_url: nil)
    super(node: node, title: title, labels: labels)

    @reference_view_url = reference_view_url
  end
end
@@ -0,0 +1,8 @@
1
+ require_relative 'embedded'
2
+
3
# Embedded fragment configured for videos: default iframe size, an extra 'video'
# iframe class and the iframe title.
class OpenStax::Content::Fragment::Video < OpenStax::Content::Fragment::Embedded
  self.default_width  = 560
  self.default_height = 315
  self.iframe_classes = self.iframe_classes + ['video']
  self.iframe_title   = 'Video'
end
@@ -0,0 +1,193 @@
1
+ require 'nokogiri'
2
+ require_relative 'fragment/reading'
3
+ require_relative 'custom_css'
4
+
5
+ class OpenStax::Content::FragmentSplitter
6
+ attr_reader :processing_instructions, :reference_view_url
7
+
8
# Normalizes the given processing instructions and stores the reference view URL.
#
# Each instruction is converted (via +to_h+) into an OpenStruct whose
# * +fragments+ become an array of CamelCase names ("optional_exercise" -> "OptionalExercise"),
# * +only+ and +except+ become arrays of strings.
# Attributes that are nil are left nil.
#
# @param processing_instructions [Enumerable] objects responding to +to_h+
# @param reference_view_url stored as-is and exposed through the reader
def initialize(processing_instructions, reference_view_url)
  @processing_instructions = processing_instructions.map do |raw_instruction|
    instruction = OpenStruct.new(raw_instruction.to_h)

    unless instruction.fragments.nil?
      instruction.fragments = [instruction.fragments].flatten.map do |fragment_name|
        fragment_name.to_s.split('_').map(&:capitalize).join
      end
    end

    # +only+ and +except+ get the same treatment: wrap, flatten, stringify
    [:only, :except].each do |filter|
      value = instruction[filter]
      instruction[filter] = [value].flatten.map(&:to_s) unless value.nil?
    end

    instruction
  end

  @reference_view_url = reference_view_url
end
21
+
22
+ # Splits the given root node into fragments according to the processing instructions
23
# Splits the given root node into fragments according to the processing instructions.
#
# A copy of +root+ is processed, so the caller's node is not modified by the split.
# Instructions are skipped when they have no css, no fragments, only the 'Node'
# fragment, or when +type+ is excluded through their +only+/+except+ lists.
#
# @param root the node to split (duplicated before processing)
# @param type compared, as a string, against each instruction's only/except lists
# @return [Array] fragments; non-blank leftover nodes become Reading fragments
def split_into_fragments(root, type = nil)
  pieces = [root.dup]
  type_name = type.to_s

  applicable = processing_instructions.select do |instruction|
    selector = instruction.css
    next false if selector.nil? || selector.empty?

    names = instruction.fragments
    next false if names.nil? || names == ['Node']

    next false if !instruction.only.nil? && !instruction.only.include?(type_name)
    next false if !instruction.except.nil? && instruction.except.include?(type_name)

    true
  end

  # Filled by process_node whenever an instruction contains the 'Media' fragment
  @media_nodes = []

  applicable.each { |instruction| pieces = process_array(pieces, instruction) }

  # Drop empty nodes and nil entries; wrap remaining nodes in reading fragments
  fragments = pieces.map do |piece|
    next piece unless piece.is_a?(Nokogiri::XML::Node)
    next if piece.content.nil? || piece.content.strip.empty?

    OpenStax::Content::Fragment::Reading.new node: piece, reference_view_url: reference_view_url
  end.compact

  # Media processing instructions: append a copy of each media node to every html
  # fragment that links to one of the ids/names found inside that media node
  @media_nodes.each do |media_node|
    media_node.css('[id], [name]', custom_css).each do |linkable|
      selectors = []
      selectors << "[href$=\"##{linkable[:id]}\"]" unless linkable[:id].nil?
      selectors << "[href$=\"##{linkable[:name]}\"]" unless linkable[:name].nil?
      link_css = selectors.join(', ')

      fragments.select(&:html?).each do |fragment|
        fragment.append media_node.dup if fragment.has_css? link_css, custom_css
      end
    end
  end

  fragments.select(&:html?).each(&:transform_links!)

  fragments
end
64
+
65
+ protected
66
+
67
# Custom CSS handler object passed to the Nokogiri searches made by this class.
#
# @return the OpenStax::Content::CustomCss singleton instance
def custom_css
  OpenStax::Content::CustomCss.instance
end
70
+
71
+ # Returns an instance of the given fragment class
72
# Returns an instance of the given fragment class, or nil if the fragment is blank.
#
# @param fragment_name [String] constant name looked up under OpenStax::Content::Fragment
# @param node the node handed to the fragment
# @param labels the labels handed to the fragment
# @return the new fragment, or nil when +fragment.blank?+ is true
def get_fragment_instance(fragment_name, node, labels)
  fragment_class = OpenStax::Content::Fragment.const_get(fragment_name)

  args = { node: node, labels: labels }

  # fragment_class is a Class object, so test by inheritance (<=). The previous
  # +is_a?+ check asked whether the Class itself was a Reading instance, which is
  # never true, so the reference view URL was never forwarded.
  args[:reference_view_url] = reference_view_url \
    if fragment_class <= OpenStax::Content::Fragment::Reading

  # Splat into keywords: fragment initializers take keyword arguments, and Ruby 3
  # no longer converts a positional Hash into keywords.
  fragment = fragment_class.new(**args)
  fragment unless fragment.blank?
end
80
+
81
+ # Recursively removes a node and its empty parents
82
# Removes +node+ from the tree, then keeps removing ancestors for as long as the
# ancestor's content is nil or empty. The walk stops at +root+, which is never removed.
#
# @param node the node to remove
# @param root the top of the tree; acts as the stop marker
# @return [nil]
def recursive_compact(node, root)
  current = node

  until current == root
    # Grab the parent first: after +remove+ the node no longer points to it
    parent = current.parent
    current.remove

    break unless parent && (parent.content.nil? || parent.content.empty?)

    current = parent
  end
end
90
+
91
+ # Recursively removes all siblings before a node and its parents
92
# Walks from +node+ up to +root+ and, at every level, drops all siblings that come
# before the node on that path. The path nodes themselves are kept.
#
# @param node the node whose preceding content is discarded
# @param root the top of the tree; the walk stops here
# @return [nil]
def remove_before(node, root)
  current = node

  until current == root
    parent = current.parent
    siblings = parent.children
    position = siblings.index(current)

    # Keep the current node and everything after it
    parent.children = siblings[position..-1]

    current = parent
  end
end
101
+
102
+ # Recursively removes all siblings after a node and its parents
103
# Walks from +node+ up to +root+ and, at every level, drops all siblings that come
# after the node on that path. The path nodes themselves are kept.
#
# @param node the node whose following content is discarded
# @param root the top of the tree; the walk stops here
# @return [nil]
def remove_after(node, root)
  current = node

  until current == root
    parent = current.parent
    siblings = parent.children
    position = siblings.index(current)

    # Keep everything up to and including the current node
    parent.children = siblings[0..position]

    current = parent
  end
end
112
+
113
+ # Process a single Nokogiri::XML::Node
114
# Process a single Nokogiri::XML::Node.
#
# Finds the first match of the instruction's css under +root+ and either removes it
# (instruction with no fragments) or splits the tree around it, then repeats on the
# remainder until nothing matches.
#
# @param root the tree to process; it is modified in place
# @param processing_instruction responds to +css+, +fragments+ and +labels+
# @return [Array] the (trimmed) root, the fragments created for the match, and the
#   results of processing the content after the match
def process_node(root, processing_instruction)
  selector = processing_instruction.css

  # Find first match
  match = root.at_css(selector, custom_css)

  # Base case
  return [root] if match.nil?

  fragment_names = processing_instruction.fragments
  last_position = fragment_names.size - 1

  if fragment_names.size.zero?
    # No splitting needed: remove the match and any empty parents from the tree,
    # then repeat the processing until no more matches
    recursive_compact(match, root)
    return process_node(root, processing_instruction)
  end

  keep_match_before = false
  keep_match_after = false
  produced = []

  fragment_names.each_with_index do |fragment_name, position|
    case fragment_name
    when 'Node'
      if position.zero?
        # fragments: [node, anything] - the match stays at the end of the content before it
        keep_match_before = true
      elsif position == last_position
        # fragments: [anything, node] - the match stays at the start of the content after it
        keep_match_after = true
      else
        # General case: a copy of the tree reduced to the match and its ancestors
        isolated_root = root.dup
        isolated_match = isolated_root.at_css(selector, custom_css)

        remove_before(isolated_match, isolated_root)
        remove_after(isolated_match, isolated_root)

        produced << isolated_root
      end
    when 'Media'
      @media_nodes << match
    else
      produced << get_fragment_instance(fragment_name, match, processing_instruction.labels)
    end
  end

  # Need to split the node tree: copy it and find the same match in the copy
  tail_root = root.dup
  tail_match = tail_root.at_css(selector, custom_css)

  # The original retains the content before the match; the copy retains the content after it
  remove_after(match, root)
  remove_before(tail_match, tail_root)

  # Remove the match, its copy and any empty parents from the 2 trees
  recursive_compact(match, root) unless keep_match_before
  recursive_compact(tail_match, tail_root) unless keep_match_after

  # Repeat the processing until no more matches
  [root] + produced + process_node(tail_root, processing_instruction)
end
179
+
180
+ # Recursively process an array of Nodes and Fragments
181
# Recursively process an array of Nodes and Fragments.
#
# Nested arrays are processed recursively, Nokogiri nodes go through process_node,
# and anything else is passed through unchanged.
#
# @param array [Array] nodes, fragments and/or nested arrays
# @param processing_instruction the instruction to apply to every node
# @return [Array] the results, flattened one level per element
def process_array(array, processing_instruction)
  array.flat_map do |element|
    if element.is_a?(Array)
      process_array(element, processing_instruction)
    elsif element.is_a?(Nokogiri::XML::Node)
      process_node(element, processing_instruction)
    else
      element
    end
  end
end
193
+ end
@@ -0,0 +1,201 @@
1
+ require_relative 'title'
2
+ require_relative 'fragment/interactive'
3
+ require_relative 'fragment/exercise'
4
+
5
+ class OpenStax::Content::Page
6
+ # Start parsing here
7
+ ROOT_CSS = 'html > body'
8
+
9
+ # Find snap lab notes
10
+ SNAP_LAB_CSS = '.snap-lab'
11
+ SNAP_LAB_TITLE_CSS = '[data-type="title"]'
12
+
13
+ # Find nodes that define relevant tags
14
+ LO_DEF_NODE_CSS = '.ost-learning-objective-def'
15
+ STD_DEF_NODE_CSS = '.ost-standards-def'
16
+ TEKS_DEF_NODE_CSS = '.ost-standards-teks'
17
+ APBIO_DEF_NODE_CSS = '.ost-standards-apbio'
18
+
19
+ STD_NAME_NODE_CSS = '.ost-standards-name'
20
+ STD_DESC_NODE_CSS = '.ost-standards-description'
21
+
22
+ # Find specific tags and extract the relevant parts
23
+ LO_REGEX = /ost-tag-lo-([\w+-]+)/
24
+ STD_REGEX = /ost-tag-std-([\w+-]+)/
25
+ TEKS_REGEX = /ost-tag-(teks-[\w+-]+)/
26
+
27
# Finds the first node under +node+ whose id is one of +feature_ids+.
#
# @param node the node to search
# @param feature_ids a single id or a (possibly nested) array of ids; nil entries are ignored
# @return the first matching node, or nil when no ids are given or nothing matches
def self.feature_node(node, feature_ids)
  # Drop nils so a missing id cannot produce the invalid selector "#"
  ids = [feature_ids].flatten.compact
  return if ids.empty?

  feature_id_css = ids.map { |feature_id| "##{feature_id}" }.join(', ')
  node.at_css(feature_id_css)
end
34
+
35
# Builds a page from explicit attributes and/or a hash of page data.
#
# @param book the owning book (optional; see #book)
# @param hash [Hash] page data; 'id' (text before the first '@') and 'title' are
#   used as fallbacks for +uuid+ and +title+
# @param uuid the page uuid; takes precedence over hash['id']
# @param url the page url (optional; see #url)
# @param title the raw page title; takes precedence over hash['title']
# @param content the page content (optional; see #content)
# @raise [ArgumentError] if no uuid can be determined
def initialize(book: nil, hash: {}, uuid: nil, url: nil, title: nil, content: nil)
  resolved_uuid = uuid
  unless resolved_uuid
    # Ids look like "<uuid>@<version>"; only the part before the first '@' is kept
    versioned_id = hash['id']
    resolved_uuid = versioned_id.nil? ? nil : versioned_id.split('@', 2).first
  end
  @uuid = resolved_uuid

  raise ArgumentError, 'Either uuid or hash with id key is required' if @uuid.nil?

  @book = book
  @hash = hash
  @url = url
  @title = title || hash['title']
  @content = content
end
45
+
46
+ attr_accessor :chapter_section
47
+ attr_reader :uuid, :hash
48
+
49
# The book this page was created with.
#
# @raise [ArgumentError] if no book was given
def book
  return @book unless @book.nil?

  raise ArgumentError, 'Book was not specified'
end
54
+
55
# The page url; when none was given it is built once from the book's url_fragment
# and the page uuid, then cached.
def url
  return @url if @url

  @url = "#{book.url_fragment}:#{uuid}.json"
end
58
+
59
# The raw title wrapped in an OpenStax::Content::Title (created once, then cached).
def parsed_title
  @parsed_title ||= OpenStax::Content::Title.new(@title)
end
62
+
63
# The book location reported by the parsed title.
def book_location
  parsed_title.book_location
end
66
+
67
# The text reported by the parsed title.
def title
  parsed_title.text
end
70
+
71
# The page JSON obtained from the book's archive for this page's url (cached).
def full_hash
  @full_hash ||= book.archive.json(url)
end
74
+
75
# The 'shortId' entry of the full page hash, or nil when the key is absent.
def short_id
  return @short_id if @short_id

  @short_id = full_hash.fetch('shortId', nil)
end
78
+
79
# The page content; when none was given it is read from the 'content' entry of the
# full page hash (a missing key raises KeyError) and cached.
def content
  return @content if @content

  @content = full_hash.fetch('content')
end
82
+
83
# The page content parsed with Nokogiri::HTML (parsed once, then cached).
def doc
  @doc ||= Nokogiri.HTML(content)
end
86
+
87
# The first node of the parsed document matching ROOT_CSS (cached).
def root
  @root ||= doc.at_css(ROOT_CSS)
end
90
+
91
# All nodes of the parsed document with role=doc-footnote (cached).
def footnotes
  @footnotes ||= doc.css('[role=doc-footnote]')
end
94
+
95
+ # Replaces links to embeddable sims (and maybe videos in the future) with iframes
96
+ # Changes exercise urls in the doc to be absolute
97
# Converts the parsed document in place and refreshes the cached content:
# * replaces links to embeddable interactives with iframes,
# * makes exercise urls absolute,
# * wraps note content in a container div (see map_note_format!).
# Afterwards @content holds the converted HTML and the cached root is cleared.
#
# @return [nil]
def convert_content!
  document = doc

  OpenStax::Content::Fragment::Interactive.replace_interactive_links_with_iframes!(document)
  OpenStax::Content::Fragment::Exercise.absolutize_exercise_urls!(document)
  map_note_format!(document)

  @content = document.to_html
  @root = nil
end
104
+
105
# All nodes under the root that match SNAP_LAB_CSS.
def snap_lab_nodes
  root.css(SNAP_LAB_CSS)
end
108
+
109
# The text of the first node inside +snap_lab+ matching SNAP_LAB_TITLE_CSS.
#
# Uses the safe navigation operator (as #initialize already does) instead of +try+,
# so the method does not depend on +try+ being defined on the node or on nil.
#
# @param snap_lab a snap lab node (see #snap_lab_nodes)
# @return the title text, or nil when the snap lab has no title node
def snap_lab_title(snap_lab)
  snap_lab.at_css(SNAP_LAB_TITLE_CSS)&.text
end
112
+
113
# Values of all tags whose type is :lo (computed once, then cached).
def los
  @los ||= tags.each_with_object([]) do |tag, values|
    values << tag[:value] if tag[:type] == :lo
  end
end
116
+
117
# Values of all tags whose type is :aplo (computed once, then cached).
def aplos
  @aplos ||= tags.each_with_object([]) do |tag, values|
    values << tag[:value] if tag[:type] == :aplo
  end
end
120
+
121
# All tags defined on this page, as an array of hashes (computed once, then cached).
#
# The list always starts with the default cnxmod tag, followed by
# * learning objectives found in LO_DEF_NODE_CSS nodes (type :lo), and
# * standards found in STD_DEF_NODE_CSS nodes (type :teks or :aplo).
# Nodes whose class attribute carries no recognizable tag value are skipped. A later
# definition with the same value replaces an earlier one.
#
# @return [Array<Hash>] tag hashes with :value and :type plus, depending on the
#   type, :description, :teks and :name
def tags
  return @tags.values unless @tags.nil?

  # Start with default cnxmod tag
  cnxmod_value = "context-cnxmod:#{uuid}"
  collected = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

  # Extract tag name and description from .ost-standards-def and .ost-learning-objective-def.

  # LO tags
  root.css(LO_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')
    lo_value = LO_REGEX.match(klass)&.[](1)
    next if lo_value.nil?

    collected[lo_value] = {
      value: lo_value,
      description: node.content.strip,
      teks: TEKS_REGEX.match(klass)&.[](1),
      type: :lo
    }
  end

  # Other standards
  root.css(STD_DEF_NODE_CSS).each do |node|
    klass = node.attr('class')

    value, type =
      if node.matches?(TEKS_DEF_NODE_CSS)
        [TEKS_REGEX.match(klass)&.[](1), :teks]
      elsif node.matches?(APBIO_DEF_NODE_CSS)
        [LO_REGEX.match(klass)&.[](1), :aplo]
      end

    next if value.nil?

    collected[value] = {
      value: value,
      name: node.at_css(STD_NAME_NODE_CSS)&.content&.strip,
      description: node.at_css(STD_DESC_NODE_CSS)&.content&.strip,
      type: type
    }
  end

  # Memoize only after everything was collected, so an error while reading the
  # document cannot leave a partial tag list cached.
  @tags = collected
  @tags.values
end
174
+
175
+ protected
176
+
177
+ # Adds a container div around note content for styling
178
# Adds a container div around note content for styling.
#
# Every note-like element under +node+ is flagged with data-tutor-transform and all
# of its direct children, except those with data-type=title, are moved into a new
# <div data-type="content"> appended to the note.
#
# @param node the node (or document) whose notes are rewritten in place
# @return the collection of matched notes (the result of +each+)
def map_note_format!(node)
  note_selector = [
    '.note:not(.learning-objectives)',
    '.example',
    '.grasp-check',
    '[data-type="note"]',
    '[data-element-type="check-understanding"]'
  ].join(',')

  node.css(note_selector).each do |note|
    note.set_attribute('data-tutor-transform', true)

    wrapper = Nokogiri::XML::Node.new('div', doc)
    wrapper.set_attribute('data-type', 'content')

    # Everything except the title moves into the wrapper
    movable = note.css('>*:not([data-type=title])')
    movable.unlink

    wrapper.children = movable
    note.add_child(wrapper)
  end
end
201
+ end