openstax_content 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ require_relative 'embedded'
2
+
3
# Embedded fragment for interactive simulations.
# Also knows how to rewrite a parsed document so that containers holding a
# link to an interactive are replaced by an iframe pointing at that link.
class OpenStax::Content::Fragment::Interactive < OpenStax::Content::Fragment::Embedded
  # CSS to find interactive containers (anything inside may be replaced with an iframe)
  CONTAINER_CSS = 'figure.ost-embed-container, .os-figure:has-descendants("a.os-interactive-link"), figure:has-descendants("a.os-interactive-link")'

  # CSS to find links to be embedded inside containers
  TAGGED_LINK_CSS = 'a.os-embed, a.os-interactive-link'
  UNTAGGED_LINK_CSS = 'a'

  self.default_width = 960
  self.default_height = 560
  self.iframe_classes += ['interactive']

  # Replaces each interactive container under +node+ with an iframe whose src
  # is the href of the link found inside that container.
  #
  # The link is looked up inside the container itself: the first tagged link
  # (TAGGED_LINK_CSS) wins, otherwise the last plain <a> in the container is
  # used. Containers without any link are left untouched.
  #
  # node - a Nokogiri document or node; it is modified in place.
  #
  # Returns whatever `containers.each` returns (the matched container set).
  def self.replace_interactive_links_with_iframes!(node)
    containers = node.css(CONTAINER_CSS, OpenStax::Content::CustomCss.instance)

    containers.each do |container|
      # Search within the container, not the whole document, so that each
      # container is paired with its own link
      link_node = container.at_css(TAGGED_LINK_CSS) || container.css(UNTAGGED_LINK_CSS).last

      next if link_node.nil?

      # Build iframe based on the link's URL
      iframe = Nokogiri::XML::Node.new('iframe', node.document)
      iframe['title'] = 'Interactive Simulation'
      iframe['src'] = link_node['href']
      iframe['class'] = iframe_classes.join(' ')
      iframe['width'] = default_width
      iframe['height'] = default_height

      # Replace the container with the new iframe
      container.replace(iframe)
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require_relative 'exercise'
2
+
3
# Exercise fragment subtype. It declares no behavior of its own here;
# everything is inherited from OpenStax::Content::Fragment::Exercise.
class OpenStax::Content::Fragment::OptionalExercise < OpenStax::Content::Fragment::Exercise; end
@@ -0,0 +1,9 @@
1
+ require_relative 'html'
2
+
3
# Html fragment that additionally records the reference view URL it was built with.
class OpenStax::Content::Fragment::Reading < OpenStax::Content::Fragment::Html
  # node, title and labels are handed to the superclass initializer unchanged;
  # reference_view_url is kept in @reference_view_url.
  def initialize(node:, title: nil, labels: nil, reference_view_url: nil)
    super(node: node, title: title, labels: labels)
    @reference_view_url = reference_view_url
  end
end
@@ -0,0 +1,8 @@
1
+ require_relative 'embedded'
2
+
3
# Embedded fragment for videos: overrides the iframe dimensions and title and
# adds the 'video' class to the inherited iframe classes.
class OpenStax::Content::Fragment::Video < OpenStax::Content::Fragment::Embedded
  self.default_width  = 560
  self.default_height = 315
  self.iframe_classes = iframe_classes + %w[video]
  self.iframe_title   = 'Video'
end
@@ -0,0 +1,193 @@
1
+ require 'nokogiri'
2
+ require_relative 'fragment/reading'
3
+ require_relative 'custom_css'
4
+
5
# Splits a Nokogiri node tree into an ordered list of fragments, driven by a
# list of processing instructions. Each instruction provides a css selector,
# the fragment names to emit for every match and optional only/except filters
# that are compared against the type given to #split_into_fragments.
class OpenStax::Content::FragmentSplitter
  attr_reader :processing_instructions, :reference_view_url

  # processing_instructions - enumerable of objects responding to #to_h.
  #   fragments, only and except may each be a single value or an array;
  #   fragment names are converted from snake_case to CamelCase strings and
  #   only/except entries are converted to strings.
  # reference_view_url - stored and passed on to Reading fragments.
  #
  # NOTE(review): OpenStruct is used here but this file does not
  # require 'ostruct' - confirm it is loaded elsewhere.
  def initialize(processing_instructions, reference_view_url)
    @processing_instructions = processing_instructions.map do |processing_instruction|
      OpenStruct.new(processing_instruction.to_h).tap do |pi_struct|
        pi_struct.fragments = [pi_struct.fragments].flatten.map do |fragment|
          fragment.to_s.split('_').map(&:capitalize).join
        end unless pi_struct.fragments.nil?
        pi_struct.only = [pi_struct.only].flatten.map(&:to_s) unless pi_struct.only.nil?
        pi_struct.except = [pi_struct.except].flatten.map(&:to_s) unless pi_struct.except.nil?
      end
    end

    @reference_view_url = reference_view_url
  end

  # Splits the given root node into fragments according to the processing instructions
  #
  # root - the node to split; a copy is processed, root itself is not modified.
  # type - compared (as a string) against each instruction's only/except lists.
  #
  # Returns an array of fragments. Leftover nodes with non-blank content are
  # wrapped in Reading fragments; blank nodes and nil entries are dropped.
  def split_into_fragments(root, type = nil)
    result = [root.dup]
    type_string = type.to_s

    # Skip instructions that cannot produce a split or that are filtered out by type
    pis = processing_instructions.reject do |processing_instruction|
      processing_instruction.css.nil? ||
      processing_instruction.css.empty? ||
      processing_instruction.fragments.nil? ||
      processing_instruction.fragments == ['Node'] ||
      (!processing_instruction.only.nil? && !processing_instruction.only.include?(type_string)) ||
      (!processing_instruction.except.nil? && processing_instruction.except.include?(type_string))
    end

    # Filled in by process_node whenever a 'Media' fragment is requested
    @media_nodes = []

    pis.each { |processing_instruction| result = process_array(result, processing_instruction) }

    # Flatten, remove empty nodes and transform remaining nodes into reading fragments
    result.map do |obj|
      next obj unless obj.is_a?(Nokogiri::XML::Node)
      next if obj.content.nil? || obj.content.strip.empty?

      OpenStax::Content::Fragment::Reading.new node: obj, reference_view_url: reference_view_url
    end.compact.tap do |fragments|
      @media_nodes.each do |node|
        # Media processing instructions
        # Append a copy of the media node to every html fragment that links to
        # one of the ids/names found inside the media node
        node.css('[id], [name]', custom_css).each do |linkable|
          css_array = []
          css_array << "[href$=\"##{linkable[:id]}\"]" unless linkable[:id].nil?
          css_array << "[href$=\"##{linkable[:name]}\"]" unless linkable[:name].nil?
          css = css_array.join(', ')

          fragments.select(&:html?)
                   .select { |fragment| fragment.has_css? css, custom_css }
                   .each { |fragment| fragment.append node.dup }
        end
      end

      fragments.select(&:html?).each(&:transform_links!)
    end
  end

  protected

  # Custom CSS handler passed to every css/at_css call in this class
  def custom_css
    OpenStax::Content::CustomCss.instance
  end

  # Returns an instance of the given fragment class
  # (or nil when the new fragment reports itself as blank)
  def get_fragment_instance(fragment_name, node, labels)
    fragment_class = OpenStax::Content::Fragment.const_get fragment_name
    args = { node: node, labels: labels }
    # fragment_class is a Class, so compare by inheritance (is_a? would test
    # the Class object itself and never match)
    args[:reference_view_url] = reference_view_url \
      if fragment_class <= OpenStax::Content::Fragment::Reading
    # Splat the hash so it is passed as keyword arguments on Ruby 3 as well
    fragment = fragment_class.new(**args)
    fragment unless fragment.blank?
  end

  # Recursively removes a node and its empty parents
  # (stops at root, which is never removed)
  def recursive_compact(node, root)
    return if node == root

    parent = node.parent
    node.remove

    recursive_compact(parent, root) if parent && (parent.content.nil? || parent.content.empty?)
  end

  # Recursively removes all siblings before a node and its parents
  def remove_before(node, root)
    return if node == root

    parent = node.parent
    siblings = parent.children
    index = siblings.index(node)
    parent.children = siblings.slice(index..-1)
    remove_before(parent, root)
  end

  # Recursively removes all siblings after a node and its parents
  def remove_after(node, root)
    return if node == root

    parent = node.parent
    siblings = parent.children
    index = siblings.index(node)
    parent.children = siblings.slice(0..index)
    remove_after(parent, root)
  end

  # Process a single Nokogiri::XML::Node
  # Returns an array mixing nodes, fragments and possibly nil entries
  # (get_fragment_instance returns nil for blank fragments)
  def process_node(root, processing_instruction)
    # Find first match
    node = root.at_css(processing_instruction.css, custom_css)

    # Base case
    return [ root ] if node.nil?

    num_fragments = processing_instruction.fragments.size

    if num_fragments == 0 # No splitting needed
      # Remove the match node and any empty parents from the tree
      recursive_compact(node, root)

      # Repeat the processing until no more matches
      process_node(root, processing_instruction)
    else
      compact_before = true
      compact_after = true

      # Check for special fragment cases (node)
      fragments = []
      processing_instruction.fragments.each_with_index do |fragment, index|
        if fragment == 'Node'
          if index == 0
            # fragments: [node, anything] - Don't remove node from root before fragments
            compact_before = false
          elsif index == num_fragments - 1
            # fragments: [anything, node] - Don't remove node from root after fragments
            compact_after = false
          else
            # General case
            # Make a copy of the current node (up to the root), but remove all other nodes
            root_copy = root.dup
            node_copy = root_copy.at_css(processing_instruction.css, custom_css)

            remove_before(node_copy, root_copy)
            remove_after(node_copy, root_copy)

            fragments << root_copy
          end
        elsif fragment == 'Media'
          # Collected here and attached to linking fragments by split_into_fragments
          @media_nodes << node
        else
          fragments << get_fragment_instance(fragment, node, processing_instruction.labels)
        end
      end

      # Need to split the node tree
      # Copy the node content and find the same match in the copy
      root_copy = root.dup
      node_copy = root_copy.at_css(processing_instruction.css, custom_css)

      # One copy retains the content before the match;
      # the other retains the content after the match
      remove_after(node, root)
      remove_before(node_copy, root_copy)

      # Remove the match, its copy and any empty parents from the 2 trees
      recursive_compact(node, root) if compact_before
      recursive_compact(node_copy, root_copy) if compact_after

      # Repeat the processing until no more matches
      [ root ] + fragments + process_node(root_copy, processing_instruction)
    end
  end

  # Recursively process an array of Nodes and Fragments
  # Nodes are split further; anything else is passed through unchanged
  def process_array(array, processing_instruction)
    array.flat_map do |obj|
      case obj
      when Array
        process_array(obj, processing_instruction)
      when Nokogiri::XML::Node
        process_node(obj, processing_instruction)
      else
        obj
      end
    end
  end
end
@@ -0,0 +1,201 @@
1
+ require_relative 'title'
2
+ require_relative 'fragment/interactive'
3
+ require_relative 'fragment/exercise'
4
+
5
# A single page of a book. The page's JSON is read lazily through
# book.archive.json, its 'content' is parsed with Nokogiri and tags
# (learning objectives, standards) are extracted from the parsed HTML.
class OpenStax::Content::Page
  # Start parsing here
  ROOT_CSS = 'html > body'

  # Find snap lab notes
  SNAP_LAB_CSS = '.snap-lab'
  SNAP_LAB_TITLE_CSS = '[data-type="title"]'

  # Find nodes that define relevant tags
  LO_DEF_NODE_CSS = '.ost-learning-objective-def'
  STD_DEF_NODE_CSS = '.ost-standards-def'
  TEKS_DEF_NODE_CSS = '.ost-standards-teks'
  APBIO_DEF_NODE_CSS = '.ost-standards-apbio'

  STD_NAME_NODE_CSS = '.ost-standards-name'
  STD_DESC_NODE_CSS = '.ost-standards-description'

  # Find specific tags and extract the relevant parts
  LO_REGEX = /ost-tag-lo-([\w+-]+)/
  STD_REGEX = /ost-tag-std-([\w+-]+)/
  TEKS_REGEX = /ost-tag-(teks-[\w+-]+)/

  # Returns the first node under +node+ whose id is one of +feature_ids+
  # (a single id or an array of ids), or nil when no ids are given
  def self.feature_node(node, feature_ids)
    feature_ids = [feature_ids].flatten
    return if feature_ids.empty?

    feature_id_css = feature_ids.map { |feature_id| "##{feature_id}" }.join(', ')
    node.at_css(feature_id_css)
  end

  # Either uuid or hash['id'] must be given; when only hash['id'] is present
  # the uuid is the part of it before the first '@'.
  #
  # Raises ArgumentError when no uuid can be determined.
  def initialize(book: nil, hash: {}, uuid: nil, url: nil, title: nil, content: nil)
    @uuid = uuid || hash['id']&.split('@', 2)&.first
    raise ArgumentError, 'Either uuid or hash with id key is required' if @uuid.nil?

    @book = book
    @hash = hash
    @url = url
    @title = title || hash['title']
    @content = content
  end

  attr_accessor :chapter_section
  # NOTE(review): the :hash reader overrides Object#hash for Page instances
  attr_reader :uuid, :hash

  # Returns the book given at construction; raises ArgumentError when there is none
  def book
    raise ArgumentError, 'Book was not specified' if @book.nil?

    @book
  end

  # Explicit url if one was given, otherwise built from the book's url_fragment and the uuid
  def url
    @url ||= "#{book.url_fragment}:#{uuid}.json"
  end

  def parsed_title
    @parsed_title ||= OpenStax::Content::Title.new @title
  end

  def book_location
    parsed_title.book_location
  end

  def title
    parsed_title.text
  end

  # The page's JSON as returned by book.archive.json for #url
  def full_hash
    @full_hash ||= book.archive.json url
  end

  def short_id
    @short_id ||= full_hash.fetch('shortId', nil)
  end

  # Content given at construction, otherwise the 'content' entry of #full_hash
  def content
    @content ||= full_hash.fetch('content')
  end

  def doc
    @doc ||= Nokogiri::HTML(content)
  end

  def root
    @root ||= doc.at_css(ROOT_CSS)
  end

  def footnotes
    @footnotes ||= doc.css('[role=doc-footnote]')
  end

  # Replaces links to embeddable sims (and maybe videos in the future) with iframes
  # Changes exercise urls in the doc to be absolute
  # Wraps note content in a container div (see map_note_format!)
  # Afterwards @content is the re-serialized doc and the memoized root is cleared
  def convert_content!
    OpenStax::Content::Fragment::Interactive.replace_interactive_links_with_iframes!(doc)
    OpenStax::Content::Fragment::Exercise.absolutize_exercise_urls!(doc)
    map_note_format!(doc)
    @content = doc.to_html
    @root = nil
  end

  def snap_lab_nodes
    root.css(SNAP_LAB_CSS)
  end

  # Text of the snap lab's title node, or nil when it has none
  def snap_lab_title(snap_lab)
    snap_lab.at_css(SNAP_LAB_TITLE_CSS)&.text
  end

  # Values of all :lo tags
  def los
    @los ||= tags.select { |tag| tag[:type] == :lo }.map { |tag| tag[:value] }
  end

  # Values of all :aplo tags
  def aplos
    @aplos ||= tags.select { |tag| tag[:type] == :aplo }.map { |tag| tag[:value] }
  end

  # Returns an array of tag hashes (each has at least :value and :type).
  # The result is memoized only after extraction has completed, so a failure
  # while loading or parsing the page does not leave a partial list cached.
  def tags
    return @tags.values unless @tags.nil?

    # Start with default cnxmod tag
    cnxmod_value = "context-cnxmod:#{uuid}"
    found_tags = { cnxmod_value => { value: cnxmod_value, type: :cnxmod } }

    # Extract tag name and description from .ost-standards-def and .ost-learning-objective-def.

    # LO tags
    root.css(LO_DEF_NODE_CSS).each do |node|
      klass = node.attr('class')
      lo_value = first_capture(LO_REGEX, klass)
      next if lo_value.nil?

      teks_value = first_capture(TEKS_REGEX, klass)
      description = node.content.strip

      found_tags[lo_value] = {
        value: lo_value,
        description: description,
        teks: teks_value,
        type: :lo
      }
    end

    # Other standards
    root.css(STD_DEF_NODE_CSS).each do |node|
      klass = node.attr('class')
      name = node.at_css(STD_NAME_NODE_CSS)&.content&.strip
      description = node.at_css(STD_DESC_NODE_CSS)&.content&.strip
      value = nil
      type = nil

      if node.matches?(TEKS_DEF_NODE_CSS)
        value = first_capture(TEKS_REGEX, klass)
        type = :teks
      elsif node.matches?(APBIO_DEF_NODE_CSS)
        value = first_capture(LO_REGEX, klass)
        type = :aplo
      end

      # Neither a TEKS nor an AP Bio definition, or no tag value in the class
      next if value.nil?

      found_tags[value] = {
        value: value,
        name: name,
        description: description,
        type: type
      }
    end

    @tags = found_tags
    @tags.values
  end

  protected

  # Returns the first capture group of +regex+ matched against +string+,
  # or nil when there is no match
  def first_capture(regex, string)
    match = regex.match(string)
    match[1] unless match.nil?
  end

  # Adds a container div around note content for styling
  # Every direct child of a note except its title is moved into the new div
  def map_note_format!(node)
    note_selector = [
      '.note:not(.learning-objectives)',
      '.example',
      '.grasp-check',
      '[data-type="note"]',
      '[data-element-type="check-understanding"]'
    ].join(',')

    node.css(note_selector).each do |note|
      note.set_attribute('data-tutor-transform', true)
      body = Nokogiri::XML::Node.new('div', doc)
      body.set_attribute('data-type', 'content')

      content = note.css('>*:not([data-type=title])')
      content.unlink()

      body.children = content
      note.add_child(body)
    end
  end
end