oga 2.3 → 2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 59f11fbee4e3a82ad1a8cca29b80f06fdfac6d34
4
- data.tar.gz: 07088c2201db9c31526cc454b61005c35075fce6
3
+ metadata.gz: 7ce044d495e3c695b739fd41a53ceb854f4ef8b8
4
+ data.tar.gz: 91126db199915b994beab17a60a5ee8009efae5c
5
5
  SHA512:
6
- metadata.gz: 8d33b04c566eceb848f6de1e95d4471ebc959694e85b16e33c01dedabbbf92f5cc15679559202f6e63437424d1dfa945cb9e8ce36282a12a77585da3210ba8e3
7
- data.tar.gz: 15375495eadfe074c7c3d6fe1b23a7d71df186dc57db72ed43592e97a5071e6866ddaaf0c3ac0bf92937d21e9b3bfe6a058008776265debd4e41519c8b013554
6
+ metadata.gz: 94dc66964aff37c6042cdba6d24cdf3a458148d4392c715c708d3e6949c7ef49054fa7a565e67fda12343f71523176ecc4488550a851e28b47f3cde886022ce1
7
+ data.tar.gz: b579fcaedce26f3a7c18925c777a188a3cb9332c2da4930cf14185c707af79b58a2d81572805456168ddcaeb007613b40e6993c99da87333ab923f73cafeae15
data/lib/oga.rb CHANGED
@@ -23,6 +23,7 @@ if RUBY_PLATFORM == 'java'
23
23
  end
24
24
  #:nocov:
25
25
 
26
+ require 'oga/xml/to_xml'
26
27
  require 'oga/xml/html_void_elements'
27
28
  require 'oga/xml/entities'
28
29
  require 'oga/xml/querying'
@@ -42,6 +43,7 @@ require 'oga/xml/default_namespace'
42
43
  require 'oga/xml/attribute'
43
44
  require 'oga/xml/element'
44
45
  require 'oga/xml/node_set'
46
+ require 'oga/xml/generator'
45
47
 
46
48
  require 'oga/xml/sax_parser'
47
49
  require 'oga/xml/pull_parser'
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '2.3'
2
+ VERSION = '2.4'
3
3
  end # Oga
@@ -3,6 +3,7 @@ module Oga
3
3
  # Class for storing information about a single XML attribute.
4
4
  class Attribute
5
5
  include ExpandedName
6
+ include ToXML
6
7
 
7
8
  # The name of the attribute.
8
9
  # @return [String]
@@ -81,19 +82,6 @@ module Oga
81
82
 
82
83
  alias_method :to_s, :text
83
84
 
84
- # @return [String]
85
- def to_xml
86
- if namespace_name
87
- full_name = "#{namespace_name}:#{name}"
88
- else
89
- full_name = name
90
- end
91
-
92
- enc_value = value ? Entities.encode_attribute(value) : nil
93
-
94
- %Q(#{full_name}="#{enc_value}")
95
- end
96
-
97
85
  # @return [String]
98
86
  def inspect
99
87
  segments = []
@@ -2,12 +2,6 @@ module Oga
2
2
  module XML
3
3
  # Class used for storing information about CDATA tags.
4
4
  class Cdata < CharacterNode
5
- # Converts the node back to XML.
6
- #
7
- # @return [String]
8
- def to_xml
9
- "<![CDATA[#{text}]]>"
10
- end
11
5
  end # Cdata
12
6
  end # XML
13
7
  end # Oga
@@ -15,11 +15,6 @@ module Oga
15
15
  @text = options[:text]
16
16
  end
17
17
 
18
- # @return [String]
19
- def to_xml
20
- text.to_s
21
- end
22
-
23
18
  # @return [String]
24
19
  def inspect
25
20
  "#{self.class.to_s.split('::').last}(#{text.inspect})"
@@ -2,12 +2,6 @@ module Oga
2
2
  module XML
3
3
  # Class used for storing information about XML comments.
4
4
  class Comment < CharacterNode
5
- # Converts the node back to XML.
6
- #
7
- # @return [String]
8
- def to_xml
9
- "<!--#{text}-->"
10
- end
11
5
  end # Comment
12
6
  end # XML
13
7
  end # Oga
@@ -2,6 +2,8 @@ module Oga
2
2
  module XML
3
3
  # Class used for storing information about Doctypes.
4
4
  class Doctype
5
+ include ToXML
6
+
5
7
  # The name of the doctype (e.g. "HTML").
6
8
  # @return [String]
7
9
  attr_accessor :name
@@ -39,20 +41,6 @@ module Oga
39
41
  @inline_rules = options[:inline_rules]
40
42
  end
41
43
 
42
- # Converts the doctype back to XML.
43
- #
44
- # @return [String]
45
- def to_xml
46
- segments = "<!DOCTYPE #{name}"
47
-
48
- segments << " #{type}" if type
49
- segments << %Q{ "#{public_id}"} if public_id
50
- segments << %Q{ "#{system_id}"} if system_id
51
- segments << " [#{inline_rules}]" if inline_rules
52
-
53
- segments + '>'
54
- end
55
-
56
44
  # Inspects the doctype.
57
45
  #
58
46
  # @return [String]
@@ -5,6 +5,7 @@ module Oga
5
5
  class Document
6
6
  include Querying
7
7
  include Traversal
8
+ include ToXML
8
9
 
9
10
  # @return [Oga::XML::Doctype]
10
11
  attr_accessor :doctype
@@ -56,23 +57,6 @@ module Oga
56
57
  self
57
58
  end
58
59
 
59
- # Converts the document and its child nodes to XML.
60
- #
61
- # @return [String]
62
- def to_xml
63
- xml = children.map(&:to_xml).join('')
64
-
65
- if doctype
66
- xml = doctype.to_xml + "\n" + xml.strip
67
- end
68
-
69
- if xml_declaration
70
- xml = xml_declaration.to_xml + "\n" + xml.strip
71
- end
72
-
73
- xml
74
- end
75
-
76
60
  # @return [TrueClass|FalseClass]
77
61
  def html?
78
62
  type.equal?(:html)
@@ -99,6 +83,11 @@ Document(
99
83
  )
100
84
  EOF
101
85
  end
86
+
87
+ # @return [FalseClass]
88
+ def literal_html_name?
89
+ false
90
+ end
102
91
  end # Document
103
92
  end # XML
104
93
  end # Oga
@@ -211,30 +211,6 @@ module Oga
211
211
  @children = NodeSet.new([text_node], self)
212
212
  end
213
213
 
214
- # Converts the element and its child elements to XML.
215
- #
216
- # @return [String]
217
- def to_xml
218
- if namespace_name
219
- full_name = "#{namespace_name}:#{name}"
220
- else
221
- full_name = name
222
- end
223
-
224
- body = children.map(&:to_xml).join('')
225
- attrs = ''
226
-
227
- attributes.each do |attr|
228
- attrs << " #{attr.to_xml}"
229
- end
230
-
231
- if self_closing?
232
- return "<#{full_name}#{attrs} />"
233
- else
234
- return "<#{full_name}#{attrs}>#{body}</#{full_name}>"
235
- end
236
- end
237
-
238
214
  # @return [String]
239
215
  def inspect
240
216
  segments = []
@@ -323,6 +299,14 @@ module Oga
323
299
  end
324
300
  end
325
301
 
302
+ # Returns true if the current element name is the name of one of the
303
+ # literal HTML elements.
304
+ #
305
+ # @return [TrueClass|FalseClass]
306
+ def literal_html_name?
307
+ Lexer::LITERAL_HTML_ELEMENTS.allow?(name)
308
+ end
309
+
326
310
  private
327
311
 
328
312
  # Registers namespaces based on any "xmlns" attributes.
@@ -0,0 +1,198 @@
1
+ module Oga
2
+ module XML
3
+ # Class for generating XML as a String based on an existing document.
4
+ #
5
+ # Basic usage:
6
+ #
7
+ # element = Oga::XML::Element.new(name: 'root')
8
+ # element.inner_text = 'hello'
9
+ #
10
+ # gen = Oga::XML::Generator.new(element)
11
+ #
12
+ # gen.to_xml # => "<root>hello</root>"
13
+ #
14
+ # @private
15
+ class Generator
16
+ # @param [Oga::XML::Document|Oga::XML::Node] start The node to serialise.
17
+ def initialize(root)
18
+ @start = root
19
+
20
+ if @start.respond_to?(:root_node)
21
+ @html_mode = @start.root_node.html?
22
+ else
23
+ @html_mode = false
24
+ end
25
+ end
26
+
27
+ # Returns the XML for the current root node.
28
+ #
29
+ # @return [String]
30
+ def to_xml
31
+ current = @start
32
+ output = ''
33
+
34
+ while current
35
+ children = false
36
+
37
+ # Determine what callback to use for the current node. The order of
38
+ # this statement is based on how likely it is for an arm to match.
39
+ case current
40
+ when Oga::XML::Element
41
+ callback = :on_element
42
+ children = true
43
+ when Oga::XML::Text
44
+ callback = :on_text
45
+ when Oga::XML::Cdata
46
+ callback = :on_cdata
47
+ when Oga::XML::Comment
48
+ callback = :on_comment
49
+ when Oga::XML::Attribute
50
+ callback = :on_attribute
51
+ when Oga::XML::ProcessingInstruction
52
+ callback = :on_processing_instruction
53
+ when Oga::XML::Doctype
54
+ callback = :on_doctype
55
+ when Oga::XML::XmlDeclaration
56
+ callback = :on_xml_declaration
57
+ when Oga::XML::Document
58
+ callback = :on_document
59
+ children = true
60
+ else
61
+ raise TypeError, "Can't serialize #{current.class} to XML"
62
+ end
63
+
64
+ send(callback, current, output)
65
+
66
+ if child_node = children && current.children[0]
67
+ current = child_node
68
+ else
69
+ until next_node = current.is_a?(Node) && current.next
70
+ if current.is_a?(Node) && current != @start
71
+ current = current.parent
72
+ end
73
+
74
+ send(:after_element, current, output) if current.is_a?(Element)
75
+
76
+ break if current == @start
77
+ end
78
+
79
+ current = next_node
80
+ end
81
+ end
82
+
83
+ output
84
+ end
85
+
86
+ # @param [Oga::XML::Text] node
87
+ # @param [String] output
88
+ def on_text(node, output)
89
+ if @html_mode && (parent = node.parent) && parent.literal_html_name?
90
+ output << node.text
91
+ else
92
+ output << Entities.encode(node.text)
93
+ end
94
+ end
95
+
96
+ # @param [Oga::XML::Cdata] node
97
+ # @param [String] output
98
+ def on_cdata(node, output)
99
+ output << "<![CDATA[#{node.text}]]>"
100
+ end
101
+
102
+ # @param [Oga::XML::Comment] node
103
+ # @param [String] output
104
+ def on_comment(node, output)
105
+ output << "<!--#{node.text}-->"
106
+ end
107
+
108
+ # @param [Oga::XML::ProcessingInstruction] node
109
+ # @param [String] output
110
+ def on_processing_instruction(node, output)
111
+ output << "<?#{node.name}#{node.text}?>"
112
+ end
113
+
114
+ # @param [Oga::XML::Element] element
115
+ # @param [String] body The content of the element.
116
+ def on_element(element, output)
117
+ name = element.expanded_name
118
+ attrs = ''
119
+
120
+ element.attributes.each do |attr|
121
+ attrs << ' '
122
+ on_attribute(attr, attrs)
123
+ end
124
+
125
+ if self_closing?(element)
126
+ output << "<#{name}#{attrs} />"
127
+ else
128
+ output << "<#{name}#{attrs}>"
129
+ end
130
+ end
131
+
132
+ # @param [Oga::XML::Element] element
133
+ # @param [String] output
134
+ def after_element(element, output)
135
+ output << "</#{element.expanded_name}>" unless self_closing?(element)
136
+ end
137
+
138
+ # @param [Oga::XML::Attribute] attr
139
+ # @param [String] output
140
+ def on_attribute(attr, output)
141
+ name = attr.expanded_name
142
+ enc_value = attr.value ? Entities.encode_attribute(attr.value) : nil
143
+
144
+ output << %Q(#{name}="#{enc_value}")
145
+ end
146
+
147
+ # @param [Oga::XML::Doctype] node
148
+ # @param [String] output
149
+ def on_doctype(node, output)
150
+ output << "<!DOCTYPE #{node.name}"
151
+
152
+ output << " #{node.type}" if node.type
153
+ output << %Q{ "#{node.public_id}"} if node.public_id
154
+ output << %Q{ "#{node.system_id}"} if node.system_id
155
+ output << " [#{node.inline_rules}]" if node.inline_rules
156
+ output << '>'
157
+ end
158
+
159
+ # @param [Oga::XML::Document] node
160
+ # @param [String] output
161
+ def on_document(doc, output)
162
+ if doc.xml_declaration
163
+ on_xml_declaration(doc.xml_declaration, output)
164
+ output << "\n"
165
+ end
166
+
167
+ if doc.doctype
168
+ on_doctype(doc.doctype, output)
169
+ output << "\n"
170
+ end
171
+ end
172
+
173
+ # @param [Oga::XML::XmlDeclaration] node
174
+ # @param [String] output
175
+ def on_xml_declaration(node, output)
176
+ output << '<?xml'
177
+
178
+ [:version, :encoding, :standalone].each do |getter|
179
+ value = node.send(getter)
180
+
181
+ output << %Q{ #{getter}="#{value}"} if value
182
+ end
183
+
184
+ output << ' ?>'
185
+ end
186
+
187
+ # @param [Oga::XML::Element] element
188
+ # @return [TrueClass|FalseClass]
189
+ def self_closing?(element)
190
+ if @html_mode && !HTML_VOID_ELEMENTS.allow?(element.name)
191
+ false
192
+ else
193
+ element.children.empty?
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
@@ -5,10 +5,17 @@ module Oga
5
5
  # nodes.
6
6
  class Node
7
7
  include Traversal
8
+ include ToXML
8
9
 
9
10
  # @return [Oga::XML::NodeSet]
10
11
  attr_reader :node_set
11
12
 
13
+ # @return [Oga::XML::Node]
14
+ attr_accessor :previous
15
+
16
+ # @return [Oga::XML::Node]
17
+ attr_accessor :next
18
+
12
19
  # @param [Hash] options
13
20
  #
14
21
  # @option options [Oga::XML::NodeSet] :node_set The node set that this
@@ -26,6 +33,8 @@ module Oga
26
33
  @node_set = set
27
34
  @root_node = nil
28
35
  @html_p = nil
36
+ @previous = nil
37
+ @next = nil
29
38
  end
30
39
 
31
40
  # Returns the child nodes of the current node.
@@ -54,25 +63,6 @@ module Oga
54
63
  node_set ? node_set.owner : nil
55
64
  end
56
65
 
57
- # Returns the preceding node, or nil if there is none.
58
- #
59
- # @return [Oga::XML::Node]
60
- def previous
61
- index = node_set.index(self) - 1
62
-
63
- index >= 0 ? node_set[index] : nil
64
- end
65
-
66
- # Returns the following node, or nil if there is none.
67
- #
68
- # @return [Oga::XML::Node]
69
- def next
70
- index = node_set.index(self) + 1
71
- length = node_set.length
72
-
73
- index <= length ? node_set[index] : nil
74
- end
75
-
76
66
  # Returns the previous element node or nil if there is none.
77
67
  #
78
68
  # @return [Oga::XML::Element]
@@ -42,10 +42,10 @@ module Oga
42
42
  @owner = owner
43
43
  @existing = {}
44
44
 
45
- @nodes.each do |node|
45
+ @nodes.each_with_index do |node, index|
46
46
  mark_existing(node)
47
47
 
48
- take_ownership(node) if @owner
48
+ take_ownership(node, index) if @owner
49
49
  end
50
50
  end
51
51
 
@@ -98,7 +98,7 @@ module Oga
98
98
 
99
99
  mark_existing(node)
100
100
 
101
- take_ownership(node) if @owner
101
+ take_ownership(node, length - 1) if @owner
102
102
  end
103
103
 
104
104
  alias_method :<<, :push
@@ -113,7 +113,7 @@ module Oga
113
113
 
114
114
  mark_existing(node)
115
115
 
116
- take_ownership(node) if @owner
116
+ take_ownership(node, 0) if @owner
117
117
  end
118
118
 
119
119
  # Shifts a node from the start of the set.
@@ -157,7 +157,7 @@ module Oga
157
157
 
158
158
  mark_existing(node)
159
159
 
160
- take_ownership(node) if @owner
160
+ take_ownership(node, index) if @owner
161
161
  end
162
162
 
163
163
  # Returns the node for the given index.
@@ -224,6 +224,8 @@ module Oga
224
224
  sets << node.node_set
225
225
 
226
226
  node.node_set = nil
227
+ node.next = nil
228
+ node.previous = nil
227
229
  end
228
230
  end
229
231
 
@@ -291,15 +293,34 @@ module Oga
291
293
  # set has an owner.
292
294
  #
293
295
  # @param [Oga::XML::Node] node
294
- def take_ownership(node)
296
+ # @param [Fixnum] index
297
+ def take_ownership(node, index)
295
298
  node.node_set = self
299
+
300
+ node.previous = index > 0 ? @nodes[index - 1] : nil
301
+ node.next = index + 1 < @nodes.length ? @nodes[index + 1] : nil
302
+
303
+ node.previous.next = node if node.previous
304
+ node.next.previous = node if node.next
296
305
  end
297
306
 
298
307
  # Removes ownership of the node if it belongs to the current set.
299
308
  #
300
309
  # @param [Oga::XML::Node] node
301
310
  def remove_ownership(node)
302
- node.node_set = nil if node.node_set == self
311
+ return unless node.node_set == self
312
+
313
+ if previous_node = node.previous
314
+ previous_node.next = node.next
315
+ end
316
+
317
+ if next_node = node.next
318
+ next_node.previous = node.previous
319
+ end
320
+
321
+ node.node_set = nil
322
+ node.previous = nil
323
+ node.next = nil
303
324
  end
304
325
 
305
326
  # @param [Oga::XML::Node|Oga::XML::Attribute] node
@@ -15,11 +15,6 @@ module Oga
15
15
  @name = options[:name]
16
16
  end
17
17
 
18
- # @return [String]
19
- def to_xml
20
- "<?#{name}#{text}?>"
21
- end
22
-
23
18
  # @return [String]
24
19
  def inspect
25
20
  "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
@@ -28,15 +28,6 @@ module Oga
28
28
  @text
29
29
  end
30
30
 
31
- # @see [Oga::XML::CharacterNode#to_xml]
32
- def to_xml
33
- return super if inside_literal_html?
34
-
35
- Entities.encode(super)
36
- end
37
-
38
- private
39
-
40
31
  # @return [TrueClass|FalseClass]
41
32
  def decode_entities?
42
33
  !@decoded && !inside_literal_html?
@@ -46,8 +37,7 @@ module Oga
46
37
  def inside_literal_html?
47
38
  node = parent
48
39
 
49
- node.is_a?(Element) && html? &&
50
- Lexer::LITERAL_HTML_ELEMENTS.allow?(node.name)
40
+ node && html? && node.literal_html_name?
51
41
  end
52
42
  end # Text
53
43
  end # XML
@@ -0,0 +1,12 @@
1
+ module Oga
2
+ module XML
3
+ # Module that provides a `#to_xml` method that serialises the current node
4
+ # back to XML.
5
+ module ToXML
6
+ # @return [String]
7
+ def to_xml
8
+ Generator.new(self).to_xml
9
+ end
10
+ end
11
+ end
12
+ end
@@ -2,6 +2,8 @@ module Oga
2
2
  module XML
3
3
  # Class containing information about an XML declaration tag.
4
4
  class XmlDeclaration
5
+ include ToXML
6
+
5
7
  # @return [String]
6
8
  attr_accessor :version
7
9
 
@@ -23,21 +25,6 @@ module Oga
23
25
  @standalone = options[:standalone]
24
26
  end
25
27
 
26
- # Converts the declaration tag to XML.
27
- #
28
- # @return [String]
29
- def to_xml
30
- pairs = []
31
-
32
- [:version, :encoding, :standalone].each do |getter|
33
- value = send(getter)
34
-
35
- pairs << %Q{#{getter}="#{value}"} if value
36
- end
37
-
38
- "<?xml #{pairs.join(' ')} ?>"
39
- end
40
-
41
28
  # @return [String]
42
29
  def inspect
43
30
  segments = []
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: '2.3'
4
+ version: '2.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-13 00:00:00.000000000 Z
11
+ date: 2016-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ast
@@ -185,6 +185,7 @@ files:
185
185
  - lib/oga/xml/element.rb
186
186
  - lib/oga/xml/entities.rb
187
187
  - lib/oga/xml/expanded_name.rb
188
+ - lib/oga/xml/generator.rb
188
189
  - lib/oga/xml/html_void_elements.rb
189
190
  - lib/oga/xml/lexer.rb
190
191
  - lib/oga/xml/namespace.rb
@@ -196,6 +197,7 @@ files:
196
197
  - lib/oga/xml/querying.rb
197
198
  - lib/oga/xml/sax_parser.rb
198
199
  - lib/oga/xml/text.rb
200
+ - lib/oga/xml/to_xml.rb
199
201
  - lib/oga/xml/traversal.rb
200
202
  - lib/oga/xml/xml_declaration.rb
201
203
  - lib/oga/xpath/compiler.rb
@@ -229,4 +231,3 @@ signing_key:
229
231
  specification_version: 4
230
232
  summary: Oga is an XML/HTML parser written in Ruby.
231
233
  test_files: []
232
- has_rdoc: yard