oga 1.0.2-java → 1.0.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/oga/xml/lexer.rb CHANGED
@@ -34,12 +34,20 @@ module Oga
34
34
  # However, it is perfectly safe to use different instances per thread.
35
35
  # There is no _global_ state used by this lexer.
36
36
  #
37
- # @!attribute [r] html
38
- # @return [TrueClass|FalseClass]
37
+ # ## Strict Mode
38
+ #
39
+ # By default the lexer is rather permissive regarding the input. For
40
+ # example, missing closing tags are inserted by default. To disable this
41
+ # behaviour the lexer can be run in "strict mode" by setting `:strict` to
42
+ # `true`:
43
+ #
44
+ # lexer = Oga::XML::Lexer.new('...', :strict => true)
45
+ #
46
+ # Strict mode only applies to XML documents.
47
+ #
48
+ # @private
39
49
  #
40
50
  class Lexer
41
- attr_reader :html
42
-
43
51
  # These are all constant/frozen to remove the need for String allocations
44
52
  # every time they are referenced in the lexer.
45
53
  HTML_SCRIPT = 'script'.freeze
@@ -99,13 +107,17 @@ module Oga
99
107
  #
100
108
  # @param [Hash] options
101
109
  #
102
- # @option options [Symbol] :html When set to `true` the lexer will treat
103
- # the input as HTML instead of SGML/XML. This makes it possible to lex
104
- # HTML void elements such as `<link href="">`.
110
+ # @option options [TrueClass|FalseClass] :html When set to `true` the
111
+ # lexer will treat the input as HTML instead of XML. This makes it
112
+ # possible to lex HTML void elements such as `<link href="">`.
113
+ #
114
+ # @option options [TrueClass|FalseClass] :strict Enables/disables strict
115
+ # parsing of XML documents, disabled by default.
105
116
  #
106
117
  def initialize(data, options = {})
107
- @data = data
108
- @html = options[:html]
118
+ @data = data
119
+ @html = options[:html]
120
+ @strict = options[:strict] || false
109
121
 
110
122
  reset
111
123
  end
@@ -163,7 +175,7 @@ module Oga
163
175
 
164
176
  reset
165
177
 
166
- return tokens
178
+ tokens
167
179
  end
168
180
 
169
181
  ##
@@ -193,7 +205,7 @@ module Oga
193
205
  end
194
206
 
195
207
  # Add any missing closing tags
196
- unless @elements.empty?
208
+ if !strict? and !@elements.empty?
197
209
  @elements.length.times { on_element_end }
198
210
  end
199
211
  ensure
@@ -204,21 +216,28 @@ module Oga
204
216
  # @return [TrueClass|FalseClass]
205
217
  #
206
218
  def html?
207
- return !!html
219
+ @html == true
220
+ end
221
+
222
+ ##
223
+ # @return [TrueClass|FalseClass]
224
+ #
225
+ def strict?
226
+ @strict
208
227
  end
209
228
 
210
229
  ##
211
230
  # @return [TrueClass|FalseClass]
212
231
  #
213
232
  def html_script?
214
- return html? && current_element == HTML_SCRIPT
233
+ html? && current_element == HTML_SCRIPT
215
234
  end
216
235
 
217
236
  ##
218
237
  # @return [TrueClass|FalseClass]
219
238
  #
220
239
  def html_style?
221
- return html? && current_element == HTML_STYLE
240
+ html? && current_element == HTML_STYLE
222
241
  end
223
242
 
224
243
  private
@@ -250,7 +269,7 @@ module Oga
250
269
  # @return [String]
251
270
  #
252
271
  def current_element
253
- return @elements.last
272
+ @elements.last
254
273
  end
255
274
 
256
275
  ##
@@ -4,14 +4,12 @@ module Oga
4
4
  # The Namespace class contains information about XML namespaces such as the
5
5
  # name and URI.
6
6
  #
7
- # @!attribute [r] name
8
- # @return [String]
9
- #
10
- # @!attribute [r] uri
11
- # @return [String]
12
- #
13
7
  class Namespace
14
- attr_accessor :name, :uri
8
+ # @return [String]
9
+ attr_accessor :name
10
+
11
+ # @return [String]
12
+ attr_accessor :uri
15
13
 
16
14
  ##
17
15
  # @param [Hash] options
@@ -28,14 +26,14 @@ module Oga
28
26
  # @return [String]
29
27
  #
30
28
  def to_s
31
- return name.to_s
29
+ name.to_s
32
30
  end
33
31
 
34
32
  ##
35
33
  # @return [String]
36
34
  #
37
35
  def inspect
38
- return "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
36
+ "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
39
37
  end
40
38
 
41
39
  ##
@@ -43,7 +41,7 @@ module Oga
43
41
  # @return [TrueClass|FalseClass]
44
42
  #
45
43
  def ==(other)
46
- return other.is_a?(self.class) && name == other.name && uri == other.uri
44
+ other.is_a?(self.class) && name == other.name && uri == other.uri
47
45
  end
48
46
  end # Namespace
49
47
  end # XML
data/lib/oga/xml/node.rb CHANGED
@@ -5,12 +5,10 @@ module Oga
5
5
  # {Oga::XML::NodeSet} and can be used to query surrounding and parent
6
6
  # nodes.
7
7
  #
8
- # @!attribute [r] node_set
9
- # @return [Oga::XML::NodeSet]
10
- #
11
8
  class Node
12
9
  include Traversal
13
10
 
11
+ # @return [Oga::XML::NodeSet]
14
12
  attr_reader :node_set
15
13
 
16
14
  ##
@@ -42,7 +40,7 @@ module Oga
42
40
  # @return [Oga::XML::NodeSet]
43
41
  #
44
42
  def children
45
- return @children ||= NodeSet.new([], self)
43
+ @children ||= NodeSet.new([], self)
46
44
  end
47
45
 
48
46
  ##
@@ -65,7 +63,7 @@ module Oga
65
63
  # @return [Oga::XML::Node]
66
64
  #
67
65
  def parent
68
- return node_set ? node_set.owner : nil
66
+ node_set ? node_set.owner : nil
69
67
  end
70
68
 
71
69
  ##
@@ -76,7 +74,7 @@ module Oga
76
74
  def previous
77
75
  index = node_set.index(self) - 1
78
76
 
79
- return index >= 0 ? node_set[index] : nil
77
+ index >= 0 ? node_set[index] : nil
80
78
  end
81
79
 
82
80
  ##
@@ -88,7 +86,7 @@ module Oga
88
86
  index = node_set.index(self) + 1
89
87
  length = node_set.length
90
88
 
91
- return index <= length ? node_set[index] : nil
89
+ index <= length ? node_set[index] : nil
92
90
  end
93
91
 
94
92
  ##
@@ -142,7 +140,7 @@ module Oga
142
140
  @root_node = node
143
141
  end
144
142
 
145
- return @root_node
143
+ @root_node
146
144
  end
147
145
 
148
146
  ##
@@ -186,14 +184,14 @@ module Oga
186
184
  @html_p = root.is_a?(Document) && root.html?
187
185
  end
188
186
 
189
- return @html_p
187
+ @html_p
190
188
  end
191
189
 
192
190
  ##
193
191
  # @return [TrueClass|FalseClass]
194
192
  #
195
193
  def xml?
196
- return !html?
194
+ !html?
197
195
  end
198
196
  end # Element
199
197
  end # XML
@@ -31,12 +31,10 @@ module Oga
31
31
  # If ownership was not handled then you'd have to manually set the
32
32
  # `element` variable's `node_set` attribute after pushing it into a set.
33
33
  #
34
- # @!attribute [rw] owner
35
- # @return [Oga::XML::Node]
36
- #
37
34
  class NodeSet
38
35
  include Enumerable
39
36
 
37
+ # @return [Oga::XML::Node]
40
38
  attr_accessor :owner
41
39
 
42
40
  ##
@@ -65,7 +63,7 @@ module Oga
65
63
  # @return [Oga::XML::Node]
66
64
  #
67
65
  def last
68
- return @nodes[-1]
66
+ @nodes[-1]
69
67
  end
70
68
 
71
69
  ##
@@ -74,7 +72,7 @@ module Oga
74
72
  # @return [TrueClass|FalseClass]
75
73
  #
76
74
  def empty?
77
- return @nodes.empty?
75
+ @nodes.empty?
78
76
  end
79
77
 
80
78
  ##
@@ -83,7 +81,7 @@ module Oga
83
81
  # @return [Fixnum]
84
82
  #
85
83
  def length
86
- return @nodes.length
84
+ @nodes.length
87
85
  end
88
86
 
89
87
  alias_method :count, :length
@@ -96,7 +94,7 @@ module Oga
96
94
  # @return [Fixnum]
97
95
  #
98
96
  def index(node)
99
- return @nodes.index(node)
97
+ @nodes.index(node)
100
98
  end
101
99
 
102
100
  ##
@@ -137,7 +135,7 @@ module Oga
137
135
 
138
136
  remove_ownership(node)
139
137
 
140
- return node
138
+ node
141
139
  end
142
140
 
143
141
  ##
@@ -150,7 +148,7 @@ module Oga
150
148
 
151
149
  remove_ownership(node)
152
150
 
153
- return node
151
+ node
154
152
  end
155
153
 
156
154
  ##
@@ -174,7 +172,7 @@ module Oga
174
172
  # @return [Oga::XML::Node]
175
173
  #
176
174
  def [](index)
177
- return @nodes[index]
175
+ @nodes[index]
178
176
  end
179
177
 
180
178
  ##
@@ -183,7 +181,7 @@ module Oga
183
181
  # @return [Array]
184
182
  #
185
183
  def to_a
186
- return @nodes
184
+ @nodes
187
185
  end
188
186
 
189
187
  ##
@@ -194,7 +192,7 @@ module Oga
194
192
  # @return [Oga::XML::NodeSet]
195
193
  #
196
194
  def +(other)
197
- return self.class.new(to_a | other.to_a)
195
+ self.class.new(to_a | other.to_a)
198
196
  end
199
197
 
200
198
  ##
@@ -204,7 +202,7 @@ module Oga
204
202
  # @param [Oga::XML::NodeSet] other
205
203
  #
206
204
  def ==(other)
207
- return other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
205
+ other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
208
206
  end
209
207
 
210
208
  ##
@@ -216,7 +214,7 @@ module Oga
216
214
  # @param [Array<Oga::XML::Node>] nodes
217
215
  #
218
216
  def equal_nodes?(nodes)
219
- return @nodes == nodes
217
+ @nodes == nodes
220
218
  end
221
219
 
222
220
  ##
@@ -261,7 +259,7 @@ module Oga
261
259
 
262
260
  remove_ownership(removed) if removed
263
261
 
264
- return removed
262
+ removed
265
263
  end
266
264
 
267
265
  ##
@@ -279,7 +277,7 @@ module Oga
279
277
  end
280
278
  end
281
279
 
282
- return values
280
+ values
283
281
  end
284
282
 
285
283
  alias_method :attr, :attribute
@@ -298,7 +296,7 @@ module Oga
298
296
  end
299
297
  end
300
298
 
301
- return text
299
+ text
302
300
  end
303
301
 
304
302
  ##
@@ -307,7 +305,7 @@ module Oga
307
305
  def inspect
308
306
  values = @nodes.map(&:inspect).join(', ')
309
307
 
310
- return "NodeSet(#{values})"
308
+ "NodeSet(#{values})"
311
309
  end
312
310
 
313
311
  private
@@ -291,7 +291,7 @@ class Parser < LL::Driver
291
291
  # @return [Oga::XML::Document]
292
292
  #
293
293
  def on_document(children = [])
294
- document = Document.new(:type => @lexer.html ? :html : :xml)
294
+ document = Document.new(:type => @lexer.html? ? :html : :xml)
295
295
 
296
296
  children.each do |child|
297
297
  if child.is_a?(Doctype)
@@ -3,10 +3,8 @@ module Oga
3
3
  ##
4
4
  # Class used for storing information about a single processing instruction.
5
5
  #
6
- # @!attribute [rw] name
7
- # @return [String]
8
- #
9
6
  class ProcessingInstruction < CharacterNode
7
+ # @return [String]
10
8
  attr_accessor :name
11
9
 
12
10
  ##
@@ -25,14 +23,14 @@ module Oga
25
23
  # @return [String]
26
24
  #
27
25
  def to_xml
28
- return "<?#{name}#{text}?>"
26
+ "<?#{name}#{text}?>"
29
27
  end
30
28
 
31
29
  ##
32
30
  # @return [String]
33
31
  #
34
32
  def inspect
35
- return "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
33
+ "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
36
34
  end
37
35
  end # ProcessingInstruction
38
36
  end # XML
@@ -19,16 +19,13 @@ module Oga
19
19
  # This parser yields proper XML instances such as {Oga::XML::Element}.
20
20
  # Doctypes and XML declarations are ignored by this parser.
21
21
  #
22
- # @!attribute [r] node
23
- # The current node.
24
- # @return [Oga::XML::Node]
25
- #
26
- # @!attribute [r] nesting
27
- # Array containing the names of the currently nested elements.
28
- # @return [Array]
29
- #
30
22
  class PullParser < Parser
31
- attr_reader :node, :nesting
23
+ # @return [Oga::XML::Node]
24
+ attr_reader :node
25
+
26
+ # Array containing the names of the currently nested elements.
27
+ # @return [Array]
28
+ attr_reader :nesting
32
29
 
33
30
  ##
34
31
  # @return [Array]
@@ -13,7 +13,7 @@ module Oga
13
13
  # @see [Oga::XPath::Evaluator#initialize]
14
14
  #
15
15
  def xpath(expression, variables = {})
16
- return XPath::Evaluator.new(self, variables).evaluate(expression)
16
+ XPath::Evaluator.new(self, variables).evaluate(expression)
17
17
  end
18
18
 
19
19
  ##
@@ -25,7 +25,7 @@ module Oga
25
25
  def at_xpath(*args)
26
26
  result = xpath(*args)
27
27
 
28
- return result.is_a?(XML::NodeSet) ? result.first : result
28
+ result.is_a?(XML::NodeSet) ? result.first : result
29
29
  end
30
30
 
31
31
  ##
@@ -37,7 +37,7 @@ module Oga
37
37
  def css(expression)
38
38
  ast = CSS::Parser.parse_with_cache(expression)
39
39
 
40
- return XPath::Evaluator.new(self).evaluate_ast(ast)
40
+ XPath::Evaluator.new(self).evaluate_ast(ast)
41
41
  end
42
42
 
43
43
  ##
@@ -49,7 +49,7 @@ module Oga
49
49
  def at_css(*args)
50
50
  result = css(*args)
51
51
 
52
- return result.is_a?(XML::NodeSet) ? result.first : result
52
+ result.is_a?(XML::NodeSet) ? result.first : result
53
53
  end
54
54
  end # Querying
55
55
  end # XML