oga 1.0.2-java → 1.0.3-java

Sign up to get free protection for your applications and to get access to all the features.
data/lib/oga/xml/lexer.rb CHANGED
@@ -34,12 +34,20 @@ module Oga
34
34
  # However, it is perfectly safe to use different instances per thread.
35
35
  # There is no _global_ state used by this lexer.
36
36
  #
37
- # @!attribute [r] html
38
- # @return [TrueClass|FalseClass]
37
+ # ## Strict Mode
38
+ #
39
+ # By default the lexer is rather permissive regarding the input. For
40
+ # example, missing closing tags are inserted by default. To disable this
41
+ # behaviour the lexer can be run in "strict mode" by setting `:strict` to
42
+ # `true`:
43
+ #
44
+ # lexer = Oga::XML::Lexer.new('...', :strict => true)
45
+ #
46
+ # Strict mode only applies to XML documents.
47
+ #
48
+ # @private
39
49
  #
40
50
  class Lexer
41
- attr_reader :html
42
-
43
51
  # These are all constant/frozen to remove the need for String allocations
44
52
  # every time they are referenced in the lexer.
45
53
  HTML_SCRIPT = 'script'.freeze
@@ -99,13 +107,17 @@ module Oga
99
107
  #
100
108
  # @param [Hash] options
101
109
  #
102
- # @option options [Symbol] :html When set to `true` the lexer will treat
103
- # the input as HTML instead of SGML/XML. This makes it possible to lex
104
- # HTML void elements such as `<link href="">`.
110
+ # @option options [TrueClass|FalseClass] :html When set to `true` the
111
+ # lexer will treat the input as HTML instead of XML. This makes it
112
+ # possible to lex HTML void elements such as `<link href="">`.
113
+ #
114
+ # @option options [TrueClass|FalseClass] :strict Enables/disables strict
115
+ # parsing of XML documents, disabled by default.
105
116
  #
106
117
  def initialize(data, options = {})
107
- @data = data
108
- @html = options[:html]
118
+ @data = data
119
+ @html = options[:html]
120
+ @strict = options[:strict] || false
109
121
 
110
122
  reset
111
123
  end
@@ -163,7 +175,7 @@ module Oga
163
175
 
164
176
  reset
165
177
 
166
- return tokens
178
+ tokens
167
179
  end
168
180
 
169
181
  ##
@@ -193,7 +205,7 @@ module Oga
193
205
  end
194
206
 
195
207
  # Add any missing closing tags
196
- unless @elements.empty?
208
+ if !strict? and !@elements.empty?
197
209
  @elements.length.times { on_element_end }
198
210
  end
199
211
  ensure
@@ -204,21 +216,28 @@ module Oga
204
216
  # @return [TrueClass|FalseClass]
205
217
  #
206
218
  def html?
207
- return !!html
219
+ @html == true
220
+ end
221
+
222
+ ##
223
+ # @return [TrueClass|FalseClass]
224
+ #
225
+ def strict?
226
+ @strict
208
227
  end
209
228
 
210
229
  ##
211
230
  # @return [TrueClass|FalseClass]
212
231
  #
213
232
  def html_script?
214
- return html? && current_element == HTML_SCRIPT
233
+ html? && current_element == HTML_SCRIPT
215
234
  end
216
235
 
217
236
  ##
218
237
  # @return [TrueClass|FalseClass]
219
238
  #
220
239
  def html_style?
221
- return html? && current_element == HTML_STYLE
240
+ html? && current_element == HTML_STYLE
222
241
  end
223
242
 
224
243
  private
@@ -250,7 +269,7 @@ module Oga
250
269
  # @return [String]
251
270
  #
252
271
  def current_element
253
- return @elements.last
272
+ @elements.last
254
273
  end
255
274
 
256
275
  ##
@@ -4,14 +4,12 @@ module Oga
4
4
  # The Namespace class contains information about XML namespaces such as the
5
5
  # name and URI.
6
6
  #
7
- # @!attribute [r] name
8
- # @return [String]
9
- #
10
- # @!attribute [r] uri
11
- # @return [String]
12
- #
13
7
  class Namespace
14
- attr_accessor :name, :uri
8
+ # @return [String]
9
+ attr_accessor :name
10
+
11
+ # @return [String]
12
+ attr_accessor :uri
15
13
 
16
14
  ##
17
15
  # @param [Hash] options
@@ -28,14 +26,14 @@ module Oga
28
26
  # @return [String]
29
27
  #
30
28
  def to_s
31
- return name.to_s
29
+ name.to_s
32
30
  end
33
31
 
34
32
  ##
35
33
  # @return [String]
36
34
  #
37
35
  def inspect
38
- return "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
36
+ "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
39
37
  end
40
38
 
41
39
  ##
@@ -43,7 +41,7 @@ module Oga
43
41
  # @return [TrueClass|FalseClass]
44
42
  #
45
43
  def ==(other)
46
- return other.is_a?(self.class) && name == other.name && uri == other.uri
44
+ other.is_a?(self.class) && name == other.name && uri == other.uri
47
45
  end
48
46
  end # Namespace
49
47
  end # XML
data/lib/oga/xml/node.rb CHANGED
@@ -5,12 +5,10 @@ module Oga
5
5
  # {Oga::XML::NodeSet} and can be used to query surrounding and parent
6
6
  # nodes.
7
7
  #
8
- # @!attribute [r] node_set
9
- # @return [Oga::XML::NodeSet]
10
- #
11
8
  class Node
12
9
  include Traversal
13
10
 
11
+ # @return [Oga::XML::NodeSet]
14
12
  attr_reader :node_set
15
13
 
16
14
  ##
@@ -42,7 +40,7 @@ module Oga
42
40
  # @return [Oga::XML::NodeSet]
43
41
  #
44
42
  def children
45
- return @children ||= NodeSet.new([], self)
43
+ @children ||= NodeSet.new([], self)
46
44
  end
47
45
 
48
46
  ##
@@ -65,7 +63,7 @@ module Oga
65
63
  # @return [Oga::XML::Node]
66
64
  #
67
65
  def parent
68
- return node_set ? node_set.owner : nil
66
+ node_set ? node_set.owner : nil
69
67
  end
70
68
 
71
69
  ##
@@ -76,7 +74,7 @@ module Oga
76
74
  def previous
77
75
  index = node_set.index(self) - 1
78
76
 
79
- return index >= 0 ? node_set[index] : nil
77
+ index >= 0 ? node_set[index] : nil
80
78
  end
81
79
 
82
80
  ##
@@ -88,7 +86,7 @@ module Oga
88
86
  index = node_set.index(self) + 1
89
87
  length = node_set.length
90
88
 
91
- return index <= length ? node_set[index] : nil
89
+ index <= length ? node_set[index] : nil
92
90
  end
93
91
 
94
92
  ##
@@ -142,7 +140,7 @@ module Oga
142
140
  @root_node = node
143
141
  end
144
142
 
145
- return @root_node
143
+ @root_node
146
144
  end
147
145
 
148
146
  ##
@@ -186,14 +184,14 @@ module Oga
186
184
  @html_p = root.is_a?(Document) && root.html?
187
185
  end
188
186
 
189
- return @html_p
187
+ @html_p
190
188
  end
191
189
 
192
190
  ##
193
191
  # @return [TrueClass|FalseClass]
194
192
  #
195
193
  def xml?
196
- return !html?
194
+ !html?
197
195
  end
198
196
  end # Element
199
197
  end # XML
@@ -31,12 +31,10 @@ module Oga
31
31
  # If ownership was not handled then you'd have to manually set the
32
32
  # `element` variable's `node_set` attribute after pushing it into a set.
33
33
  #
34
- # @!attribute [rw] owner
35
- # @return [Oga::XML::Node]
36
- #
37
34
  class NodeSet
38
35
  include Enumerable
39
36
 
37
+ # @return [Oga::XML::Node]
40
38
  attr_accessor :owner
41
39
 
42
40
  ##
@@ -65,7 +63,7 @@ module Oga
65
63
  # @return [Oga::XML::Node]
66
64
  #
67
65
  def last
68
- return @nodes[-1]
66
+ @nodes[-1]
69
67
  end
70
68
 
71
69
  ##
@@ -74,7 +72,7 @@ module Oga
74
72
  # @return [TrueClass|FalseClass]
75
73
  #
76
74
  def empty?
77
- return @nodes.empty?
75
+ @nodes.empty?
78
76
  end
79
77
 
80
78
  ##
@@ -83,7 +81,7 @@ module Oga
83
81
  # @return [Fixnum]
84
82
  #
85
83
  def length
86
- return @nodes.length
84
+ @nodes.length
87
85
  end
88
86
 
89
87
  alias_method :count, :length
@@ -96,7 +94,7 @@ module Oga
96
94
  # @return [Fixnum]
97
95
  #
98
96
  def index(node)
99
- return @nodes.index(node)
97
+ @nodes.index(node)
100
98
  end
101
99
 
102
100
  ##
@@ -137,7 +135,7 @@ module Oga
137
135
 
138
136
  remove_ownership(node)
139
137
 
140
- return node
138
+ node
141
139
  end
142
140
 
143
141
  ##
@@ -150,7 +148,7 @@ module Oga
150
148
 
151
149
  remove_ownership(node)
152
150
 
153
- return node
151
+ node
154
152
  end
155
153
 
156
154
  ##
@@ -174,7 +172,7 @@ module Oga
174
172
  # @return [Oga::XML::Node]
175
173
  #
176
174
  def [](index)
177
- return @nodes[index]
175
+ @nodes[index]
178
176
  end
179
177
 
180
178
  ##
@@ -183,7 +181,7 @@ module Oga
183
181
  # @return [Array]
184
182
  #
185
183
  def to_a
186
- return @nodes
184
+ @nodes
187
185
  end
188
186
 
189
187
  ##
@@ -194,7 +192,7 @@ module Oga
194
192
  # @return [Oga::XML::NodeSet]
195
193
  #
196
194
  def +(other)
197
- return self.class.new(to_a | other.to_a)
195
+ self.class.new(to_a | other.to_a)
198
196
  end
199
197
 
200
198
  ##
@@ -204,7 +202,7 @@ module Oga
204
202
  # @param [Oga::XML::NodeSet] other
205
203
  #
206
204
  def ==(other)
207
- return other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
205
+ other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
208
206
  end
209
207
 
210
208
  ##
@@ -216,7 +214,7 @@ module Oga
216
214
  # @param [Array<Oga::XML::Node>] nodes
217
215
  #
218
216
  def equal_nodes?(nodes)
219
- return @nodes == nodes
217
+ @nodes == nodes
220
218
  end
221
219
 
222
220
  ##
@@ -261,7 +259,7 @@ module Oga
261
259
 
262
260
  remove_ownership(removed) if removed
263
261
 
264
- return removed
262
+ removed
265
263
  end
266
264
 
267
265
  ##
@@ -279,7 +277,7 @@ module Oga
279
277
  end
280
278
  end
281
279
 
282
- return values
280
+ values
283
281
  end
284
282
 
285
283
  alias_method :attr, :attribute
@@ -298,7 +296,7 @@ module Oga
298
296
  end
299
297
  end
300
298
 
301
- return text
299
+ text
302
300
  end
303
301
 
304
302
  ##
@@ -307,7 +305,7 @@ module Oga
307
305
  def inspect
308
306
  values = @nodes.map(&:inspect).join(', ')
309
307
 
310
- return "NodeSet(#{values})"
308
+ "NodeSet(#{values})"
311
309
  end
312
310
 
313
311
  private
@@ -291,7 +291,7 @@ class Parser < LL::Driver
291
291
  # @return [Oga::XML::Document]
292
292
  #
293
293
  def on_document(children = [])
294
- document = Document.new(:type => @lexer.html ? :html : :xml)
294
+ document = Document.new(:type => @lexer.html? ? :html : :xml)
295
295
 
296
296
  children.each do |child|
297
297
  if child.is_a?(Doctype)
@@ -3,10 +3,8 @@ module Oga
3
3
  ##
4
4
  # Class used for storing information about a single processing instruction.
5
5
  #
6
- # @!attribute [rw] name
7
- # @return [String]
8
- #
9
6
  class ProcessingInstruction < CharacterNode
7
+ # @return [String]
10
8
  attr_accessor :name
11
9
 
12
10
  ##
@@ -25,14 +23,14 @@ module Oga
25
23
  # @return [String]
26
24
  #
27
25
  def to_xml
28
- return "<?#{name}#{text}?>"
26
+ "<?#{name}#{text}?>"
29
27
  end
30
28
 
31
29
  ##
32
30
  # @return [String]
33
31
  #
34
32
  def inspect
35
- return "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
33
+ "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
36
34
  end
37
35
  end # ProcessingInstruction
38
36
  end # XML
@@ -19,16 +19,13 @@ module Oga
19
19
  # This parser yields proper XML instances such as {Oga::XML::Element}.
20
20
  # Doctypes and XML declarations are ignored by this parser.
21
21
  #
22
- # @!attribute [r] node
23
- # The current node.
24
- # @return [Oga::XML::Node]
25
- #
26
- # @!attribute [r] nesting
27
- # Array containing the names of the currently nested elements.
28
- # @return [Array]
29
- #
30
22
  class PullParser < Parser
31
- attr_reader :node, :nesting
23
+ # @return [Oga::XML::Node]
24
+ attr_reader :node
25
+
26
+ # Array containing the names of the currently nested elements.
27
+ # @return [Array]
28
+ attr_reader :nesting
32
29
 
33
30
  ##
34
31
  # @return [Array]
@@ -13,7 +13,7 @@ module Oga
13
13
  # @see [Oga::XPath::Evaluator#initialize]
14
14
  #
15
15
  def xpath(expression, variables = {})
16
- return XPath::Evaluator.new(self, variables).evaluate(expression)
16
+ XPath::Evaluator.new(self, variables).evaluate(expression)
17
17
  end
18
18
 
19
19
  ##
@@ -25,7 +25,7 @@ module Oga
25
25
  def at_xpath(*args)
26
26
  result = xpath(*args)
27
27
 
28
- return result.is_a?(XML::NodeSet) ? result.first : result
28
+ result.is_a?(XML::NodeSet) ? result.first : result
29
29
  end
30
30
 
31
31
  ##
@@ -37,7 +37,7 @@ module Oga
37
37
  def css(expression)
38
38
  ast = CSS::Parser.parse_with_cache(expression)
39
39
 
40
- return XPath::Evaluator.new(self).evaluate_ast(ast)
40
+ XPath::Evaluator.new(self).evaluate_ast(ast)
41
41
  end
42
42
 
43
43
  ##
@@ -49,7 +49,7 @@ module Oga
49
49
  def at_css(*args)
50
50
  result = css(*args)
51
51
 
52
- return result.is_a?(XML::NodeSet) ? result.first : result
52
+ result.is_a?(XML::NodeSet) ? result.first : result
53
53
  end
54
54
  end # Querying
55
55
  end # XML