oga 1.2.3-java → 1.3.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/doc/css_selectors.md +1 -1
  3. data/lib/liboga.jar +0 -0
  4. data/lib/oga.rb +6 -1
  5. data/lib/oga/blacklist.rb +0 -10
  6. data/lib/oga/css/lexer.rb +530 -255
  7. data/lib/oga/css/parser.rb +232 -230
  8. data/lib/oga/entity_decoder.rb +0 -4
  9. data/lib/oga/html/entities.rb +0 -4
  10. data/lib/oga/html/parser.rb +0 -4
  11. data/lib/oga/html/sax_parser.rb +0 -4
  12. data/lib/oga/lru.rb +0 -26
  13. data/lib/oga/oga.rb +0 -8
  14. data/lib/oga/ruby/generator.rb +225 -0
  15. data/lib/oga/ruby/node.rb +189 -0
  16. data/lib/oga/version.rb +1 -1
  17. data/lib/oga/whitelist.rb +0 -6
  18. data/lib/oga/xml/attribute.rb +13 -20
  19. data/lib/oga/xml/cdata.rb +0 -4
  20. data/lib/oga/xml/character_node.rb +0 -8
  21. data/lib/oga/xml/comment.rb +0 -4
  22. data/lib/oga/xml/default_namespace.rb +0 -2
  23. data/lib/oga/xml/doctype.rb +0 -8
  24. data/lib/oga/xml/document.rb +10 -14
  25. data/lib/oga/xml/element.rb +1 -52
  26. data/lib/oga/xml/entities.rb +0 -26
  27. data/lib/oga/xml/expanded_name.rb +12 -0
  28. data/lib/oga/xml/html_void_elements.rb +0 -2
  29. data/lib/oga/xml/lexer.rb +0 -86
  30. data/lib/oga/xml/namespace.rb +0 -10
  31. data/lib/oga/xml/node.rb +18 -34
  32. data/lib/oga/xml/node_set.rb +0 -50
  33. data/lib/oga/xml/parser.rb +13 -50
  34. data/lib/oga/xml/processing_instruction.rb +0 -8
  35. data/lib/oga/xml/pull_parser.rb +0 -18
  36. data/lib/oga/xml/querying.rb +58 -19
  37. data/lib/oga/xml/sax_parser.rb +0 -18
  38. data/lib/oga/xml/text.rb +0 -12
  39. data/lib/oga/xml/traversal.rb +0 -4
  40. data/lib/oga/xml/xml_declaration.rb +0 -8
  41. data/lib/oga/xpath/compiler.rb +1568 -0
  42. data/lib/oga/xpath/conversion.rb +102 -0
  43. data/lib/oga/xpath/lexer.rb +1844 -1238
  44. data/lib/oga/xpath/parser.rb +182 -153
  45. metadata +7 -3
  46. data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -5,7 +5,6 @@ require 'll/setup'
5
5
 
6
6
  module Oga
7
7
  module XML
8
- ##
9
8
  # DOM parser for both XML and HTML.
10
9
  #
11
10
  # This parser does not produce a dedicated AST, instead it emits XML nodes
@@ -186,11 +185,9 @@ class Parser < LL::Driver
186
185
  [:_rule_45, 1], # 45
187
186
  ].freeze
188
187
 
189
- ##
190
188
  # Hash mapping token types and dedicated error labels.
191
189
  #
192
190
  # @return [Hash]
193
- #
194
191
  TOKEN_ERROR_MAPPING = {
195
192
  :T_STRING => 'string',
196
193
  :T_TEXT => 'text',
@@ -215,11 +212,9 @@ class Parser < LL::Driver
215
212
  -1 => 'end of input'
216
213
  }
217
214
 
218
- ##
219
215
  # @param [String|IO] data The input to parse.
220
216
  # @param [Hash] options
221
217
  # @see [Oga::XML::Lexer#initialize]
222
- #
223
218
  def initialize(data, options = {})
224
219
  @data = data
225
220
  @lexer = Lexer.new(data, options)
@@ -227,20 +222,16 @@ class Parser < LL::Driver
227
222
  reset
228
223
  end
229
224
 
230
- ##
231
225
  # Resets the internal state of the parser.
232
- #
233
226
  def reset
234
227
  @line = 1
235
228
 
236
229
  @lexer.reset
237
230
  end
238
231
 
239
- ##
240
232
  # Yields the next token from the lexer.
241
233
  #
242
234
  # @yieldparam [Array]
243
- #
244
235
  def each_token
245
236
  @lexer.advance do |type, value, line|
246
237
  @line = line if line
@@ -251,12 +242,10 @@ class Parser < LL::Driver
251
242
  yield [-1, -1]
252
243
  end
253
244
 
254
- ##
255
245
  # @param [Fixnum] stack_type
256
246
  # @param [Fixnum] stack_value
257
247
  # @param [Symbol] token_type
258
248
  # @param [String] token_value
259
- #
260
249
  def parser_error(stack_type, stack_value, token_type, token_value)
261
250
  case id_to_type(stack_type)
262
251
  when :rule
@@ -275,21 +264,17 @@ class Parser < LL::Driver
275
264
  raise LL::ParserError, message
276
265
  end
277
266
 
278
- ##
279
267
  # @see [LL::Driver#parse]
280
- #
281
268
  def parse
282
269
  retval = super
283
270
 
284
271
  reset
285
272
 
286
- return retval
273
+ retval
287
274
  end
288
275
 
289
- ##
290
276
  # @param [Array] children
291
277
  # @return [Oga::XML::Document]
292
- #
293
278
  def on_document(children = [])
294
279
  document = Document.new(:type => @lexer.html? ? :html : :xml)
295
280
 
@@ -305,45 +290,35 @@ class Parser < LL::Driver
305
290
  end
306
291
  end
307
292
 
308
- return document
293
+ document
309
294
  end
310
295
 
311
- ##
312
296
  # @param [Hash] options
313
- #
314
297
  def on_doctype(options = {})
315
- return Doctype.new(options)
298
+ Doctype.new(options)
316
299
  end
317
300
 
318
- ##
319
301
  # @param [String] text
320
302
  # @return [Oga::XML::Cdata]
321
- #
322
303
  def on_cdata(text = nil)
323
- return Cdata.new(:text => text)
304
+ Cdata.new(:text => text)
324
305
  end
325
306
 
326
- ##
327
307
  # @param [String] text
328
308
  # @return [Oga::XML::Comment]
329
- #
330
309
  def on_comment(text = nil)
331
- return Comment.new(:text => text)
310
+ Comment.new(:text => text)
332
311
  end
333
312
 
334
- ##
335
313
  # @param [String] name
336
314
  # @param [String] text
337
315
  # @return [Oga::XML::ProcessingInstruction]
338
- #
339
316
  def on_proc_ins(name, text = nil)
340
- return ProcessingInstruction.new(:name => name, :text => text)
317
+ ProcessingInstruction.new(:name => name, :text => text)
341
318
  end
342
319
 
343
- ##
344
320
  # @param [Array] attributes
345
321
  # @return [Oga::XML::XmlDeclaration]
346
- #
347
322
  def on_xml_decl(attributes = [])
348
323
  options = {}
349
324
 
@@ -351,23 +326,19 @@ class Parser < LL::Driver
351
326
  options[attr.name.to_sym] = attr.value
352
327
  end
353
328
 
354
- return XmlDeclaration.new(options)
329
+ XmlDeclaration.new(options)
355
330
  end
356
331
 
357
- ##
358
332
  # @param [String] text
359
333
  # @return [Oga::XML::Text]
360
- #
361
334
  def on_text(text)
362
- return Text.new(:text => text)
335
+ Text.new(:text => text)
363
336
  end
364
337
 
365
- ##
366
338
  # @param [String] namespace
367
339
  # @param [String] name
368
340
  # @param [Hash] attributes
369
341
  # @return [Oga::XML::Element]
370
- #
371
342
  def on_element(namespace, name, attributes = {})
372
343
  element = Element.new(
373
344
  :namespace_name => namespace,
@@ -375,47 +346,39 @@ class Parser < LL::Driver
375
346
  :attributes => attributes
376
347
  )
377
348
 
378
- return element
349
+ element
379
350
  end
380
351
 
381
- ##
382
352
  # @param [Oga::XML::Element] element
383
353
  # @param [Array] children
384
354
  # @return [Oga::XML::Element]
385
- #
386
355
  def on_element_children(element, children = [])
387
356
  element.children = children
388
357
 
389
- return element
358
+ element
390
359
  end
391
360
 
392
- ##
393
361
  # @param [Oga::XML::Element] element
394
362
  # @return [Oga::XML::Element]
395
- #
396
363
  def after_element(element)
397
- return element
364
+ element
398
365
  end
399
366
 
400
- ##
401
367
  # @param [String] name
402
368
  # @param [String] ns_name
403
369
  # @param [String] value
404
370
  # @return [Oga::XML::Attribute]
405
- #
406
371
  def on_attribute(name, ns_name = nil, value = nil)
407
- return Attribute.new(
372
+ Attribute.new(
408
373
  :namespace_name => ns_name,
409
374
  :name => name,
410
375
  :value => value
411
376
  )
412
377
  end
413
378
 
414
- ##
415
379
  # @param [Array] attrs
416
- #
417
380
  def on_attributes(attrs)
418
- return attrs
381
+ attrs
419
382
  end
420
383
 
421
384
  def _rule_0(val)
@@ -1,34 +1,26 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Class used for storing information about a single processing instruction.
5
- #
6
4
  class ProcessingInstruction < CharacterNode
7
5
  # @return [String]
8
6
  attr_accessor :name
9
7
 
10
- ##
11
8
  # @param [Hash] options
12
9
  #
13
10
  # @option options [String] :name The name of the instruction.
14
11
  # @see [Oga::XML::CharacterNode#initialize]
15
- #
16
12
  def initialize(options = {})
17
13
  super
18
14
 
19
15
  @name = options[:name]
20
16
  end
21
17
 
22
- ##
23
18
  # @return [String]
24
- #
25
19
  def to_xml
26
20
  "<?#{name}#{text}?>"
27
21
  end
28
22
 
29
- ##
30
23
  # @return [String]
31
- #
32
24
  def inspect
33
25
  "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
34
26
  end
@@ -1,6 +1,5 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # The PullParser class can be used to parse an XML document incrementally
5
4
  # instead of parsing it as a whole. This results in lower memory usage and
6
5
  # potentially faster parsing times. The downside is that pull parsers are
@@ -18,7 +17,6 @@ module Oga
18
17
  #
19
18
  # This parser yields proper XML instances such as {Oga::XML::Element}.
20
19
  # Doctypes and XML declarations are ignored by this parser.
21
- #
22
20
  class PullParser < Parser
23
21
  # @return [Oga::XML::Node]
24
22
  attr_reader :node
@@ -27,9 +25,7 @@ module Oga
27
25
  # @return [Array]
28
26
  attr_reader :nesting
29
27
 
30
- ##
31
28
  # @return [Array]
32
- #
33
29
  DISABLED_CALLBACKS = [
34
30
  :on_document,
35
31
  :on_doctype,
@@ -37,9 +33,7 @@ module Oga
37
33
  :on_element_children
38
34
  ]
39
35
 
40
- ##
41
36
  # @return [Array]
42
- #
43
37
  BLOCK_CALLBACKS = [
44
38
  :on_cdata,
45
39
  :on_comment,
@@ -47,11 +41,9 @@ module Oga
47
41
  :on_proc_ins
48
42
  ]
49
43
 
50
- ##
51
44
  # Returns the shorthands that can be used for various node classes.
52
45
  #
53
46
  # @return [Hash]
54
- #
55
47
  NODE_SHORTHANDS = {
56
48
  :text => XML::Text,
57
49
  :node => XML::Node,
@@ -62,9 +54,7 @@ module Oga
62
54
  :xml_declaration => XML::XmlDeclaration
63
55
  }
64
56
 
65
- ##
66
57
  # @see Oga::XML::Parser#reset
67
- #
68
58
  def reset
69
59
  super
70
60
 
@@ -73,11 +63,9 @@ module Oga
73
63
  @node = nil
74
64
  end
75
65
 
76
- ##
77
66
  # Parses the input and yields every node to the supplied block.
78
67
  #
79
68
  # @yieldparam [Oga::XML::Node]
80
- #
81
69
  def parse(&block)
82
70
  @block = block
83
71
 
@@ -86,7 +74,6 @@ module Oga
86
74
  return
87
75
  end
88
76
 
89
- ##
90
77
  # Calls the supplied block if the current node type and optionally the
91
78
  # nesting match. This method allows you to write this:
92
79
  #
@@ -120,7 +107,6 @@ module Oga
120
107
  # returned by {Oga::XML::Node#node_type}.
121
108
  #
122
109
  # @param [Array] nesting The element name nesting to act upon.
123
- #
124
110
  def on(type, nesting = [])
125
111
  if node.is_a?(NODE_SHORTHANDS[type])
126
112
  if nesting.empty? or nesting == self.nesting
@@ -149,9 +135,7 @@ module Oga
149
135
  EOF
150
136
  end
151
137
 
152
- ##
153
138
  # @see Oga::XML::Parser#on_element
154
- #
155
139
  def on_element(*args)
156
140
  @node = super
157
141
 
@@ -162,9 +146,7 @@ module Oga
162
146
  return
163
147
  end
164
148
 
165
- ##
166
149
  # @see Oga::XML::Parser#after_element
167
- #
168
150
  def after_element(*args)
169
151
  nesting.pop
170
152
 
@@ -1,51 +1,90 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # The Querying module provides methods that make it easy to run XPath/CSS
5
4
  # queries on XML documents/elements.
6
- #
7
5
  module Querying
8
- ##
9
6
  # Evaluates the given XPath expression.
10
7
  #
8
+ # Querying a document:
9
+ #
10
+ # document = Oga.parse_xml <<-EOF
11
+ # <people>
12
+ # <person age="25">Alice</person>
13
+ # </people>
14
+ # EOF
15
+ #
16
+ # document.xpath('people/person')
17
+ #
18
+ # Querying an element:
19
+ #
20
+ # element = document.at_xpath('people')
21
+ #
22
+ # element.xpath('person')
23
+ #
24
+ # Using variable bindings:
25
+ #
26
+ # document.xpath('people/person[@age = $age]', 'age' => 25)
27
+ #
11
28
  # @param [String] expression The XPath expression to run.
12
- # @param [Hash] variables Variables to bind.
13
- # @see [Oga::XPath::Evaluator#initialize]
14
29
  #
30
+ # @param [Hash] variables Variables to bind. The keys of this Hash should
31
+ # be String values.
32
+ #
33
+ # @return [Oga::XML::NodeSet]
15
34
  def xpath(expression, variables = {})
16
- XPath::Evaluator.new(self, variables).evaluate(expression)
35
+ ast = XPath::Parser.parse_with_cache(expression)
36
+ block = XPath::Compiler.compile_with_cache(ast)
37
+
38
+ block.call(self, variables)
17
39
  end
18
40
 
19
- ##
20
- # Evaluates the given XPath expression and returns the first node in the
21
- # set.
41
+ # Evaluates the XPath expression and returns the first matched node.
22
42
  #
23
- # @see [#xpath]
43
+ # Querying a document:
44
+ #
45
+ # document = Oga.parse_xml <<-EOF
46
+ # <people>
47
+ # <person age="25">Alice</person>
48
+ # </people>
49
+ # EOF
50
+ #
51
+ # person = document.at_xpath('people/person')
24
52
  #
53
+ # person.class # => Oga::XML::Element
54
+ #
55
+ # @see [#xpath]
56
+ # @return [Oga::XML::Node|Oga::XML::Attribute]
25
57
  def at_xpath(*args)
26
58
  result = xpath(*args)
27
59
 
28
60
  result.is_a?(XML::NodeSet) ? result.first : result
29
61
  end
30
62
 
31
- ##
32
63
  # Evaluates the given CSS expression.
33
64
  #
34
- # @param [String] expression The CSS expression to run.
35
- # @see [Oga::XPath::Evaluator#initialize]
65
+ # Querying a document:
66
+ #
67
+ # document = Oga.parse_xml <<-EOF
68
+ # <people>
69
+ # <person age="25">Alice</person>
70
+ # </people>
71
+ # EOF
72
+ #
73
+ # document.css('people person')
36
74
  #
75
+ # @param [String] expression The CSS expression to run.
76
+ # @return [Oga::XML::NodeSet]
37
77
  def css(expression)
38
- ast = CSS::Parser.parse_with_cache(expression)
78
+ ast = CSS::Parser.parse_with_cache(expression)
79
+ block = XPath::Compiler.compile_with_cache(ast)
39
80
 
40
- XPath::Evaluator.new(self).evaluate_ast(ast)
81
+ block.call(self)
41
82
  end
42
83
 
43
- ##
44
- # Evaluates the given CSS expression and returns the first node in the
45
- # set.
84
+ # Evaluates the CSS expression and returns the first matched node.
46
85
  #
47
86
  # @see [#css]
48
- #
87
+ # @return [Oga::XML::Node|Oga::XML::Attribute]
49
88
  def at_css(*args)
50
89
  result = css(*args)
51
90