oga 1.2.3-java → 1.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/doc/css_selectors.md +1 -1
  3. data/lib/liboga.jar +0 -0
  4. data/lib/oga.rb +6 -1
  5. data/lib/oga/blacklist.rb +0 -10
  6. data/lib/oga/css/lexer.rb +530 -255
  7. data/lib/oga/css/parser.rb +232 -230
  8. data/lib/oga/entity_decoder.rb +0 -4
  9. data/lib/oga/html/entities.rb +0 -4
  10. data/lib/oga/html/parser.rb +0 -4
  11. data/lib/oga/html/sax_parser.rb +0 -4
  12. data/lib/oga/lru.rb +0 -26
  13. data/lib/oga/oga.rb +0 -8
  14. data/lib/oga/ruby/generator.rb +225 -0
  15. data/lib/oga/ruby/node.rb +189 -0
  16. data/lib/oga/version.rb +1 -1
  17. data/lib/oga/whitelist.rb +0 -6
  18. data/lib/oga/xml/attribute.rb +13 -20
  19. data/lib/oga/xml/cdata.rb +0 -4
  20. data/lib/oga/xml/character_node.rb +0 -8
  21. data/lib/oga/xml/comment.rb +0 -4
  22. data/lib/oga/xml/default_namespace.rb +0 -2
  23. data/lib/oga/xml/doctype.rb +0 -8
  24. data/lib/oga/xml/document.rb +10 -14
  25. data/lib/oga/xml/element.rb +1 -52
  26. data/lib/oga/xml/entities.rb +0 -26
  27. data/lib/oga/xml/expanded_name.rb +12 -0
  28. data/lib/oga/xml/html_void_elements.rb +0 -2
  29. data/lib/oga/xml/lexer.rb +0 -86
  30. data/lib/oga/xml/namespace.rb +0 -10
  31. data/lib/oga/xml/node.rb +18 -34
  32. data/lib/oga/xml/node_set.rb +0 -50
  33. data/lib/oga/xml/parser.rb +13 -50
  34. data/lib/oga/xml/processing_instruction.rb +0 -8
  35. data/lib/oga/xml/pull_parser.rb +0 -18
  36. data/lib/oga/xml/querying.rb +58 -19
  37. data/lib/oga/xml/sax_parser.rb +0 -18
  38. data/lib/oga/xml/text.rb +0 -12
  39. data/lib/oga/xml/traversal.rb +0 -4
  40. data/lib/oga/xml/xml_declaration.rb +0 -8
  41. data/lib/oga/xpath/compiler.rb +1568 -0
  42. data/lib/oga/xpath/conversion.rb +102 -0
  43. data/lib/oga/xpath/lexer.rb +1844 -1238
  44. data/lib/oga/xpath/parser.rb +182 -153
  45. metadata +7 -3
  46. data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -5,7 +5,6 @@ require 'll/setup'
5
5
 
6
6
  module Oga
7
7
  module XML
8
- ##
9
8
  # DOM parser for both XML and HTML.
10
9
  #
11
10
  # This parser does not produce a dedicated AST, instead it emits XML nodes
@@ -186,11 +185,9 @@ class Parser < LL::Driver
186
185
  [:_rule_45, 1], # 45
187
186
  ].freeze
188
187
 
189
- ##
190
188
  # Hash mapping token types and dedicated error labels.
191
189
  #
192
190
  # @return [Hash]
193
- #
194
191
  TOKEN_ERROR_MAPPING = {
195
192
  :T_STRING => 'string',
196
193
  :T_TEXT => 'text',
@@ -215,11 +212,9 @@ class Parser < LL::Driver
215
212
  -1 => 'end of input'
216
213
  }
217
214
 
218
- ##
219
215
  # @param [String|IO] data The input to parse.
220
216
  # @param [Hash] options
221
217
  # @see [Oga::XML::Lexer#initialize]
222
- #
223
218
  def initialize(data, options = {})
224
219
  @data = data
225
220
  @lexer = Lexer.new(data, options)
@@ -227,20 +222,16 @@ class Parser < LL::Driver
227
222
  reset
228
223
  end
229
224
 
230
- ##
231
225
  # Resets the internal state of the parser.
232
- #
233
226
  def reset
234
227
  @line = 1
235
228
 
236
229
  @lexer.reset
237
230
  end
238
231
 
239
- ##
240
232
  # Yields the next token from the lexer.
241
233
  #
242
234
  # @yieldparam [Array]
243
- #
244
235
  def each_token
245
236
  @lexer.advance do |type, value, line|
246
237
  @line = line if line
@@ -251,12 +242,10 @@ class Parser < LL::Driver
251
242
  yield [-1, -1]
252
243
  end
253
244
 
254
- ##
255
245
  # @param [Fixnum] stack_type
256
246
  # @param [Fixnum] stack_value
257
247
  # @param [Symbol] token_type
258
248
  # @param [String] token_value
259
- #
260
249
  def parser_error(stack_type, stack_value, token_type, token_value)
261
250
  case id_to_type(stack_type)
262
251
  when :rule
@@ -275,21 +264,17 @@ class Parser < LL::Driver
275
264
  raise LL::ParserError, message
276
265
  end
277
266
 
278
- ##
279
267
  # @see [LL::Driver#parse]
280
- #
281
268
  def parse
282
269
  retval = super
283
270
 
284
271
  reset
285
272
 
286
- return retval
273
+ retval
287
274
  end
288
275
 
289
- ##
290
276
  # @param [Array] children
291
277
  # @return [Oga::XML::Document]
292
- #
293
278
  def on_document(children = [])
294
279
  document = Document.new(:type => @lexer.html? ? :html : :xml)
295
280
 
@@ -305,45 +290,35 @@ class Parser < LL::Driver
305
290
  end
306
291
  end
307
292
 
308
- return document
293
+ document
309
294
  end
310
295
 
311
- ##
312
296
  # @param [Hash] options
313
- #
314
297
  def on_doctype(options = {})
315
- return Doctype.new(options)
298
+ Doctype.new(options)
316
299
  end
317
300
 
318
- ##
319
301
  # @param [String] text
320
302
  # @return [Oga::XML::Cdata]
321
- #
322
303
  def on_cdata(text = nil)
323
- return Cdata.new(:text => text)
304
+ Cdata.new(:text => text)
324
305
  end
325
306
 
326
- ##
327
307
  # @param [String] text
328
308
  # @return [Oga::XML::Comment]
329
- #
330
309
  def on_comment(text = nil)
331
- return Comment.new(:text => text)
310
+ Comment.new(:text => text)
332
311
  end
333
312
 
334
- ##
335
313
  # @param [String] name
336
314
  # @param [String] text
337
315
  # @return [Oga::XML::ProcessingInstruction]
338
- #
339
316
  def on_proc_ins(name, text = nil)
340
- return ProcessingInstruction.new(:name => name, :text => text)
317
+ ProcessingInstruction.new(:name => name, :text => text)
341
318
  end
342
319
 
343
- ##
344
320
  # @param [Array] attributes
345
321
  # @return [Oga::XML::XmlDeclaration]
346
- #
347
322
  def on_xml_decl(attributes = [])
348
323
  options = {}
349
324
 
@@ -351,23 +326,19 @@ class Parser < LL::Driver
351
326
  options[attr.name.to_sym] = attr.value
352
327
  end
353
328
 
354
- return XmlDeclaration.new(options)
329
+ XmlDeclaration.new(options)
355
330
  end
356
331
 
357
- ##
358
332
  # @param [String] text
359
333
  # @return [Oga::XML::Text]
360
- #
361
334
  def on_text(text)
362
- return Text.new(:text => text)
335
+ Text.new(:text => text)
363
336
  end
364
337
 
365
- ##
366
338
  # @param [String] namespace
367
339
  # @param [String] name
368
340
  # @param [Hash] attributes
369
341
  # @return [Oga::XML::Element]
370
- #
371
342
  def on_element(namespace, name, attributes = {})
372
343
  element = Element.new(
373
344
  :namespace_name => namespace,
@@ -375,47 +346,39 @@ class Parser < LL::Driver
375
346
  :attributes => attributes
376
347
  )
377
348
 
378
- return element
349
+ element
379
350
  end
380
351
 
381
- ##
382
352
  # @param [Oga::XML::Element] element
383
353
  # @param [Array] children
384
354
  # @return [Oga::XML::Element]
385
- #
386
355
  def on_element_children(element, children = [])
387
356
  element.children = children
388
357
 
389
- return element
358
+ element
390
359
  end
391
360
 
392
- ##
393
361
  # @param [Oga::XML::Element] element
394
362
  # @return [Oga::XML::Element]
395
- #
396
363
  def after_element(element)
397
- return element
364
+ element
398
365
  end
399
366
 
400
- ##
401
367
  # @param [String] name
402
368
  # @param [String] ns_name
403
369
  # @param [String] value
404
370
  # @return [Oga::XML::Attribute]
405
- #
406
371
  def on_attribute(name, ns_name = nil, value = nil)
407
- return Attribute.new(
372
+ Attribute.new(
408
373
  :namespace_name => ns_name,
409
374
  :name => name,
410
375
  :value => value
411
376
  )
412
377
  end
413
378
 
414
- ##
415
379
  # @param [Array] attrs
416
- #
417
380
  def on_attributes(attrs)
418
- return attrs
381
+ attrs
419
382
  end
420
383
 
421
384
  def _rule_0(val)
@@ -1,34 +1,26 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Class used for storing information about a single processing instruction.
5
- #
6
4
  class ProcessingInstruction < CharacterNode
7
5
  # @return [String]
8
6
  attr_accessor :name
9
7
 
10
- ##
11
8
  # @param [Hash] options
12
9
  #
13
10
  # @option options [String] :name The name of the instruction.
14
11
  # @see [Oga::XML::CharacterNode#initialize]
15
- #
16
12
  def initialize(options = {})
17
13
  super
18
14
 
19
15
  @name = options[:name]
20
16
  end
21
17
 
22
- ##
23
18
  # @return [String]
24
- #
25
19
  def to_xml
26
20
  "<?#{name}#{text}?>"
27
21
  end
28
22
 
29
- ##
30
23
  # @return [String]
31
- #
32
24
  def inspect
33
25
  "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
34
26
  end
@@ -1,6 +1,5 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # The PullParser class can be used to parse an XML document incrementally
5
4
  # instead of parsing it as a whole. This results in lower memory usage and
6
5
  # potentially faster parsing times. The downside is that pull parsers are
@@ -18,7 +17,6 @@ module Oga
18
17
  #
19
18
  # This parser yields proper XML instances such as {Oga::XML::Element}.
20
19
  # Doctypes and XML declarations are ignored by this parser.
21
- #
22
20
  class PullParser < Parser
23
21
  # @return [Oga::XML::Node]
24
22
  attr_reader :node
@@ -27,9 +25,7 @@ module Oga
27
25
  # @return [Array]
28
26
  attr_reader :nesting
29
27
 
30
- ##
31
28
  # @return [Array]
32
- #
33
29
  DISABLED_CALLBACKS = [
34
30
  :on_document,
35
31
  :on_doctype,
@@ -37,9 +33,7 @@ module Oga
37
33
  :on_element_children
38
34
  ]
39
35
 
40
- ##
41
36
  # @return [Array]
42
- #
43
37
  BLOCK_CALLBACKS = [
44
38
  :on_cdata,
45
39
  :on_comment,
@@ -47,11 +41,9 @@ module Oga
47
41
  :on_proc_ins
48
42
  ]
49
43
 
50
- ##
51
44
  # Returns the shorthands that can be used for various node classes.
52
45
  #
53
46
  # @return [Hash]
54
- #
55
47
  NODE_SHORTHANDS = {
56
48
  :text => XML::Text,
57
49
  :node => XML::Node,
@@ -62,9 +54,7 @@ module Oga
62
54
  :xml_declaration => XML::XmlDeclaration
63
55
  }
64
56
 
65
- ##
66
57
  # @see Oga::XML::Parser#reset
67
- #
68
58
  def reset
69
59
  super
70
60
 
@@ -73,11 +63,9 @@ module Oga
73
63
  @node = nil
74
64
  end
75
65
 
76
- ##
77
66
  # Parses the input and yields every node to the supplied block.
78
67
  #
79
68
  # @yieldparam [Oga::XML::Node]
80
- #
81
69
  def parse(&block)
82
70
  @block = block
83
71
 
@@ -86,7 +74,6 @@ module Oga
86
74
  return
87
75
  end
88
76
 
89
- ##
90
77
  # Calls the supplied block if the current node type and optionally the
91
78
  # nesting match. This method allows you to write this:
92
79
  #
@@ -120,7 +107,6 @@ module Oga
120
107
  # returned by {Oga::XML::Node#node_type}.
121
108
  #
122
109
  # @param [Array] nesting The element name nesting to act upon.
123
- #
124
110
  def on(type, nesting = [])
125
111
  if node.is_a?(NODE_SHORTHANDS[type])
126
112
  if nesting.empty? or nesting == self.nesting
@@ -149,9 +135,7 @@ module Oga
149
135
  EOF
150
136
  end
151
137
 
152
- ##
153
138
  # @see Oga::XML::Parser#on_element
154
- #
155
139
  def on_element(*args)
156
140
  @node = super
157
141
 
@@ -162,9 +146,7 @@ module Oga
162
146
  return
163
147
  end
164
148
 
165
- ##
166
149
  # @see Oga::XML::Parser#after_element
167
- #
168
150
  def after_element(*args)
169
151
  nesting.pop
170
152
 
@@ -1,51 +1,90 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # The Querying module provides methods that make it easy to run XPath/CSS
5
4
  # queries on XML documents/elements.
6
- #
7
5
  module Querying
8
- ##
9
6
  # Evaluates the given XPath expression.
10
7
  #
8
+ # Querying a document:
9
+ #
10
+ # document = Oga.parse_xml <<-EOF
11
+ # <people>
12
+ # <person age="25">Alice</person>
13
+ # </people>
14
+ # EOF
15
+ #
16
+ # document.xpath('people/person')
17
+ #
18
+ # Querying an element:
19
+ #
20
+ # element = document.at_xpath('people')
21
+ #
22
+ # element.xpath('person')
23
+ #
24
+ # Using variable bindings:
25
+ #
26
+ # document.xpath('people/person[@age = $age]', 'age' => 25)
27
+ #
11
28
  # @param [String] expression The XPath expression to run.
12
- # @param [Hash] variables Variables to bind.
13
- # @see [Oga::XPath::Evaluator#initialize]
14
29
  #
30
+ # @param [Hash] variables Variables to bind. The keys of this Hash should
31
+ # be String values.
32
+ #
33
+ # @return [Oga::XML::NodeSet]
15
34
  def xpath(expression, variables = {})
16
- XPath::Evaluator.new(self, variables).evaluate(expression)
35
+ ast = XPath::Parser.parse_with_cache(expression)
36
+ block = XPath::Compiler.compile_with_cache(ast)
37
+
38
+ block.call(self, variables)
17
39
  end
18
40
 
19
- ##
20
- # Evaluates the given XPath expression and returns the first node in the
21
- # set.
41
+ # Evaluates the XPath expression and returns the first matched node.
22
42
  #
23
- # @see [#xpath]
43
+ # Querying a document:
44
+ #
45
+ # document = Oga.parse_xml <<-EOF
46
+ # <people>
47
+ # <person age="25">Alice</person>
48
+ # </people>
49
+ # EOF
50
+ #
51
+ # person = document.at_xpath('people/person')
24
52
  #
53
+ # person.class # => Oga::XML::Element
54
+ #
55
+ # @see [#xpath]
56
+ # @return [Oga::XML::Node|Oga::XML::Attribute]
25
57
  def at_xpath(*args)
26
58
  result = xpath(*args)
27
59
 
28
60
  result.is_a?(XML::NodeSet) ? result.first : result
29
61
  end
30
62
 
31
- ##
32
63
  # Evaluates the given CSS expression.
33
64
  #
34
- # @param [String] expression The CSS expression to run.
35
- # @see [Oga::XPath::Evaluator#initialize]
65
+ # Querying a document:
66
+ #
67
+ # document = Oga.parse_xml <<-EOF
68
+ # <people>
69
+ # <person age="25">Alice</person>
70
+ # </people>
71
+ # EOF
72
+ #
73
+ # document.css('people person')
36
74
  #
75
+ # @param [String] expression The CSS expression to run.
76
+ # @return [Oga::XML::NodeSet]
37
77
  def css(expression)
38
- ast = CSS::Parser.parse_with_cache(expression)
78
+ ast = CSS::Parser.parse_with_cache(expression)
79
+ block = XPath::Compiler.compile_with_cache(ast)
39
80
 
40
- XPath::Evaluator.new(self).evaluate_ast(ast)
81
+ block.call(self)
41
82
  end
42
83
 
43
- ##
44
- # Evaluates the given CSS expression and returns the first node in the
45
- # set.
84
+ # Evaluates the CSS expression and returns the first matched node.
46
85
  #
47
86
  # @see [#css]
48
- #
87
+ # @return [Oga::XML::Node|Oga::XML::Attribute]
49
88
  def at_css(*args)
50
89
  result = css(*args)
51
90