oga 1.2.3-java → 1.3.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/css_selectors.md +1 -1
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +6 -1
- data/lib/oga/blacklist.rb +0 -10
- data/lib/oga/css/lexer.rb +530 -255
- data/lib/oga/css/parser.rb +232 -230
- data/lib/oga/entity_decoder.rb +0 -4
- data/lib/oga/html/entities.rb +0 -4
- data/lib/oga/html/parser.rb +0 -4
- data/lib/oga/html/sax_parser.rb +0 -4
- data/lib/oga/lru.rb +0 -26
- data/lib/oga/oga.rb +0 -8
- data/lib/oga/ruby/generator.rb +225 -0
- data/lib/oga/ruby/node.rb +189 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +0 -6
- data/lib/oga/xml/attribute.rb +13 -20
- data/lib/oga/xml/cdata.rb +0 -4
- data/lib/oga/xml/character_node.rb +0 -8
- data/lib/oga/xml/comment.rb +0 -4
- data/lib/oga/xml/default_namespace.rb +0 -2
- data/lib/oga/xml/doctype.rb +0 -8
- data/lib/oga/xml/document.rb +10 -14
- data/lib/oga/xml/element.rb +1 -52
- data/lib/oga/xml/entities.rb +0 -26
- data/lib/oga/xml/expanded_name.rb +12 -0
- data/lib/oga/xml/html_void_elements.rb +0 -2
- data/lib/oga/xml/lexer.rb +0 -86
- data/lib/oga/xml/namespace.rb +0 -10
- data/lib/oga/xml/node.rb +18 -34
- data/lib/oga/xml/node_set.rb +0 -50
- data/lib/oga/xml/parser.rb +13 -50
- data/lib/oga/xml/processing_instruction.rb +0 -8
- data/lib/oga/xml/pull_parser.rb +0 -18
- data/lib/oga/xml/querying.rb +58 -19
- data/lib/oga/xml/sax_parser.rb +0 -18
- data/lib/oga/xml/text.rb +0 -12
- data/lib/oga/xml/traversal.rb +0 -4
- data/lib/oga/xml/xml_declaration.rb +0 -8
- data/lib/oga/xpath/compiler.rb +1568 -0
- data/lib/oga/xpath/conversion.rb +102 -0
- data/lib/oga/xpath/lexer.rb +1844 -1238
- data/lib/oga/xpath/parser.rb +182 -153
- metadata +7 -3
- data/lib/oga/xpath/evaluator.rb +0 -1800
data/lib/oga/xml/parser.rb
CHANGED
@@ -5,7 +5,6 @@ require 'll/setup'
|
|
5
5
|
|
6
6
|
module Oga
|
7
7
|
module XML
|
8
|
-
##
|
9
8
|
# DOM parser for both XML and HTML.
|
10
9
|
#
|
11
10
|
# This parser does not produce a dedicated AST, instead it emits XML nodes
|
@@ -186,11 +185,9 @@ class Parser < LL::Driver
|
|
186
185
|
[:_rule_45, 1], # 45
|
187
186
|
].freeze
|
188
187
|
|
189
|
-
##
|
190
188
|
# Hash mapping token types and dedicated error labels.
|
191
189
|
#
|
192
190
|
# @return [Hash]
|
193
|
-
#
|
194
191
|
TOKEN_ERROR_MAPPING = {
|
195
192
|
:T_STRING => 'string',
|
196
193
|
:T_TEXT => 'text',
|
@@ -215,11 +212,9 @@ class Parser < LL::Driver
|
|
215
212
|
-1 => 'end of input'
|
216
213
|
}
|
217
214
|
|
218
|
-
##
|
219
215
|
# @param [String|IO] data The input to parse.
|
220
216
|
# @param [Hash] options
|
221
217
|
# @see [Oga::XML::Lexer#initialize]
|
222
|
-
#
|
223
218
|
def initialize(data, options = {})
|
224
219
|
@data = data
|
225
220
|
@lexer = Lexer.new(data, options)
|
@@ -227,20 +222,16 @@ class Parser < LL::Driver
|
|
227
222
|
reset
|
228
223
|
end
|
229
224
|
|
230
|
-
##
|
231
225
|
# Resets the internal state of the parser.
|
232
|
-
#
|
233
226
|
def reset
|
234
227
|
@line = 1
|
235
228
|
|
236
229
|
@lexer.reset
|
237
230
|
end
|
238
231
|
|
239
|
-
##
|
240
232
|
# Yields the next token from the lexer.
|
241
233
|
#
|
242
234
|
# @yieldparam [Array]
|
243
|
-
#
|
244
235
|
def each_token
|
245
236
|
@lexer.advance do |type, value, line|
|
246
237
|
@line = line if line
|
@@ -251,12 +242,10 @@ class Parser < LL::Driver
|
|
251
242
|
yield [-1, -1]
|
252
243
|
end
|
253
244
|
|
254
|
-
##
|
255
245
|
# @param [Fixnum] stack_type
|
256
246
|
# @param [Fixnum] stack_value
|
257
247
|
# @param [Symbol] token_type
|
258
248
|
# @param [String] token_value
|
259
|
-
#
|
260
249
|
def parser_error(stack_type, stack_value, token_type, token_value)
|
261
250
|
case id_to_type(stack_type)
|
262
251
|
when :rule
|
@@ -275,21 +264,17 @@ class Parser < LL::Driver
|
|
275
264
|
raise LL::ParserError, message
|
276
265
|
end
|
277
266
|
|
278
|
-
##
|
279
267
|
# @see [LL::Driver#parse]
|
280
|
-
#
|
281
268
|
def parse
|
282
269
|
retval = super
|
283
270
|
|
284
271
|
reset
|
285
272
|
|
286
|
-
|
273
|
+
retval
|
287
274
|
end
|
288
275
|
|
289
|
-
##
|
290
276
|
# @param [Array] children
|
291
277
|
# @return [Oga::XML::Document]
|
292
|
-
#
|
293
278
|
def on_document(children = [])
|
294
279
|
document = Document.new(:type => @lexer.html? ? :html : :xml)
|
295
280
|
|
@@ -305,45 +290,35 @@ class Parser < LL::Driver
|
|
305
290
|
end
|
306
291
|
end
|
307
292
|
|
308
|
-
|
293
|
+
document
|
309
294
|
end
|
310
295
|
|
311
|
-
##
|
312
296
|
# @param [Hash] options
|
313
|
-
#
|
314
297
|
def on_doctype(options = {})
|
315
|
-
|
298
|
+
Doctype.new(options)
|
316
299
|
end
|
317
300
|
|
318
|
-
##
|
319
301
|
# @param [String] text
|
320
302
|
# @return [Oga::XML::Cdata]
|
321
|
-
#
|
322
303
|
def on_cdata(text = nil)
|
323
|
-
|
304
|
+
Cdata.new(:text => text)
|
324
305
|
end
|
325
306
|
|
326
|
-
##
|
327
307
|
# @param [String] text
|
328
308
|
# @return [Oga::XML::Comment]
|
329
|
-
#
|
330
309
|
def on_comment(text = nil)
|
331
|
-
|
310
|
+
Comment.new(:text => text)
|
332
311
|
end
|
333
312
|
|
334
|
-
##
|
335
313
|
# @param [String] name
|
336
314
|
# @param [String] text
|
337
315
|
# @return [Oga::XML::ProcessingInstruction]
|
338
|
-
#
|
339
316
|
def on_proc_ins(name, text = nil)
|
340
|
-
|
317
|
+
ProcessingInstruction.new(:name => name, :text => text)
|
341
318
|
end
|
342
319
|
|
343
|
-
##
|
344
320
|
# @param [Array] attributes
|
345
321
|
# @return [Oga::XML::XmlDeclaration]
|
346
|
-
#
|
347
322
|
def on_xml_decl(attributes = [])
|
348
323
|
options = {}
|
349
324
|
|
@@ -351,23 +326,19 @@ class Parser < LL::Driver
|
|
351
326
|
options[attr.name.to_sym] = attr.value
|
352
327
|
end
|
353
328
|
|
354
|
-
|
329
|
+
XmlDeclaration.new(options)
|
355
330
|
end
|
356
331
|
|
357
|
-
##
|
358
332
|
# @param [String] text
|
359
333
|
# @return [Oga::XML::Text]
|
360
|
-
#
|
361
334
|
def on_text(text)
|
362
|
-
|
335
|
+
Text.new(:text => text)
|
363
336
|
end
|
364
337
|
|
365
|
-
##
|
366
338
|
# @param [String] namespace
|
367
339
|
# @param [String] name
|
368
340
|
# @param [Hash] attributes
|
369
341
|
# @return [Oga::XML::Element]
|
370
|
-
#
|
371
342
|
def on_element(namespace, name, attributes = {})
|
372
343
|
element = Element.new(
|
373
344
|
:namespace_name => namespace,
|
@@ -375,47 +346,39 @@ class Parser < LL::Driver
|
|
375
346
|
:attributes => attributes
|
376
347
|
)
|
377
348
|
|
378
|
-
|
349
|
+
element
|
379
350
|
end
|
380
351
|
|
381
|
-
##
|
382
352
|
# @param [Oga::XML::Element] element
|
383
353
|
# @param [Array] children
|
384
354
|
# @return [Oga::XML::Element]
|
385
|
-
#
|
386
355
|
def on_element_children(element, children = [])
|
387
356
|
element.children = children
|
388
357
|
|
389
|
-
|
358
|
+
element
|
390
359
|
end
|
391
360
|
|
392
|
-
##
|
393
361
|
# @param [Oga::XML::Element] element
|
394
362
|
# @return [Oga::XML::Element]
|
395
|
-
#
|
396
363
|
def after_element(element)
|
397
|
-
|
364
|
+
element
|
398
365
|
end
|
399
366
|
|
400
|
-
##
|
401
367
|
# @param [String] name
|
402
368
|
# @param [String] ns_name
|
403
369
|
# @param [String] value
|
404
370
|
# @return [Oga::XML::Attribute]
|
405
|
-
#
|
406
371
|
def on_attribute(name, ns_name = nil, value = nil)
|
407
|
-
|
372
|
+
Attribute.new(
|
408
373
|
:namespace_name => ns_name,
|
409
374
|
:name => name,
|
410
375
|
:value => value
|
411
376
|
)
|
412
377
|
end
|
413
378
|
|
414
|
-
##
|
415
379
|
# @param [Array] attrs
|
416
|
-
#
|
417
380
|
def on_attributes(attrs)
|
418
|
-
|
381
|
+
attrs
|
419
382
|
end
|
420
383
|
|
421
384
|
def _rule_0(val)
|
@@ -1,34 +1,26 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Class used for storing information about a single processing instruction.
|
5
|
-
#
|
6
4
|
class ProcessingInstruction < CharacterNode
|
7
5
|
# @return [String]
|
8
6
|
attr_accessor :name
|
9
7
|
|
10
|
-
##
|
11
8
|
# @param [Hash] options
|
12
9
|
#
|
13
10
|
# @option options [String] :name The name of the instruction.
|
14
11
|
# @see [Oga::XML::CharacterNode#initialize]
|
15
|
-
#
|
16
12
|
def initialize(options = {})
|
17
13
|
super
|
18
14
|
|
19
15
|
@name = options[:name]
|
20
16
|
end
|
21
17
|
|
22
|
-
##
|
23
18
|
# @return [String]
|
24
|
-
#
|
25
19
|
def to_xml
|
26
20
|
"<?#{name}#{text}?>"
|
27
21
|
end
|
28
22
|
|
29
|
-
##
|
30
23
|
# @return [String]
|
31
|
-
#
|
32
24
|
def inspect
|
33
25
|
"ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
|
34
26
|
end
|
data/lib/oga/xml/pull_parser.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# The PullParser class can be used to parse an XML document incrementally
|
5
4
|
# instead of parsing it as a whole. This results in lower memory usage and
|
6
5
|
# potentially faster parsing times. The downside is that pull parsers are
|
@@ -18,7 +17,6 @@ module Oga
|
|
18
17
|
#
|
19
18
|
# This parser yields proper XML instances such as {Oga::XML::Element}.
|
20
19
|
# Doctypes and XML declarations are ignored by this parser.
|
21
|
-
#
|
22
20
|
class PullParser < Parser
|
23
21
|
# @return [Oga::XML::Node]
|
24
22
|
attr_reader :node
|
@@ -27,9 +25,7 @@ module Oga
|
|
27
25
|
# @return [Array]
|
28
26
|
attr_reader :nesting
|
29
27
|
|
30
|
-
##
|
31
28
|
# @return [Array]
|
32
|
-
#
|
33
29
|
DISABLED_CALLBACKS = [
|
34
30
|
:on_document,
|
35
31
|
:on_doctype,
|
@@ -37,9 +33,7 @@ module Oga
|
|
37
33
|
:on_element_children
|
38
34
|
]
|
39
35
|
|
40
|
-
##
|
41
36
|
# @return [Array]
|
42
|
-
#
|
43
37
|
BLOCK_CALLBACKS = [
|
44
38
|
:on_cdata,
|
45
39
|
:on_comment,
|
@@ -47,11 +41,9 @@ module Oga
|
|
47
41
|
:on_proc_ins
|
48
42
|
]
|
49
43
|
|
50
|
-
##
|
51
44
|
# Returns the shorthands that can be used for various node classes.
|
52
45
|
#
|
53
46
|
# @return [Hash]
|
54
|
-
#
|
55
47
|
NODE_SHORTHANDS = {
|
56
48
|
:text => XML::Text,
|
57
49
|
:node => XML::Node,
|
@@ -62,9 +54,7 @@ module Oga
|
|
62
54
|
:xml_declaration => XML::XmlDeclaration
|
63
55
|
}
|
64
56
|
|
65
|
-
##
|
66
57
|
# @see Oga::XML::Parser#reset
|
67
|
-
#
|
68
58
|
def reset
|
69
59
|
super
|
70
60
|
|
@@ -73,11 +63,9 @@ module Oga
|
|
73
63
|
@node = nil
|
74
64
|
end
|
75
65
|
|
76
|
-
##
|
77
66
|
# Parses the input and yields every node to the supplied block.
|
78
67
|
#
|
79
68
|
# @yieldparam [Oga::XML::Node]
|
80
|
-
#
|
81
69
|
def parse(&block)
|
82
70
|
@block = block
|
83
71
|
|
@@ -86,7 +74,6 @@ module Oga
|
|
86
74
|
return
|
87
75
|
end
|
88
76
|
|
89
|
-
##
|
90
77
|
# Calls the supplied block if the current node type and optionally the
|
91
78
|
# nesting match. This method allows you to write this:
|
92
79
|
#
|
@@ -120,7 +107,6 @@ module Oga
|
|
120
107
|
# returned by {Oga::XML::Node#node_type}.
|
121
108
|
#
|
122
109
|
# @param [Array] nesting The element name nesting to act upon.
|
123
|
-
#
|
124
110
|
def on(type, nesting = [])
|
125
111
|
if node.is_a?(NODE_SHORTHANDS[type])
|
126
112
|
if nesting.empty? or nesting == self.nesting
|
@@ -149,9 +135,7 @@ module Oga
|
|
149
135
|
EOF
|
150
136
|
end
|
151
137
|
|
152
|
-
##
|
153
138
|
# @see Oga::XML::Parser#on_element
|
154
|
-
#
|
155
139
|
def on_element(*args)
|
156
140
|
@node = super
|
157
141
|
|
@@ -162,9 +146,7 @@ module Oga
|
|
162
146
|
return
|
163
147
|
end
|
164
148
|
|
165
|
-
##
|
166
149
|
# @see Oga::XML::Parser#after_element
|
167
|
-
#
|
168
150
|
def after_element(*args)
|
169
151
|
nesting.pop
|
170
152
|
|
data/lib/oga/xml/querying.rb
CHANGED
@@ -1,51 +1,90 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# The Querying module provides methods that make it easy to run XPath/CSS
|
5
4
|
# queries on XML documents/elements.
|
6
|
-
#
|
7
5
|
module Querying
|
8
|
-
##
|
9
6
|
# Evaluates the given XPath expression.
|
10
7
|
#
|
8
|
+
# Querying a document:
|
9
|
+
#
|
10
|
+
# document = Oga.parse_xml <<-EOF
|
11
|
+
# <people>
|
12
|
+
# <person age="25">Alice</person>
|
13
|
+
# </people>
|
14
|
+
# EOF
|
15
|
+
#
|
16
|
+
# document.xpath('people/person')
|
17
|
+
#
|
18
|
+
# Querying an element:
|
19
|
+
#
|
20
|
+
# element = document.at_xpath('people')
|
21
|
+
#
|
22
|
+
# element.xpath('person')
|
23
|
+
#
|
24
|
+
# Using variable bindings:
|
25
|
+
#
|
26
|
+
# document.xpath('people/person[@age = $age]', 'age' => 25)
|
27
|
+
#
|
11
28
|
# @param [String] expression The XPath expression to run.
|
12
|
-
# @param [Hash] variables Variables to bind.
|
13
|
-
# @see [Oga::XPath::Evaluator#initialize]
|
14
29
|
#
|
30
|
+
# @param [Hash] variables Variables to bind. The keys of this Hash should
|
31
|
+
# be String values.
|
32
|
+
#
|
33
|
+
# @return [Oga::XML::NodeSet]
|
15
34
|
def xpath(expression, variables = {})
|
16
|
-
XPath::
|
35
|
+
ast = XPath::Parser.parse_with_cache(expression)
|
36
|
+
block = XPath::Compiler.compile_with_cache(ast)
|
37
|
+
|
38
|
+
block.call(self, variables)
|
17
39
|
end
|
18
40
|
|
19
|
-
|
20
|
-
# Evaluates the given XPath expression and returns the first node in the
|
21
|
-
# set.
|
41
|
+
# Evaluates the XPath expression and returns the first matched node.
|
22
42
|
#
|
23
|
-
#
|
43
|
+
# Querying a document:
|
44
|
+
#
|
45
|
+
# document = Oga.parse_xml <<-EOF
|
46
|
+
# <people>
|
47
|
+
# <person age="25">Alice</person>
|
48
|
+
# </people>
|
49
|
+
# EOF
|
50
|
+
#
|
51
|
+
# person = document.at_xpath('people/person')
|
24
52
|
#
|
53
|
+
# person.class # => Oga::XML::Element
|
54
|
+
#
|
55
|
+
# @see [#xpath]
|
56
|
+
# @return [Oga::XML::Node|Oga::XML::Attribute]
|
25
57
|
def at_xpath(*args)
|
26
58
|
result = xpath(*args)
|
27
59
|
|
28
60
|
result.is_a?(XML::NodeSet) ? result.first : result
|
29
61
|
end
|
30
62
|
|
31
|
-
##
|
32
63
|
# Evaluates the given CSS expression.
|
33
64
|
#
|
34
|
-
#
|
35
|
-
#
|
65
|
+
# Querying a document:
|
66
|
+
#
|
67
|
+
# document = Oga.parse_xml <<-EOF
|
68
|
+
# <people>
|
69
|
+
# <person age="25">Alice</person>
|
70
|
+
# </people>
|
71
|
+
# EOF
|
72
|
+
#
|
73
|
+
# document.css('people person')
|
36
74
|
#
|
75
|
+
# @param [String] expression The CSS expression to run.
|
76
|
+
# @return [Oga::XML::NodeSet]
|
37
77
|
def css(expression)
|
38
|
-
ast
|
78
|
+
ast = CSS::Parser.parse_with_cache(expression)
|
79
|
+
block = XPath::Compiler.compile_with_cache(ast)
|
39
80
|
|
40
|
-
|
81
|
+
block.call(self)
|
41
82
|
end
|
42
83
|
|
43
|
-
|
44
|
-
# Evaluates the given CSS expression and returns the first node in the
|
45
|
-
# set.
|
84
|
+
# Evaluates the CSS expression and returns the first matched node.
|
46
85
|
#
|
47
86
|
# @see [#css]
|
48
|
-
#
|
87
|
+
# @return [Oga::XML::Node|Oga::XML::Attribute]
|
49
88
|
def at_css(*args)
|
50
89
|
result = css(*args)
|
51
90
|
|