oga 1.2.3-java → 1.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/css_selectors.md +1 -1
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +6 -1
- data/lib/oga/blacklist.rb +0 -10
- data/lib/oga/css/lexer.rb +530 -255
- data/lib/oga/css/parser.rb +232 -230
- data/lib/oga/entity_decoder.rb +0 -4
- data/lib/oga/html/entities.rb +0 -4
- data/lib/oga/html/parser.rb +0 -4
- data/lib/oga/html/sax_parser.rb +0 -4
- data/lib/oga/lru.rb +0 -26
- data/lib/oga/oga.rb +0 -8
- data/lib/oga/ruby/generator.rb +225 -0
- data/lib/oga/ruby/node.rb +189 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +0 -6
- data/lib/oga/xml/attribute.rb +13 -20
- data/lib/oga/xml/cdata.rb +0 -4
- data/lib/oga/xml/character_node.rb +0 -8
- data/lib/oga/xml/comment.rb +0 -4
- data/lib/oga/xml/default_namespace.rb +0 -2
- data/lib/oga/xml/doctype.rb +0 -8
- data/lib/oga/xml/document.rb +10 -14
- data/lib/oga/xml/element.rb +1 -52
- data/lib/oga/xml/entities.rb +0 -26
- data/lib/oga/xml/expanded_name.rb +12 -0
- data/lib/oga/xml/html_void_elements.rb +0 -2
- data/lib/oga/xml/lexer.rb +0 -86
- data/lib/oga/xml/namespace.rb +0 -10
- data/lib/oga/xml/node.rb +18 -34
- data/lib/oga/xml/node_set.rb +0 -50
- data/lib/oga/xml/parser.rb +13 -50
- data/lib/oga/xml/processing_instruction.rb +0 -8
- data/lib/oga/xml/pull_parser.rb +0 -18
- data/lib/oga/xml/querying.rb +58 -19
- data/lib/oga/xml/sax_parser.rb +0 -18
- data/lib/oga/xml/text.rb +0 -12
- data/lib/oga/xml/traversal.rb +0 -4
- data/lib/oga/xml/xml_declaration.rb +0 -8
- data/lib/oga/xpath/compiler.rb +1568 -0
- data/lib/oga/xpath/conversion.rb +102 -0
- data/lib/oga/xpath/lexer.rb +1844 -1238
- data/lib/oga/xpath/parser.rb +182 -153
- metadata +7 -3
- data/lib/oga/xpath/evaluator.rb +0 -1800
data/lib/oga/xml/parser.rb
CHANGED
@@ -5,7 +5,6 @@ require 'll/setup'
|
|
5
5
|
|
6
6
|
module Oga
|
7
7
|
module XML
|
8
|
-
##
|
9
8
|
# DOM parser for both XML and HTML.
|
10
9
|
#
|
11
10
|
# This parser does not produce a dedicated AST, instead it emits XML nodes
|
@@ -186,11 +185,9 @@ class Parser < LL::Driver
|
|
186
185
|
[:_rule_45, 1], # 45
|
187
186
|
].freeze
|
188
187
|
|
189
|
-
##
|
190
188
|
# Hash mapping token types and dedicated error labels.
|
191
189
|
#
|
192
190
|
# @return [Hash]
|
193
|
-
#
|
194
191
|
TOKEN_ERROR_MAPPING = {
|
195
192
|
:T_STRING => 'string',
|
196
193
|
:T_TEXT => 'text',
|
@@ -215,11 +212,9 @@ class Parser < LL::Driver
|
|
215
212
|
-1 => 'end of input'
|
216
213
|
}
|
217
214
|
|
218
|
-
##
|
219
215
|
# @param [String|IO] data The input to parse.
|
220
216
|
# @param [Hash] options
|
221
217
|
# @see [Oga::XML::Lexer#initialize]
|
222
|
-
#
|
223
218
|
def initialize(data, options = {})
|
224
219
|
@data = data
|
225
220
|
@lexer = Lexer.new(data, options)
|
@@ -227,20 +222,16 @@ class Parser < LL::Driver
|
|
227
222
|
reset
|
228
223
|
end
|
229
224
|
|
230
|
-
##
|
231
225
|
# Resets the internal state of the parser.
|
232
|
-
#
|
233
226
|
def reset
|
234
227
|
@line = 1
|
235
228
|
|
236
229
|
@lexer.reset
|
237
230
|
end
|
238
231
|
|
239
|
-
##
|
240
232
|
# Yields the next token from the lexer.
|
241
233
|
#
|
242
234
|
# @yieldparam [Array]
|
243
|
-
#
|
244
235
|
def each_token
|
245
236
|
@lexer.advance do |type, value, line|
|
246
237
|
@line = line if line
|
@@ -251,12 +242,10 @@ class Parser < LL::Driver
|
|
251
242
|
yield [-1, -1]
|
252
243
|
end
|
253
244
|
|
254
|
-
##
|
255
245
|
# @param [Fixnum] stack_type
|
256
246
|
# @param [Fixnum] stack_value
|
257
247
|
# @param [Symbol] token_type
|
258
248
|
# @param [String] token_value
|
259
|
-
#
|
260
249
|
def parser_error(stack_type, stack_value, token_type, token_value)
|
261
250
|
case id_to_type(stack_type)
|
262
251
|
when :rule
|
@@ -275,21 +264,17 @@ class Parser < LL::Driver
|
|
275
264
|
raise LL::ParserError, message
|
276
265
|
end
|
277
266
|
|
278
|
-
##
|
279
267
|
# @see [LL::Driver#parse]
|
280
|
-
#
|
281
268
|
def parse
|
282
269
|
retval = super
|
283
270
|
|
284
271
|
reset
|
285
272
|
|
286
|
-
|
273
|
+
retval
|
287
274
|
end
|
288
275
|
|
289
|
-
##
|
290
276
|
# @param [Array] children
|
291
277
|
# @return [Oga::XML::Document]
|
292
|
-
#
|
293
278
|
def on_document(children = [])
|
294
279
|
document = Document.new(:type => @lexer.html? ? :html : :xml)
|
295
280
|
|
@@ -305,45 +290,35 @@ class Parser < LL::Driver
|
|
305
290
|
end
|
306
291
|
end
|
307
292
|
|
308
|
-
|
293
|
+
document
|
309
294
|
end
|
310
295
|
|
311
|
-
##
|
312
296
|
# @param [Hash] options
|
313
|
-
#
|
314
297
|
def on_doctype(options = {})
|
315
|
-
|
298
|
+
Doctype.new(options)
|
316
299
|
end
|
317
300
|
|
318
|
-
##
|
319
301
|
# @param [String] text
|
320
302
|
# @return [Oga::XML::Cdata]
|
321
|
-
#
|
322
303
|
def on_cdata(text = nil)
|
323
|
-
|
304
|
+
Cdata.new(:text => text)
|
324
305
|
end
|
325
306
|
|
326
|
-
##
|
327
307
|
# @param [String] text
|
328
308
|
# @return [Oga::XML::Comment]
|
329
|
-
#
|
330
309
|
def on_comment(text = nil)
|
331
|
-
|
310
|
+
Comment.new(:text => text)
|
332
311
|
end
|
333
312
|
|
334
|
-
##
|
335
313
|
# @param [String] name
|
336
314
|
# @param [String] text
|
337
315
|
# @return [Oga::XML::ProcessingInstruction]
|
338
|
-
#
|
339
316
|
def on_proc_ins(name, text = nil)
|
340
|
-
|
317
|
+
ProcessingInstruction.new(:name => name, :text => text)
|
341
318
|
end
|
342
319
|
|
343
|
-
##
|
344
320
|
# @param [Array] attributes
|
345
321
|
# @return [Oga::XML::XmlDeclaration]
|
346
|
-
#
|
347
322
|
def on_xml_decl(attributes = [])
|
348
323
|
options = {}
|
349
324
|
|
@@ -351,23 +326,19 @@ class Parser < LL::Driver
|
|
351
326
|
options[attr.name.to_sym] = attr.value
|
352
327
|
end
|
353
328
|
|
354
|
-
|
329
|
+
XmlDeclaration.new(options)
|
355
330
|
end
|
356
331
|
|
357
|
-
##
|
358
332
|
# @param [String] text
|
359
333
|
# @return [Oga::XML::Text]
|
360
|
-
#
|
361
334
|
def on_text(text)
|
362
|
-
|
335
|
+
Text.new(:text => text)
|
363
336
|
end
|
364
337
|
|
365
|
-
##
|
366
338
|
# @param [String] namespace
|
367
339
|
# @param [String] name
|
368
340
|
# @param [Hash] attributes
|
369
341
|
# @return [Oga::XML::Element]
|
370
|
-
#
|
371
342
|
def on_element(namespace, name, attributes = {})
|
372
343
|
element = Element.new(
|
373
344
|
:namespace_name => namespace,
|
@@ -375,47 +346,39 @@ class Parser < LL::Driver
|
|
375
346
|
:attributes => attributes
|
376
347
|
)
|
377
348
|
|
378
|
-
|
349
|
+
element
|
379
350
|
end
|
380
351
|
|
381
|
-
##
|
382
352
|
# @param [Oga::XML::Element] element
|
383
353
|
# @param [Array] children
|
384
354
|
# @return [Oga::XML::Element]
|
385
|
-
#
|
386
355
|
def on_element_children(element, children = [])
|
387
356
|
element.children = children
|
388
357
|
|
389
|
-
|
358
|
+
element
|
390
359
|
end
|
391
360
|
|
392
|
-
##
|
393
361
|
# @param [Oga::XML::Element] element
|
394
362
|
# @return [Oga::XML::Element]
|
395
|
-
#
|
396
363
|
def after_element(element)
|
397
|
-
|
364
|
+
element
|
398
365
|
end
|
399
366
|
|
400
|
-
##
|
401
367
|
# @param [String] name
|
402
368
|
# @param [String] ns_name
|
403
369
|
# @param [String] value
|
404
370
|
# @return [Oga::XML::Attribute]
|
405
|
-
#
|
406
371
|
def on_attribute(name, ns_name = nil, value = nil)
|
407
|
-
|
372
|
+
Attribute.new(
|
408
373
|
:namespace_name => ns_name,
|
409
374
|
:name => name,
|
410
375
|
:value => value
|
411
376
|
)
|
412
377
|
end
|
413
378
|
|
414
|
-
##
|
415
379
|
# @param [Array] attrs
|
416
|
-
#
|
417
380
|
def on_attributes(attrs)
|
418
|
-
|
381
|
+
attrs
|
419
382
|
end
|
420
383
|
|
421
384
|
def _rule_0(val)
|
data/lib/oga/xml/processing_instruction.rb
CHANGED
@@ -1,34 +1,26 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Class used for storing information about a single processing instruction.
|
5
|
-
#
|
6
4
|
class ProcessingInstruction < CharacterNode
|
7
5
|
# @return [String]
|
8
6
|
attr_accessor :name
|
9
7
|
|
10
|
-
##
|
11
8
|
# @param [Hash] options
|
12
9
|
#
|
13
10
|
# @option options [String] :name The name of the instruction.
|
14
11
|
# @see [Oga::XML::CharacterNode#initialize]
|
15
|
-
#
|
16
12
|
def initialize(options = {})
|
17
13
|
super
|
18
14
|
|
19
15
|
@name = options[:name]
|
20
16
|
end
|
21
17
|
|
22
|
-
##
|
23
18
|
# @return [String]
|
24
|
-
#
|
25
19
|
def to_xml
|
26
20
|
"<?#{name}#{text}?>"
|
27
21
|
end
|
28
22
|
|
29
|
-
##
|
30
23
|
# @return [String]
|
31
|
-
#
|
32
24
|
def inspect
|
33
25
|
"ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
|
34
26
|
end
|
data/lib/oga/xml/pull_parser.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# The PullParser class can be used to parse an XML document incrementally
|
5
4
|
# instead of parsing it as a whole. This results in lower memory usage and
|
6
5
|
# potentially faster parsing times. The downside is that pull parsers are
|
@@ -18,7 +17,6 @@ module Oga
|
|
18
17
|
#
|
19
18
|
# This parses yields proper XML instances such as {Oga::XML::Element}.
|
20
19
|
# Doctypes and XML declarations are ignored by this parser.
|
21
|
-
#
|
22
20
|
class PullParser < Parser
|
23
21
|
# @return [Oga::XML::Node]
|
24
22
|
attr_reader :node
|
@@ -27,9 +25,7 @@ module Oga
|
|
27
25
|
# @return [Array]
|
28
26
|
attr_reader :nesting
|
29
27
|
|
30
|
-
##
|
31
28
|
# @return [Array]
|
32
|
-
#
|
33
29
|
DISABLED_CALLBACKS = [
|
34
30
|
:on_document,
|
35
31
|
:on_doctype,
|
@@ -37,9 +33,7 @@ module Oga
|
|
37
33
|
:on_element_children
|
38
34
|
]
|
39
35
|
|
40
|
-
##
|
41
36
|
# @return [Array]
|
42
|
-
#
|
43
37
|
BLOCK_CALLBACKS = [
|
44
38
|
:on_cdata,
|
45
39
|
:on_comment,
|
@@ -47,11 +41,9 @@ module Oga
|
|
47
41
|
:on_proc_ins
|
48
42
|
]
|
49
43
|
|
50
|
-
##
|
51
44
|
# Returns the shorthands that can be used for various node classes.
|
52
45
|
#
|
53
46
|
# @return [Hash]
|
54
|
-
#
|
55
47
|
NODE_SHORTHANDS = {
|
56
48
|
:text => XML::Text,
|
57
49
|
:node => XML::Node,
|
@@ -62,9 +54,7 @@ module Oga
|
|
62
54
|
:xml_declaration => XML::XmlDeclaration
|
63
55
|
}
|
64
56
|
|
65
|
-
##
|
66
57
|
# @see Oga::XML::Parser#reset
|
67
|
-
#
|
68
58
|
def reset
|
69
59
|
super
|
70
60
|
|
@@ -73,11 +63,9 @@ module Oga
|
|
73
63
|
@node = nil
|
74
64
|
end
|
75
65
|
|
76
|
-
##
|
77
66
|
# Parses the input and yields every node to the supplied block.
|
78
67
|
#
|
79
68
|
# @yieldparam [Oga::XML::Node]
|
80
|
-
#
|
81
69
|
def parse(&block)
|
82
70
|
@block = block
|
83
71
|
|
@@ -86,7 +74,6 @@ module Oga
|
|
86
74
|
return
|
87
75
|
end
|
88
76
|
|
89
|
-
##
|
90
77
|
# Calls the supplied block if the current node type and optionally the
|
91
78
|
# nesting match. This method allows you to write this:
|
92
79
|
#
|
@@ -120,7 +107,6 @@ module Oga
|
|
120
107
|
# returned by {Oga::XML::Node#node_type}.
|
121
108
|
#
|
122
109
|
# @param [Array] nesting The element name nesting to act upon.
|
123
|
-
#
|
124
110
|
def on(type, nesting = [])
|
125
111
|
if node.is_a?(NODE_SHORTHANDS[type])
|
126
112
|
if nesting.empty? or nesting == self.nesting
|
@@ -149,9 +135,7 @@ module Oga
|
|
149
135
|
EOF
|
150
136
|
end
|
151
137
|
|
152
|
-
##
|
153
138
|
# @see Oga::XML::Parser#on_element
|
154
|
-
#
|
155
139
|
def on_element(*args)
|
156
140
|
@node = super
|
157
141
|
|
@@ -162,9 +146,7 @@ module Oga
|
|
162
146
|
return
|
163
147
|
end
|
164
148
|
|
165
|
-
##
|
166
149
|
# @see Oga::XML::Parser#on_element_children
|
167
|
-
#
|
168
150
|
def after_element(*args)
|
169
151
|
nesting.pop
|
170
152
|
|
data/lib/oga/xml/querying.rb
CHANGED
@@ -1,51 +1,90 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# The Querying module provides methods that make it easy to run XPath/CSS
|
5
4
|
# queries on XML documents/elements.
|
6
|
-
#
|
7
5
|
module Querying
|
8
|
-
##
|
9
6
|
# Evaluates the given XPath expression.
|
10
7
|
#
|
8
|
+
# Querying a document:
|
9
|
+
#
|
10
|
+
# document = Oga.parse_xml <<-EOF
|
11
|
+
# <people>
|
12
|
+
# <person age="25">Alice</person>
|
13
|
+
# </people>
|
14
|
+
# EOF
|
15
|
+
#
|
16
|
+
# document.xpath('people/person')
|
17
|
+
#
|
18
|
+
# Querying an element:
|
19
|
+
#
|
20
|
+
# element = document.at_xpath('people')
|
21
|
+
#
|
22
|
+
# element.xpath('person')
|
23
|
+
#
|
24
|
+
# Using variable bindings:
|
25
|
+
#
|
26
|
+
# document.xpath('people/person[@age = $age]', 'age' => 25)
|
27
|
+
#
|
11
28
|
# @param [String] expression The XPath expression to run.
|
12
|
-
# @param [Hash] variables Variables to bind.
|
13
|
-
# @see [Oga::XPath::Evaluator#initialize]
|
14
29
|
#
|
30
|
+
# @param [Hash] variables Variables to bind. The keys of this Hash should
|
31
|
+
# be String values.
|
32
|
+
#
|
33
|
+
# @return [Oga::XML::NodeSet]
|
15
34
|
def xpath(expression, variables = {})
|
16
|
-
XPath::
|
35
|
+
ast = XPath::Parser.parse_with_cache(expression)
|
36
|
+
block = XPath::Compiler.compile_with_cache(ast)
|
37
|
+
|
38
|
+
block.call(self, variables)
|
17
39
|
end
|
18
40
|
|
19
|
-
|
20
|
-
# Evaluates the given XPath expression and returns the first node in the
|
21
|
-
# set.
|
41
|
+
# Evaluates the XPath expression and returns the first matched node.
|
22
42
|
#
|
23
|
-
#
|
43
|
+
# Querying a document:
|
44
|
+
#
|
45
|
+
# document = Oga.parse_xml <<-EOF
|
46
|
+
# <people>
|
47
|
+
# <person age="25">Alice</person>
|
48
|
+
# </people>
|
49
|
+
# EOF
|
50
|
+
#
|
51
|
+
# person = document.at_xpath('people/person')
|
24
52
|
#
|
53
|
+
# person.class # => Oga::XML::Element
|
54
|
+
#
|
55
|
+
# @see [#xpath]
|
56
|
+
# @return [Oga::XML::Node|Oga::XML::Attribute]
|
25
57
|
def at_xpath(*args)
|
26
58
|
result = xpath(*args)
|
27
59
|
|
28
60
|
result.is_a?(XML::NodeSet) ? result.first : result
|
29
61
|
end
|
30
62
|
|
31
|
-
##
|
32
63
|
# Evaluates the given CSS expression.
|
33
64
|
#
|
34
|
-
#
|
35
|
-
#
|
65
|
+
# Querying a document:
|
66
|
+
#
|
67
|
+
# document = Oga.parse_xml <<-EOF
|
68
|
+
# <people>
|
69
|
+
# <person age="25">Alice</person>
|
70
|
+
# </people>
|
71
|
+
# EOF
|
72
|
+
#
|
73
|
+
# document.css('people person')
|
36
74
|
#
|
75
|
+
# @param [String] expression The CSS expression to run.
|
76
|
+
# @return [Oga::XML::NodeSet]
|
37
77
|
def css(expression)
|
38
|
-
ast
|
78
|
+
ast = CSS::Parser.parse_with_cache(expression)
|
79
|
+
block = XPath::Compiler.compile_with_cache(ast)
|
39
80
|
|
40
|
-
|
81
|
+
block.call(self)
|
41
82
|
end
|
42
83
|
|
43
|
-
|
44
|
-
# Evaluates the given CSS expression and returns the first node in the
|
45
|
-
# set.
|
84
|
+
# Evaluates the CSS expression and returns the first matched node.
|
46
85
|
#
|
47
86
|
# @see [#css]
|
48
|
-
#
|
87
|
+
# @return [Oga::XML::Node|Oga::XML::Attribute]
|
49
88
|
def at_css(*args)
|
50
89
|
result = css(*args)
|
51
90
|
|