oga 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/ext/c/lexer.c +394 -312
- data/ext/c/lexer.rl +3 -3
- data/ext/java/org/liboga/xml/Lexer.java +216 -172
- data/ext/java/org/liboga/xml/Lexer.rl +1 -1
- data/ext/ragel/base_lexer.rl +30 -11
- data/lib/oga/blacklist.rb +2 -2
- data/lib/oga/css/parser.rb +26 -28
- data/lib/oga/entity_decoder.rb +2 -2
- data/lib/oga/html/entities.rb +1 -1
- data/lib/oga/lru.rb +6 -6
- data/lib/oga/oga.rb +14 -14
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +2 -2
- data/lib/oga/xml/attribute.rb +16 -18
- data/lib/oga/xml/cdata.rb +1 -1
- data/lib/oga/xml/character_node.rb +3 -5
- data/lib/oga/xml/comment.rb +1 -1
- data/lib/oga/xml/doctype.rb +21 -23
- data/lib/oga/xml/document.rb +11 -17
- data/lib/oga/xml/element.rb +19 -29
- data/lib/oga/xml/entities.rb +3 -3
- data/lib/oga/xml/lexer.rb +34 -15
- data/lib/oga/xml/namespace.rb +8 -10
- data/lib/oga/xml/node.rb +8 -10
- data/lib/oga/xml/node_set.rb +16 -18
- data/lib/oga/xml/parser.rb +1 -1
- data/lib/oga/xml/processing_instruction.rb +3 -5
- data/lib/oga/xml/pull_parser.rb +6 -9
- data/lib/oga/xml/querying.rb +4 -4
- data/lib/oga/xml/sax_parser.rb +4 -4
- data/lib/oga/xml/text.rb +4 -4
- data/lib/oga/xml/xml_declaration.rb +11 -15
- data/lib/oga/xpath/evaluator.rb +81 -81
- metadata +3 -3
@@ -89,7 +89,7 @@ public class Lexer extends RubyObject
|
|
89
89
|
@JRubyMethod
|
90
90
|
public IRubyObject advance_native(ThreadContext context, RubyString rb_str)
|
91
91
|
{
|
92
|
-
Boolean html_p = this.callMethod(context, "html").isTrue();
|
92
|
+
Boolean html_p = this.callMethod(context, "html?").isTrue();
|
93
93
|
|
94
94
|
Encoding encoding = rb_str.getEncoding();
|
95
95
|
|
data/ext/ragel/base_lexer.rl
CHANGED
@@ -422,9 +422,35 @@
|
|
422
422
|
# Characters that can be used for unquoted HTML attribute values.
|
423
423
|
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
424
424
|
# for more info.
|
425
|
-
html_unquoted_value =
|
426
|
-
squote | dquote |
|
427
|
-
|
425
|
+
html_unquoted_value =
|
426
|
+
^(squote | dquote | whitespace_or_newline)
|
427
|
+
^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
|
428
|
+
|
429
|
+
# Machine used after matching the "=" of an attribute and just before moving
|
430
|
+
# into the actual attribute value.
|
431
|
+
attribute_pre := |*
|
432
|
+
whitespace_or_newline $count_newlines;
|
433
|
+
|
434
|
+
any => {
|
435
|
+
fhold;
|
436
|
+
|
437
|
+
if ( lines > 0 )
|
438
|
+
{
|
439
|
+
advance_line(lines);
|
440
|
+
|
441
|
+
lines = 0;
|
442
|
+
}
|
443
|
+
|
444
|
+
if ( html_p )
|
445
|
+
{
|
446
|
+
fnext html_attribute_value;
|
447
|
+
}
|
448
|
+
else
|
449
|
+
{
|
450
|
+
fnext xml_attribute_value;
|
451
|
+
}
|
452
|
+
};
|
453
|
+
*|;
|
428
454
|
|
429
455
|
# Machine used for processing HTML attribute values.
|
430
456
|
html_attribute_value := |*
|
@@ -482,14 +508,7 @@
|
|
482
508
|
|
483
509
|
# Attribute values.
|
484
510
|
'=' => {
|
485
|
-
|
486
|
-
{
|
487
|
-
fcall html_attribute_value;
|
488
|
-
}
|
489
|
-
else
|
490
|
-
{
|
491
|
-
fcall xml_attribute_value;
|
492
|
-
}
|
511
|
+
fcall attribute_pre;
|
493
512
|
};
|
494
513
|
|
495
514
|
# We're done with the open tag of the element.
|
data/lib/oga/blacklist.rb
CHANGED
@@ -26,7 +26,7 @@ module Oga
|
|
26
26
|
# @return [TrueClass|FalseClass]
|
27
27
|
#
|
28
28
|
def allow?(name)
|
29
|
-
|
29
|
+
!names.include?(name)
|
30
30
|
end
|
31
31
|
|
32
32
|
##
|
@@ -34,7 +34,7 @@ module Oga
|
|
34
34
|
# @return [Oga::Blacklist]
|
35
35
|
#
|
36
36
|
def +(other)
|
37
|
-
|
37
|
+
self.class.new(names + other.names)
|
38
38
|
end
|
39
39
|
end # Blacklist
|
40
40
|
end # Oga
|
data/lib/oga/css/parser.rb
CHANGED
@@ -271,7 +271,7 @@ class Parser < LL::Driver
|
|
271
271
|
# @return [AST::Node]
|
272
272
|
#
|
273
273
|
def current_element
|
274
|
-
|
274
|
+
@current_element ||= s(:test, nil, '*')
|
275
275
|
end
|
276
276
|
|
277
277
|
##
|
@@ -286,7 +286,7 @@ class Parser < LL::Driver
|
|
286
286
|
def parse
|
287
287
|
reset
|
288
288
|
|
289
|
-
|
289
|
+
super
|
290
290
|
end
|
291
291
|
|
292
292
|
##
|
@@ -298,8 +298,6 @@ class Parser < LL::Driver
|
|
298
298
|
#
|
299
299
|
def on_test(namespace, name)
|
300
300
|
@current_element = s(:test, namespace, name)
|
301
|
-
|
302
|
-
return @current_element
|
303
301
|
end
|
304
302
|
|
305
303
|
##
|
@@ -310,7 +308,7 @@ class Parser < LL::Driver
|
|
310
308
|
def on_pseudo_class(name, arg = nil)
|
311
309
|
handler = "on_pseudo_class_#{name.gsub('-', '_')}"
|
312
310
|
|
313
|
-
|
311
|
+
arg ? send(handler, arg) : send(handler)
|
314
312
|
end
|
315
313
|
|
316
314
|
##
|
@@ -319,7 +317,7 @@ class Parser < LL::Driver
|
|
319
317
|
# @return [AST::Node]
|
320
318
|
#
|
321
319
|
def on_pseudo_class_root
|
322
|
-
|
320
|
+
s(:call, 'not', s(:axis, 'parent', s(:test, nil, '*')))
|
323
321
|
end
|
324
322
|
|
325
323
|
##
|
@@ -329,7 +327,7 @@ class Parser < LL::Driver
|
|
329
327
|
# @return [AST::Node]
|
330
328
|
#
|
331
329
|
def on_pseudo_class_nth_child(arg)
|
332
|
-
|
330
|
+
generate_nth_child('preceding-sibling', arg)
|
333
331
|
end
|
334
332
|
|
335
333
|
##
|
@@ -339,7 +337,7 @@ class Parser < LL::Driver
|
|
339
337
|
# @return [AST::Node]
|
340
338
|
#
|
341
339
|
def on_pseudo_class_nth_last_child(arg)
|
342
|
-
|
340
|
+
generate_nth_child('following-sibling', arg)
|
343
341
|
end
|
344
342
|
|
345
343
|
##
|
@@ -349,7 +347,7 @@ class Parser < LL::Driver
|
|
349
347
|
# @return [AST::Node]
|
350
348
|
#
|
351
349
|
def on_pseudo_class_nth_of_type(arg)
|
352
|
-
|
350
|
+
generate_nth_child('preceding-sibling', arg, current_element)
|
353
351
|
end
|
354
352
|
|
355
353
|
##
|
@@ -359,7 +357,7 @@ class Parser < LL::Driver
|
|
359
357
|
# @return [AST::Node]
|
360
358
|
#
|
361
359
|
def on_pseudo_class_nth_last_of_type(arg)
|
362
|
-
|
360
|
+
generate_nth_child('following-sibling', arg, current_element)
|
363
361
|
end
|
364
362
|
|
365
363
|
##
|
@@ -368,7 +366,7 @@ class Parser < LL::Driver
|
|
368
366
|
# @return [AST::Node]
|
369
367
|
#
|
370
368
|
def on_pseudo_class_first_child
|
371
|
-
|
369
|
+
generate_no_siblings('preceding-sibling')
|
372
370
|
end
|
373
371
|
|
374
372
|
##
|
@@ -377,7 +375,7 @@ class Parser < LL::Driver
|
|
377
375
|
# @return [AST::Node]
|
378
376
|
#
|
379
377
|
def on_pseudo_class_last_child
|
380
|
-
|
378
|
+
generate_no_siblings('following-sibling')
|
381
379
|
end
|
382
380
|
|
383
381
|
##
|
@@ -386,7 +384,7 @@ class Parser < LL::Driver
|
|
386
384
|
# @return [AST::Node]
|
387
385
|
#
|
388
386
|
def on_pseudo_class_first_of_type
|
389
|
-
|
387
|
+
generate_no_siblings('preceding-sibling', current_element)
|
390
388
|
end
|
391
389
|
|
392
390
|
##
|
@@ -395,7 +393,7 @@ class Parser < LL::Driver
|
|
395
393
|
# @return [AST::Node]
|
396
394
|
#
|
397
395
|
def on_pseudo_class_last_of_type
|
398
|
-
|
396
|
+
generate_no_siblings('following-sibling', current_element)
|
399
397
|
end
|
400
398
|
|
401
399
|
##
|
@@ -404,7 +402,7 @@ class Parser < LL::Driver
|
|
404
402
|
# @return [AST::Node]
|
405
403
|
#
|
406
404
|
def on_pseudo_class_only_child
|
407
|
-
|
405
|
+
s(:and, on_pseudo_class_first_child, on_pseudo_class_last_child)
|
408
406
|
end
|
409
407
|
|
410
408
|
##
|
@@ -413,7 +411,7 @@ class Parser < LL::Driver
|
|
413
411
|
# @return [AST::Node]
|
414
412
|
#
|
415
413
|
def on_pseudo_class_only_of_type
|
416
|
-
|
414
|
+
s(:and, on_pseudo_class_first_of_type, on_pseudo_class_last_of_type)
|
417
415
|
end
|
418
416
|
|
419
417
|
##
|
@@ -422,7 +420,7 @@ class Parser < LL::Driver
|
|
422
420
|
# @return [AST::Node]
|
423
421
|
#
|
424
422
|
def on_pseudo_class_empty
|
425
|
-
|
423
|
+
s(:call, 'not', s(:axis, 'child', s(:type_test, 'node')))
|
426
424
|
end
|
427
425
|
|
428
426
|
##
|
@@ -433,7 +431,7 @@ class Parser < LL::Driver
|
|
433
431
|
# @return [AST::Node]
|
434
432
|
#
|
435
433
|
def on_op_eq(attr, value)
|
436
|
-
|
434
|
+
s(:eq, attr, value)
|
437
435
|
end
|
438
436
|
|
439
437
|
##
|
@@ -444,7 +442,7 @@ class Parser < LL::Driver
|
|
444
442
|
# @return [AST::Node]
|
445
443
|
#
|
446
444
|
def on_op_space_in(attr, value)
|
447
|
-
|
445
|
+
s(
|
448
446
|
:call,
|
449
447
|
'contains',
|
450
448
|
s(:call, 'concat', s(:string, ' '), attr, s(:string, ' ')),
|
@@ -460,7 +458,7 @@ class Parser < LL::Driver
|
|
460
458
|
# @return [AST::Node]
|
461
459
|
#
|
462
460
|
def on_op_starts_with(attr, value)
|
463
|
-
|
461
|
+
s(:call, 'starts-with', attr, value)
|
464
462
|
end
|
465
463
|
|
466
464
|
##
|
@@ -471,7 +469,7 @@ class Parser < LL::Driver
|
|
471
469
|
# @return [AST::Node]
|
472
470
|
#
|
473
471
|
def on_op_ends_with(attr, value)
|
474
|
-
|
472
|
+
s(
|
475
473
|
:eq,
|
476
474
|
s(
|
477
475
|
:call,
|
@@ -500,7 +498,7 @@ class Parser < LL::Driver
|
|
500
498
|
# @return [AST::Node]
|
501
499
|
#
|
502
500
|
def on_op_in(attr, value)
|
503
|
-
|
501
|
+
s(:call, 'contains', attr, value)
|
504
502
|
end
|
505
503
|
|
506
504
|
##
|
@@ -511,7 +509,7 @@ class Parser < LL::Driver
|
|
511
509
|
# @return [AST::Node]
|
512
510
|
#
|
513
511
|
def on_op_hyphen_in(attr, value)
|
514
|
-
|
512
|
+
s(
|
515
513
|
:or,
|
516
514
|
s(:eq, attr, value),
|
517
515
|
s(
|
@@ -566,7 +564,7 @@ class Parser < LL::Driver
|
|
566
564
|
# @return [AST::Node]
|
567
565
|
#
|
568
566
|
def generate_no_siblings(axis, test = s(:test, nil, '*'))
|
569
|
-
|
567
|
+
s(:eq, s(:call, 'count', s(:axis, axis, test)), s(:int, 0))
|
570
568
|
end
|
571
569
|
|
572
570
|
##
|
@@ -574,7 +572,7 @@ class Parser < LL::Driver
|
|
574
572
|
# @return [TrueClass|FalseClass]
|
575
573
|
#
|
576
574
|
def int_node?(node)
|
577
|
-
|
575
|
+
node.type.equal?(:int)
|
578
576
|
end
|
579
577
|
|
580
578
|
##
|
@@ -582,7 +580,7 @@ class Parser < LL::Driver
|
|
582
580
|
# @return [TrueClass|FalseClass]
|
583
581
|
#
|
584
582
|
def non_positive_number?(node)
|
585
|
-
|
583
|
+
node.children[0] <= 0
|
586
584
|
end
|
587
585
|
|
588
586
|
##
|
@@ -590,7 +588,7 @@ class Parser < LL::Driver
|
|
590
588
|
# @return [Symbol]
|
591
589
|
#
|
592
590
|
def step_comparison(node)
|
593
|
-
|
591
|
+
node.children[0] >= 0 ? :gte : :lte
|
594
592
|
end
|
595
593
|
|
596
594
|
##
|
@@ -610,7 +608,7 @@ class Parser < LL::Driver
|
|
610
608
|
mod_val = s(:int, 1)
|
611
609
|
end
|
612
610
|
|
613
|
-
|
611
|
+
mod_val
|
614
612
|
end
|
615
613
|
|
616
614
|
def _rule_0(val)
|
data/lib/oga/entity_decoder.rb
CHANGED
@@ -4,7 +4,7 @@ module Oga
|
|
4
4
|
# @see [decode]
|
5
5
|
#
|
6
6
|
def self.try_decode(input, html = false)
|
7
|
-
|
7
|
+
input ? decode(input, html) : nil
|
8
8
|
end
|
9
9
|
|
10
10
|
##
|
@@ -15,7 +15,7 @@ module Oga
|
|
15
15
|
def self.decode(input, html = false)
|
16
16
|
decoder = html ? HTML::Entities : XML::Entities
|
17
17
|
|
18
|
-
|
18
|
+
decoder.decode(input)
|
19
19
|
end
|
20
20
|
end # EntityDecoder
|
21
21
|
end # Oga
|
data/lib/oga/html/entities.rb
CHANGED
data/lib/oga/lru.rb
CHANGED
@@ -50,7 +50,7 @@ module Oga
|
|
50
50
|
# @return [Fixnum]
|
51
51
|
#
|
52
52
|
def maximum
|
53
|
-
|
53
|
+
synchronize { @maximum }
|
54
54
|
end
|
55
55
|
|
56
56
|
##
|
@@ -60,7 +60,7 @@ module Oga
|
|
60
60
|
# @return [Mixed]
|
61
61
|
#
|
62
62
|
def [](key)
|
63
|
-
|
63
|
+
synchronize { @cache[key] }
|
64
64
|
end
|
65
65
|
|
66
66
|
##
|
@@ -90,14 +90,14 @@ module Oga
|
|
90
90
|
# @return [Mixed]
|
91
91
|
#
|
92
92
|
def get_or_set(key)
|
93
|
-
|
93
|
+
synchronize { self[key] ||= yield }
|
94
94
|
end
|
95
95
|
|
96
96
|
##
|
97
97
|
# @return [Array]
|
98
98
|
#
|
99
99
|
def keys
|
100
|
-
|
100
|
+
synchronize { @keys }
|
101
101
|
end
|
102
102
|
|
103
103
|
##
|
@@ -105,7 +105,7 @@ module Oga
|
|
105
105
|
# @return [TrueClass|FalseClass]
|
106
106
|
#
|
107
107
|
def key?(key)
|
108
|
-
|
108
|
+
synchronize { @cache.key?(key) }
|
109
109
|
end
|
110
110
|
|
111
111
|
##
|
@@ -122,7 +122,7 @@ module Oga
|
|
122
122
|
# @return [Fixnum]
|
123
123
|
#
|
124
124
|
def size
|
125
|
-
|
125
|
+
synchronize { @cache.size }
|
126
126
|
end
|
127
127
|
|
128
128
|
alias_method :length, :size
|
data/lib/oga/oga.rb
CHANGED
@@ -5,11 +5,12 @@ module Oga
|
|
5
5
|
# @example
|
6
6
|
# document = Oga.parse_xml('<root>Hello</root>')
|
7
7
|
#
|
8
|
-
# @
|
8
|
+
# @see [Oga::XML::Lexer#initialize]
|
9
|
+
#
|
9
10
|
# @return [Oga::XML::Document]
|
10
11
|
#
|
11
|
-
def self.parse_xml(xml)
|
12
|
-
|
12
|
+
def self.parse_xml(xml, options = {})
|
13
|
+
XML::Parser.new(xml, options).parse
|
13
14
|
end
|
14
15
|
|
15
16
|
##
|
@@ -18,11 +19,12 @@ module Oga
|
|
18
19
|
# @example
|
19
20
|
# document = Oga.parse_html('<html>...</html>')
|
20
21
|
#
|
21
|
-
# @
|
22
|
+
# @see [Oga::XML::Lexer#initialize]
|
23
|
+
#
|
22
24
|
# @return [Oga::XML::Document]
|
23
25
|
#
|
24
|
-
def self.parse_html(html)
|
25
|
-
|
26
|
+
def self.parse_html(html, options = {})
|
27
|
+
HTML::Parser.new(html, options).parse
|
26
28
|
end
|
27
29
|
|
28
30
|
##
|
@@ -33,11 +35,10 @@ module Oga
|
|
33
35
|
#
|
34
36
|
# Oga.sax_parse_html(handler, '<root>Hello</root>')
|
35
37
|
#
|
36
|
-
# @
|
37
|
-
# @param [String|IO] xml The XML to parse.
|
38
|
+
# @see [Oga::XML::SaxParser#initialize]
|
38
39
|
#
|
39
|
-
def self.sax_parse_xml(handler, xml)
|
40
|
-
XML::SaxParser.new(handler, xml).parse
|
40
|
+
def self.sax_parse_xml(handler, xml, options = {})
|
41
|
+
XML::SaxParser.new(handler, xml, options).parse
|
41
42
|
end
|
42
43
|
|
43
44
|
##
|
@@ -48,10 +49,9 @@ module Oga
|
|
48
49
|
#
|
49
50
|
# Oga.sax_parse_html(handler, '<script>foo()</script>')
|
50
51
|
#
|
51
|
-
# @
|
52
|
-
# @param [String|IO] html The HTML to parse.
|
52
|
+
# @see [Oga::XML::SaxParser#initialize]
|
53
53
|
#
|
54
|
-
def self.sax_parse_html(handler, html)
|
55
|
-
HTML::SaxParser.new(handler, html).parse
|
54
|
+
def self.sax_parse_html(handler, html, options = {})
|
55
|
+
HTML::SaxParser.new(handler, html, options).parse
|
56
56
|
end
|
57
57
|
end # Oga
|