oga 1.0.2-java → 1.0.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/ext/c/lexer.c +394 -312
- data/ext/c/lexer.rl +3 -3
- data/ext/java/org/liboga/xml/Lexer.java +216 -172
- data/ext/java/org/liboga/xml/Lexer.rl +1 -1
- data/ext/ragel/base_lexer.rl +30 -11
- data/lib/liboga.jar +0 -0
- data/lib/oga/blacklist.rb +2 -2
- data/lib/oga/css/parser.rb +26 -28
- data/lib/oga/entity_decoder.rb +2 -2
- data/lib/oga/html/entities.rb +1 -1
- data/lib/oga/lru.rb +6 -6
- data/lib/oga/oga.rb +14 -14
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +2 -2
- data/lib/oga/xml/attribute.rb +16 -18
- data/lib/oga/xml/cdata.rb +1 -1
- data/lib/oga/xml/character_node.rb +3 -5
- data/lib/oga/xml/comment.rb +1 -1
- data/lib/oga/xml/doctype.rb +21 -23
- data/lib/oga/xml/document.rb +11 -17
- data/lib/oga/xml/element.rb +19 -29
- data/lib/oga/xml/entities.rb +3 -3
- data/lib/oga/xml/lexer.rb +34 -15
- data/lib/oga/xml/namespace.rb +8 -10
- data/lib/oga/xml/node.rb +8 -10
- data/lib/oga/xml/node_set.rb +16 -18
- data/lib/oga/xml/parser.rb +1 -1
- data/lib/oga/xml/processing_instruction.rb +3 -5
- data/lib/oga/xml/pull_parser.rb +6 -9
- data/lib/oga/xml/querying.rb +4 -4
- data/lib/oga/xml/sax_parser.rb +4 -4
- data/lib/oga/xml/text.rb +4 -4
- data/lib/oga/xml/xml_declaration.rb +11 -15
- data/lib/oga/xpath/evaluator.rb +81 -81
- metadata +66 -66
@@ -89,7 +89,7 @@ public class Lexer extends RubyObject
|
|
89
89
|
@JRubyMethod
|
90
90
|
public IRubyObject advance_native(ThreadContext context, RubyString rb_str)
|
91
91
|
{
|
92
|
-
Boolean html_p = this.callMethod(context, "html").isTrue();
|
92
|
+
Boolean html_p = this.callMethod(context, "html?").isTrue();
|
93
93
|
|
94
94
|
Encoding encoding = rb_str.getEncoding();
|
95
95
|
|
data/ext/ragel/base_lexer.rl
CHANGED
@@ -422,9 +422,35 @@
|
|
422
422
|
# Characters that can be used for unquoted HTML attribute values.
|
423
423
|
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
424
424
|
# for more info.
|
425
|
-
html_unquoted_value =
|
426
|
-
squote | dquote |
|
427
|
-
|
425
|
+
html_unquoted_value =
|
426
|
+
^(squote | dquote | whitespace_or_newline)
|
427
|
+
^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
|
428
|
+
|
429
|
+
# Machine used after matching the "=" of an attribute and just before moving
|
430
|
+
# into the actual attribute value.
|
431
|
+
attribute_pre := |*
|
432
|
+
whitespace_or_newline $count_newlines;
|
433
|
+
|
434
|
+
any => {
|
435
|
+
fhold;
|
436
|
+
|
437
|
+
if ( lines > 0 )
|
438
|
+
{
|
439
|
+
advance_line(lines);
|
440
|
+
|
441
|
+
lines = 0;
|
442
|
+
}
|
443
|
+
|
444
|
+
if ( html_p )
|
445
|
+
{
|
446
|
+
fnext html_attribute_value;
|
447
|
+
}
|
448
|
+
else
|
449
|
+
{
|
450
|
+
fnext xml_attribute_value;
|
451
|
+
}
|
452
|
+
};
|
453
|
+
*|;
|
428
454
|
|
429
455
|
# Machine used for processing HTML attribute values.
|
430
456
|
html_attribute_value := |*
|
@@ -482,14 +508,7 @@
|
|
482
508
|
|
483
509
|
# Attribute values.
|
484
510
|
'=' => {
|
485
|
-
if ( html_p )
486
|
-
{
|
487
|
-
fcall html_attribute_value;
|
488
|
-
}
|
489
|
-
else
|
490
|
-
{
|
491
|
-
fcall xml_attribute_value;
|
492
|
-
}
|
511
|
+
fcall attribute_pre;
|
493
512
|
};
|
494
513
|
|
495
514
|
# We're done with the open tag of the element.
|
data/lib/liboga.jar
CHANGED
Binary file
|
data/lib/oga/blacklist.rb
CHANGED
@@ -26,7 +26,7 @@ module Oga
|
|
26
26
|
# @return [TrueClass|FalseClass]
|
27
27
|
#
|
28
28
|
def allow?(name)
|
29
|
-
|
29
|
+
!names.include?(name)
|
30
30
|
end
|
31
31
|
|
32
32
|
##
|
@@ -34,7 +34,7 @@ module Oga
|
|
34
34
|
# @return [Oga::Blacklist]
|
35
35
|
#
|
36
36
|
def +(other)
|
37
|
-
|
37
|
+
self.class.new(names + other.names)
|
38
38
|
end
|
39
39
|
end # Blacklist
|
40
40
|
end # Oga
|
data/lib/oga/css/parser.rb
CHANGED
@@ -271,7 +271,7 @@ class Parser < LL::Driver
|
|
271
271
|
# @return [AST::Node]
|
272
272
|
#
|
273
273
|
def current_element
|
274
|
-
|
274
|
+
@current_element ||= s(:test, nil, '*')
|
275
275
|
end
|
276
276
|
|
277
277
|
##
|
@@ -286,7 +286,7 @@ class Parser < LL::Driver
|
|
286
286
|
def parse
|
287
287
|
reset
|
288
288
|
|
289
|
-
|
289
|
+
super
|
290
290
|
end
|
291
291
|
|
292
292
|
##
|
@@ -298,8 +298,6 @@ class Parser < LL::Driver
|
|
298
298
|
#
|
299
299
|
def on_test(namespace, name)
|
300
300
|
@current_element = s(:test, namespace, name)
|
301
|
-
|
302
|
-
return @current_element
|
303
301
|
end
|
304
302
|
|
305
303
|
##
|
@@ -310,7 +308,7 @@ class Parser < LL::Driver
|
|
310
308
|
def on_pseudo_class(name, arg = nil)
|
311
309
|
handler = "on_pseudo_class_#{name.gsub('-', '_')}"
|
312
310
|
|
313
|
-
|
311
|
+
arg ? send(handler, arg) : send(handler)
|
314
312
|
end
|
315
313
|
|
316
314
|
##
|
@@ -319,7 +317,7 @@ class Parser < LL::Driver
|
|
319
317
|
# @return [AST::Node]
|
320
318
|
#
|
321
319
|
def on_pseudo_class_root
|
322
|
-
|
320
|
+
s(:call, 'not', s(:axis, 'parent', s(:test, nil, '*')))
|
323
321
|
end
|
324
322
|
|
325
323
|
##
|
@@ -329,7 +327,7 @@ class Parser < LL::Driver
|
|
329
327
|
# @return [AST::Node]
|
330
328
|
#
|
331
329
|
def on_pseudo_class_nth_child(arg)
|
332
|
-
|
330
|
+
generate_nth_child('preceding-sibling', arg)
|
333
331
|
end
|
334
332
|
|
335
333
|
##
|
@@ -339,7 +337,7 @@ class Parser < LL::Driver
|
|
339
337
|
# @return [AST::Node]
|
340
338
|
#
|
341
339
|
def on_pseudo_class_nth_last_child(arg)
|
342
|
-
|
340
|
+
generate_nth_child('following-sibling', arg)
|
343
341
|
end
|
344
342
|
|
345
343
|
##
|
@@ -349,7 +347,7 @@ class Parser < LL::Driver
|
|
349
347
|
# @return [AST::Node]
|
350
348
|
#
|
351
349
|
def on_pseudo_class_nth_of_type(arg)
|
352
|
-
|
350
|
+
generate_nth_child('preceding-sibling', arg, current_element)
|
353
351
|
end
|
354
352
|
|
355
353
|
##
|
@@ -359,7 +357,7 @@ class Parser < LL::Driver
|
|
359
357
|
# @return [AST::Node]
|
360
358
|
#
|
361
359
|
def on_pseudo_class_nth_last_of_type(arg)
|
362
|
-
|
360
|
+
generate_nth_child('following-sibling', arg, current_element)
|
363
361
|
end
|
364
362
|
|
365
363
|
##
|
@@ -368,7 +366,7 @@ class Parser < LL::Driver
|
|
368
366
|
# @return [AST::Node]
|
369
367
|
#
|
370
368
|
def on_pseudo_class_first_child
|
371
|
-
|
369
|
+
generate_no_siblings('preceding-sibling')
|
372
370
|
end
|
373
371
|
|
374
372
|
##
|
@@ -377,7 +375,7 @@ class Parser < LL::Driver
|
|
377
375
|
# @return [AST::Node]
|
378
376
|
#
|
379
377
|
def on_pseudo_class_last_child
|
380
|
-
|
378
|
+
generate_no_siblings('following-sibling')
|
381
379
|
end
|
382
380
|
|
383
381
|
##
|
@@ -386,7 +384,7 @@ class Parser < LL::Driver
|
|
386
384
|
# @return [AST::Node]
|
387
385
|
#
|
388
386
|
def on_pseudo_class_first_of_type
|
389
|
-
|
387
|
+
generate_no_siblings('preceding-sibling', current_element)
|
390
388
|
end
|
391
389
|
|
392
390
|
##
|
@@ -395,7 +393,7 @@ class Parser < LL::Driver
|
|
395
393
|
# @return [AST::Node]
|
396
394
|
#
|
397
395
|
def on_pseudo_class_last_of_type
|
398
|
-
|
396
|
+
generate_no_siblings('following-sibling', current_element)
|
399
397
|
end
|
400
398
|
|
401
399
|
##
|
@@ -404,7 +402,7 @@ class Parser < LL::Driver
|
|
404
402
|
# @return [AST::Node]
|
405
403
|
#
|
406
404
|
def on_pseudo_class_only_child
|
407
|
-
|
405
|
+
s(:and, on_pseudo_class_first_child, on_pseudo_class_last_child)
|
408
406
|
end
|
409
407
|
|
410
408
|
##
|
@@ -413,7 +411,7 @@ class Parser < LL::Driver
|
|
413
411
|
# @return [AST::Node]
|
414
412
|
#
|
415
413
|
def on_pseudo_class_only_of_type
|
416
|
-
|
414
|
+
s(:and, on_pseudo_class_first_of_type, on_pseudo_class_last_of_type)
|
417
415
|
end
|
418
416
|
|
419
417
|
##
|
@@ -422,7 +420,7 @@ class Parser < LL::Driver
|
|
422
420
|
# @return [AST::Node]
|
423
421
|
#
|
424
422
|
def on_pseudo_class_empty
|
425
|
-
|
423
|
+
s(:call, 'not', s(:axis, 'child', s(:type_test, 'node')))
|
426
424
|
end
|
427
425
|
|
428
426
|
##
|
@@ -433,7 +431,7 @@ class Parser < LL::Driver
|
|
433
431
|
# @return [AST::Node]
|
434
432
|
#
|
435
433
|
def on_op_eq(attr, value)
|
436
|
-
|
434
|
+
s(:eq, attr, value)
|
437
435
|
end
|
438
436
|
|
439
437
|
##
|
@@ -444,7 +442,7 @@ class Parser < LL::Driver
|
|
444
442
|
# @return [AST::Node]
|
445
443
|
#
|
446
444
|
def on_op_space_in(attr, value)
|
447
|
-
|
445
|
+
s(
|
448
446
|
:call,
|
449
447
|
'contains',
|
450
448
|
s(:call, 'concat', s(:string, ' '), attr, s(:string, ' ')),
|
@@ -460,7 +458,7 @@ class Parser < LL::Driver
|
|
460
458
|
# @return [AST::Node]
|
461
459
|
#
|
462
460
|
def on_op_starts_with(attr, value)
|
463
|
-
|
461
|
+
s(:call, 'starts-with', attr, value)
|
464
462
|
end
|
465
463
|
|
466
464
|
##
|
@@ -471,7 +469,7 @@ class Parser < LL::Driver
|
|
471
469
|
# @return [AST::Node]
|
472
470
|
#
|
473
471
|
def on_op_ends_with(attr, value)
|
474
|
-
|
472
|
+
s(
|
475
473
|
:eq,
|
476
474
|
s(
|
477
475
|
:call,
|
@@ -500,7 +498,7 @@ class Parser < LL::Driver
|
|
500
498
|
# @return [AST::Node]
|
501
499
|
#
|
502
500
|
def on_op_in(attr, value)
|
503
|
-
|
501
|
+
s(:call, 'contains', attr, value)
|
504
502
|
end
|
505
503
|
|
506
504
|
##
|
@@ -511,7 +509,7 @@ class Parser < LL::Driver
|
|
511
509
|
# @return [AST::Node]
|
512
510
|
#
|
513
511
|
def on_op_hyphen_in(attr, value)
|
514
|
-
|
512
|
+
s(
|
515
513
|
:or,
|
516
514
|
s(:eq, attr, value),
|
517
515
|
s(
|
@@ -566,7 +564,7 @@ class Parser < LL::Driver
|
|
566
564
|
# @return [AST::Node]
|
567
565
|
#
|
568
566
|
def generate_no_siblings(axis, test = s(:test, nil, '*'))
|
569
|
-
|
567
|
+
s(:eq, s(:call, 'count', s(:axis, axis, test)), s(:int, 0))
|
570
568
|
end
|
571
569
|
|
572
570
|
##
|
@@ -574,7 +572,7 @@ class Parser < LL::Driver
|
|
574
572
|
# @return [TrueClass|FalseClass]
|
575
573
|
#
|
576
574
|
def int_node?(node)
|
577
|
-
|
575
|
+
node.type.equal?(:int)
|
578
576
|
end
|
579
577
|
|
580
578
|
##
|
@@ -582,7 +580,7 @@ class Parser < LL::Driver
|
|
582
580
|
# @return [TrueClass|FalseClass]
|
583
581
|
#
|
584
582
|
def non_positive_number?(node)
|
585
|
-
|
583
|
+
node.children[0] <= 0
|
586
584
|
end
|
587
585
|
|
588
586
|
##
|
@@ -590,7 +588,7 @@ class Parser < LL::Driver
|
|
590
588
|
# @return [Symbol]
|
591
589
|
#
|
592
590
|
def step_comparison(node)
|
593
|
-
|
591
|
+
node.children[0] >= 0 ? :gte : :lte
|
594
592
|
end
|
595
593
|
|
596
594
|
##
|
@@ -610,7 +608,7 @@ class Parser < LL::Driver
|
|
610
608
|
mod_val = s(:int, 1)
|
611
609
|
end
|
612
610
|
|
613
|
-
|
611
|
+
mod_val
|
614
612
|
end
|
615
613
|
|
616
614
|
def _rule_0(val)
|
data/lib/oga/entity_decoder.rb
CHANGED
@@ -4,7 +4,7 @@ module Oga
|
|
4
4
|
# @see [decode]
|
5
5
|
#
|
6
6
|
def self.try_decode(input, html = false)
|
7
|
-
|
7
|
+
input ? decode(input, html) : nil
|
8
8
|
end
|
9
9
|
|
10
10
|
##
|
@@ -15,7 +15,7 @@ module Oga
|
|
15
15
|
def self.decode(input, html = false)
|
16
16
|
decoder = html ? HTML::Entities : XML::Entities
|
17
17
|
|
18
|
-
|
18
|
+
decoder.decode(input)
|
19
19
|
end
|
20
20
|
end # EntityDecoder
|
21
21
|
end # Oga
|
data/lib/oga/html/entities.rb
CHANGED
data/lib/oga/lru.rb
CHANGED
@@ -50,7 +50,7 @@ module Oga
|
|
50
50
|
# @return [Fixnum]
|
51
51
|
#
|
52
52
|
def maximum
|
53
|
-
|
53
|
+
synchronize { @maximum }
|
54
54
|
end
|
55
55
|
|
56
56
|
##
|
@@ -60,7 +60,7 @@ module Oga
|
|
60
60
|
# @return [Mixed]
|
61
61
|
#
|
62
62
|
def [](key)
|
63
|
-
|
63
|
+
synchronize { @cache[key] }
|
64
64
|
end
|
65
65
|
|
66
66
|
##
|
@@ -90,14 +90,14 @@ module Oga
|
|
90
90
|
# @return [Mixed]
|
91
91
|
#
|
92
92
|
def get_or_set(key)
|
93
|
-
|
93
|
+
synchronize { self[key] ||= yield }
|
94
94
|
end
|
95
95
|
|
96
96
|
##
|
97
97
|
# @return [Array]
|
98
98
|
#
|
99
99
|
def keys
|
100
|
-
|
100
|
+
synchronize { @keys }
|
101
101
|
end
|
102
102
|
|
103
103
|
##
|
@@ -105,7 +105,7 @@ module Oga
|
|
105
105
|
# @return [TrueClass|FalseClass]
|
106
106
|
#
|
107
107
|
def key?(key)
|
108
|
-
|
108
|
+
synchronize { @cache.key?(key) }
|
109
109
|
end
|
110
110
|
|
111
111
|
##
|
@@ -122,7 +122,7 @@ module Oga
|
|
122
122
|
# @return [Fixnum]
|
123
123
|
#
|
124
124
|
def size
|
125
|
-
|
125
|
+
synchronize { @cache.size }
|
126
126
|
end
|
127
127
|
|
128
128
|
alias_method :length, :size
|
data/lib/oga/oga.rb
CHANGED
@@ -5,11 +5,12 @@ module Oga
|
|
5
5
|
# @example
|
6
6
|
# document = Oga.parse_xml('<root>Hello</root>')
|
7
7
|
#
|
8
|
-
# @
|
8
|
+
# @see [Oga::XML::Lexer#initialize]
|
9
|
+
#
|
9
10
|
# @return [Oga::XML::Document]
|
10
11
|
#
|
11
|
-
def self.parse_xml(xml)
|
12
|
-
|
12
|
+
def self.parse_xml(xml, options = {})
|
13
|
+
XML::Parser.new(xml, options).parse
|
13
14
|
end
|
14
15
|
|
15
16
|
##
|
@@ -18,11 +19,12 @@ module Oga
|
|
18
19
|
# @example
|
19
20
|
# document = Oga.parse_html('<html>...</html>')
|
20
21
|
#
|
21
|
-
# @
|
22
|
+
# @see [Oga::XML::Lexer#initialize]
|
23
|
+
#
|
22
24
|
# @return [Oga::XML::Document]
|
23
25
|
#
|
24
|
-
def self.parse_html(html)
|
25
|
-
|
26
|
+
def self.parse_html(html, options = {})
|
27
|
+
HTML::Parser.new(html, options).parse
|
26
28
|
end
|
27
29
|
|
28
30
|
##
|
@@ -33,11 +35,10 @@ module Oga
|
|
33
35
|
#
|
34
36
|
# Oga.sax_parse_html(handler, '<root>Hello</root>')
|
35
37
|
#
|
36
|
-
# @
|
37
|
-
# @param [String|IO] xml The XML to parse.
|
38
|
+
# @see [Oga::XML::SaxParser#initialize]
|
38
39
|
#
|
39
|
-
def self.sax_parse_xml(handler, xml)
|
40
|
-
XML::SaxParser.new(handler, xml).parse
|
40
|
+
def self.sax_parse_xml(handler, xml, options = {})
|
41
|
+
XML::SaxParser.new(handler, xml, options).parse
|
41
42
|
end
|
42
43
|
|
43
44
|
##
|
@@ -48,10 +49,9 @@ module Oga
|
|
48
49
|
#
|
49
50
|
# Oga.sax_parse_html(handler, '<script>foo()</script>')
|
50
51
|
#
|
51
|
-
# @
|
52
|
-
# @param [String|IO] html The HTML to parse.
|
52
|
+
# @see [Oga::XML::SaxParser#initialize]
|
53
53
|
#
|
54
|
-
def self.sax_parse_html(handler, html)
|
55
|
-
HTML::SaxParser.new(handler, html).parse
|
54
|
+
def self.sax_parse_html(handler, html, options = {})
|
55
|
+
HTML::SaxParser.new(handler, html, options).parse
|
56
56
|
end
|
57
57
|
end # Oga
|