oga 1.0.3-java → 1.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/c/lexer.c +164 -181
- data/ext/java/org/liboga/xml/Lexer.java +115 -126
- data/ext/ragel/base_lexer.rl +35 -21
- data/lib/liboga.jar +0 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/attribute.rb +1 -1
- data/lib/oga/xml/entities.rb +30 -0
- data/lib/oga/xml/lexer.rb +1 -1
- data/lib/oga/xml/node.rb +21 -0
- metadata +2 -2
data/ext/ragel/base_lexer.rl
CHANGED
@@ -419,18 +419,24 @@
|
|
419
419
|
any $count_newlines;
|
420
420
|
*|;
|
421
421
|
|
422
|
-
# Characters that can be used for unquoted HTML attribute values.
|
423
|
-
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
424
|
-
# for more info.
|
425
|
-
html_unquoted_value =
|
426
|
-
^(squote | dquote | whitespace_or_newline)
|
427
|
-
^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
|
428
|
-
|
429
422
|
# Machine used after matching the "=" of an attribute and just before moving
|
430
423
|
# into the actual attribute value.
|
431
424
|
attribute_pre := |*
|
432
425
|
whitespace_or_newline $count_newlines;
|
433
426
|
|
427
|
+
squote | dquote => {
|
428
|
+
fhold;
|
429
|
+
|
430
|
+
if ( lines > 0 )
|
431
|
+
{
|
432
|
+
advance_line(lines);
|
433
|
+
|
434
|
+
lines = 0;
|
435
|
+
}
|
436
|
+
|
437
|
+
fnext quoted_attribute_value;
|
438
|
+
};
|
439
|
+
|
434
440
|
any => {
|
435
441
|
fhold;
|
436
442
|
|
@@ -443,25 +449,33 @@
|
|
443
449
|
|
444
450
|
if ( html_p )
|
445
451
|
{
|
446
|
-
fnext
|
452
|
+
fnext unquoted_attribute_value;
|
447
453
|
}
|
454
|
+
/* XML doesn't support unquoted attribute values */
|
448
455
|
else
|
449
456
|
{
|
450
|
-
|
457
|
+
fret;
|
451
458
|
}
|
452
459
|
};
|
453
460
|
*|;
|
454
461
|
|
455
|
-
# Machine
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
462
|
+
# Machine for processing unquoted HTML attribute values.
|
463
|
+
#
|
464
|
+
# The HTML specification describes a set of characters that can be allowed
|
465
|
+
# in an unquoted value at https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example.
|
466
|
+
#
|
467
|
+
# As is always the case with HTML everybody completely ignores this
|
468
|
+
# specification and thus every library and browser out there is expected to
|
469
|
+
# support input such as `<a href=lol("javascript","is","great")></a>`.
|
470
|
+
#
|
471
|
+
# Oga too has to support this, thus the only characters it disallows in
|
472
|
+
# unquoted attribute values are:
|
473
|
+
#
|
474
|
+
# * > (used for terminating open tags)
|
475
|
+
# * whitespace
|
476
|
+
#
|
477
|
+
unquoted_attribute_value := |*
|
478
|
+
^('>' | whitespace_or_newline)+ => {
|
465
479
|
callback_simple(id_on_string_squote);
|
466
480
|
|
467
481
|
callback(id_on_string_body, data, encoding, ts, te);
|
@@ -472,8 +486,8 @@
|
|
472
486
|
any => hold_and_return;
|
473
487
|
*|;
|
474
488
|
|
475
|
-
# Machine used for processing XML attribute values.
|
476
|
-
|
489
|
+
# Machine used for processing quoted XML/HTML attribute values.
|
490
|
+
quoted_attribute_value := |*
|
477
491
|
# The following two actions use "fnext" instead of "fcall". Combined
|
478
492
|
# with "element_head" using "fcall" to jump to this machine this means
|
479
493
|
# we can return back to "element_head" after processing a single string.
|
data/lib/liboga.jar
CHANGED
Binary file
|
data/lib/oga/version.rb
CHANGED
data/lib/oga/xml/attribute.rb
CHANGED
data/lib/oga/xml/entities.rb
CHANGED
@@ -32,6 +32,20 @@ module Oga
|
|
32
32
|
'<' => '&lt;',
|
33
33
|
}
|
34
34
|
|
35
|
+
##
|
36
|
+
# Hash containing characters and the corresponding XML entities to use
|
37
|
+
# when encoding XML/HTML attribute values.
|
38
|
+
#
|
39
|
+
# @return [Hash]
|
40
|
+
#
|
41
|
+
ENCODE_ATTRIBUTE_MAPPING = {
|
42
|
+
'&' => '&amp;',
|
43
|
+
'>' => '&gt;',
|
44
|
+
'<' => '&lt;',
|
45
|
+
"'" => '&apos;',
|
46
|
+
'"' => '&quot;'
|
47
|
+
}
|
48
|
+
|
35
49
|
##
|
36
50
|
# @return [String]
|
37
51
|
#
|
@@ -56,6 +70,12 @@ module Oga
|
|
56
70
|
#
|
57
71
|
ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
|
58
72
|
|
73
|
+
##
|
74
|
+
# @return [Regexp]
|
75
|
+
#
|
76
|
+
ENCODE_ATTRIBUTE_REGEXP =
|
77
|
+
Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
|
78
|
+
|
59
79
|
##
|
60
80
|
# Decodes XML entities.
|
61
81
|
#
|
@@ -87,6 +107,16 @@ module Oga
|
|
87
107
|
def self.encode(input, mapping = ENCODE_MAPPING)
|
88
108
|
input.gsub(ENCODE_REGEXP, mapping)
|
89
109
|
end
|
110
|
+
|
111
|
+
##
|
112
|
+
# Encodes special characters in an XML attribute value.
|
113
|
+
#
|
114
|
+
# @param [String] input
|
115
|
+
# @return [String]
|
116
|
+
#
|
117
|
+
def self.encode_attribute(input)
|
118
|
+
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
119
|
+
end
|
90
120
|
end # Entities
|
91
121
|
end # XML
|
92
122
|
end # Oga
|
data/lib/oga/xml/lexer.rb
CHANGED
@@ -499,7 +499,7 @@ module Oga
|
|
499
499
|
##
|
500
500
|
# Called on the closing tag of an element.
|
501
501
|
#
|
502
|
-
# @param [String]
|
502
|
+
# @param [String] name The name of the element (minus namespace
|
503
503
|
# prefix). This is not set for self closing tags.
|
504
504
|
#
|
505
505
|
def on_element_end(name = nil)
|
data/lib/oga/xml/node.rb
CHANGED
@@ -152,6 +152,27 @@ module Oga
|
|
152
152
|
return node_set.delete(self) if node_set
|
153
153
|
end
|
154
154
|
|
155
|
+
##
|
156
|
+
# Replaces the current node with another.
|
157
|
+
#
|
158
|
+
# @example Replacing with an element
|
159
|
+
# element = Oga::XML::Element.new(:name => 'div')
|
160
|
+
# some_node.replace(element)
|
161
|
+
#
|
162
|
+
# @example Replacing with a String
|
163
|
+
# some_node.replace('this will replace the current node with a text node')
|
164
|
+
#
|
165
|
+
# @param [String|Oga::XML::Node] other
|
166
|
+
#
|
167
|
+
def replace(other)
|
168
|
+
if other.is_a?(String)
|
169
|
+
other = Text.new(:text => other)
|
170
|
+
end
|
171
|
+
|
172
|
+
before(other)
|
173
|
+
remove
|
174
|
+
end
|
175
|
+
|
155
176
|
##
|
156
177
|
# Inserts the given node before the current node.
|
157
178
|
#
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oga
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.1.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Yorick Peterse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ast
|