oga 1.0.3-java → 1.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/c/lexer.c +164 -181
- data/ext/java/org/liboga/xml/Lexer.java +115 -126
- data/ext/ragel/base_lexer.rl +35 -21
- data/lib/liboga.jar +0 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/attribute.rb +1 -1
- data/lib/oga/xml/entities.rb +30 -0
- data/lib/oga/xml/lexer.rb +1 -1
- data/lib/oga/xml/node.rb +21 -0
- metadata +2 -2
data/ext/ragel/base_lexer.rl
CHANGED
@@ -419,18 +419,24 @@
|
|
419
419
|
any $count_newlines;
|
420
420
|
*|;
|
421
421
|
|
422
|
-
# Characters that can be used for unquoted HTML attribute values.
|
423
|
-
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
|
424
|
-
# for more info.
|
425
|
-
html_unquoted_value =
|
426
|
-
^(squote | dquote | whitespace_or_newline)
|
427
|
-
^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
|
428
|
-
|
429
422
|
# Machine used after matching the "=" of an attribute and just before moving
|
430
423
|
# into the actual attribute value.
|
431
424
|
attribute_pre := |*
|
432
425
|
whitespace_or_newline $count_newlines;
|
433
426
|
|
427
|
+
squote | dquote => {
|
428
|
+
fhold;
|
429
|
+
|
430
|
+
if ( lines > 0 )
|
431
|
+
{
|
432
|
+
advance_line(lines);
|
433
|
+
|
434
|
+
lines = 0;
|
435
|
+
}
|
436
|
+
|
437
|
+
fnext quoted_attribute_value;
|
438
|
+
};
|
439
|
+
|
434
440
|
any => {
|
435
441
|
fhold;
|
436
442
|
|
@@ -443,25 +449,33 @@
|
|
443
449
|
|
444
450
|
if ( html_p )
|
445
451
|
{
|
446
|
-
fnext
|
452
|
+
fnext unquoted_attribute_value;
|
447
453
|
}
|
454
|
+
/* XML doesn't support unquoted attribute values */
|
448
455
|
else
|
449
456
|
{
|
450
|
-
|
457
|
+
fret;
|
451
458
|
}
|
452
459
|
};
|
453
460
|
*|;
|
454
461
|
|
455
|
-
# Machine
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
462
|
+
# Machine for processing unquoted HTML attribute values.
|
463
|
+
#
|
464
|
+
# The HTML specification describes a set of characters that can be allowed
|
465
|
+
# in an unquoted value at https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example.
|
466
|
+
#
|
467
|
+
# As is always the case with HTML everybody completely ignores this
|
468
|
+
# specification and thus every library and browser out there is expected to
|
469
|
+
# support input such as `<a href=lol("javascript","is","great")></a>`.
|
470
|
+
#
|
471
|
+
# Oga too has to support this, thus the only characters it disallows in
|
472
|
+
# unquoted attribute values are:
|
473
|
+
#
|
474
|
+
# * > (used for terminating open tags)
|
475
|
+
# * whitespace
|
476
|
+
#
|
477
|
+
unquoted_attribute_value := |*
|
478
|
+
^('>' | whitespace_or_newline)+ => {
|
465
479
|
callback_simple(id_on_string_squote);
|
466
480
|
|
467
481
|
callback(id_on_string_body, data, encoding, ts, te);
|
@@ -472,8 +486,8 @@
|
|
472
486
|
any => hold_and_return;
|
473
487
|
*|;
|
474
488
|
|
475
|
-
# Machine used for processing XML attribute values.
|
476
|
-
|
489
|
+
# Machine used for processing quoted XML/HTML attribute values.
|
490
|
+
quoted_attribute_value := |*
|
477
491
|
# The following two actions use "fnext" instead of "fcall". Combined
|
478
492
|
# with "element_head" using "fcall" to jump to this machine this means
|
479
493
|
# we can return back to "element_head" after processing a single string.
|
data/lib/liboga.jar
CHANGED
Binary file
|
data/lib/oga/version.rb
CHANGED
data/lib/oga/xml/attribute.rb
CHANGED
data/lib/oga/xml/entities.rb
CHANGED
@@ -32,6 +32,20 @@ module Oga
|
|
32
32
|
'<' => '&lt;',
|
33
33
|
}
|
34
34
|
|
35
|
+
##
|
36
|
+
# Hash containing characters and the corresponding XML entities to use
|
37
|
+
# when encoding XML/HTML attribute values.
|
38
|
+
#
|
39
|
+
# @return [Hash]
|
40
|
+
#
|
41
|
+
ENCODE_ATTRIBUTE_MAPPING = {
|
42
|
+
'&' => '&amp;',
|
43
|
+
'>' => '&gt;',
|
44
|
+
'<' => '&lt;',
|
45
|
+
"'" => '&apos;',
|
46
|
+
'"' => '&quot;'
|
47
|
+
}
|
48
|
+
|
35
49
|
##
|
36
50
|
# @return [String]
|
37
51
|
#
|
@@ -56,6 +70,12 @@ module Oga
|
|
56
70
|
#
|
57
71
|
ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
|
58
72
|
|
73
|
+
##
|
74
|
+
# @return [Regexp]
|
75
|
+
#
|
76
|
+
ENCODE_ATTRIBUTE_REGEXP =
|
77
|
+
Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
|
78
|
+
|
59
79
|
##
|
60
80
|
# Decodes XML entities.
|
61
81
|
#
|
@@ -87,6 +107,16 @@ module Oga
|
|
87
107
|
def self.encode(input, mapping = ENCODE_MAPPING)
|
88
108
|
input.gsub(ENCODE_REGEXP, mapping)
|
89
109
|
end
|
110
|
+
|
111
|
+
##
|
112
|
+
# Encodes special characters in an XML attribute value.
|
113
|
+
#
|
114
|
+
# @param [String] input
|
115
|
+
# @return [String]
|
116
|
+
#
|
117
|
+
def self.encode_attribute(input)
|
118
|
+
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
119
|
+
end
|
90
120
|
end # Entities
|
91
121
|
end # XML
|
92
122
|
end # Oga
|
data/lib/oga/xml/lexer.rb
CHANGED
@@ -499,7 +499,7 @@ module Oga
|
|
499
499
|
##
|
500
500
|
# Called on the closing tag of an element.
|
501
501
|
#
|
502
|
-
# @param [String]
|
502
|
+
# @param [String] name The name of the element (minus namespace
|
503
503
|
# prefix). This is not set for self closing tags.
|
504
504
|
#
|
505
505
|
def on_element_end(name = nil)
|
data/lib/oga/xml/node.rb
CHANGED
@@ -152,6 +152,27 @@ module Oga
|
|
152
152
|
return node_set.delete(self) if node_set
|
153
153
|
end
|
154
154
|
|
155
|
+
##
|
156
|
+
# Replaces the current node with another.
|
157
|
+
#
|
158
|
+
# @example Replacing with an element
|
159
|
+
# element = Oga::XML::Element.new(:name => 'div')
|
160
|
+
# some_node.replace(element)
|
161
|
+
#
|
162
|
+
# @example Replacing with a String
|
163
|
+
# some_node.replace('this will replace the current node with a text node')
|
164
|
+
#
|
165
|
+
# @param [String|Oga::XML::Node] other
|
166
|
+
#
|
167
|
+
def replace(other)
|
168
|
+
if other.is_a?(String)
|
169
|
+
other = Text.new(:text => other)
|
170
|
+
end
|
171
|
+
|
172
|
+
before(other)
|
173
|
+
remove
|
174
|
+
end
|
175
|
+
|
155
176
|
##
|
156
177
|
# Inserts the given node before the current node.
|
157
178
|
#
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oga
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.1.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Yorick Peterse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ast
|