oga 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/changelog.md +9 -0
- data/ext/c/lexer.c +782 -621
- data/ext/c/lexer.rl +5 -5
- data/ext/java/org/liboga/xml/Lexer.java +265 -231
- data/ext/java/org/liboga/xml/Lexer.rl +3 -4
- data/ext/ragel/base_lexer.rl +23 -14
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/lexer.rb +6 -7
- data/lib/oga/xml/text.rb +2 -2
- metadata +2 -2
@@ -187,14 +187,13 @@ public class Lexer extends RubyObject
|
|
187
187
|
}
|
188
188
|
|
189
189
|
/**
|
190
|
-
*
|
191
|
-
* Oga::XML::Lexer#inside_html_script? for more information.
|
190
|
+
* See Oga::XML::Lexer#literal_html_element? for more information.
|
192
191
|
*/
|
193
|
-
public Boolean inside_html_script_p()
|
192
|
+
public Boolean literal_html_element_p()
|
194
193
|
{
|
195
194
|
ThreadContext context = this.runtime.getCurrentContext();
|
196
195
|
|
197
|
-
return this.callMethod(context, "inside_html_script?").isTrue();
|
196
|
+
return this.callMethod(context, "literal_html_element?").isTrue();
|
198
197
|
}
|
199
198
|
}
|
200
199
|
|
data/ext/ragel/base_lexer.rl
CHANGED
@@ -328,11 +328,9 @@
|
|
328
328
|
'>' => {
|
329
329
|
callback_simple(id_on_element_open_end);
|
330
330
|
|
331
|
-
if (
|
331
|
+
if ( literal_html_element_p() )
|
332
332
|
{
|
333
|
-
|
334
|
-
|
335
|
-
fnext script_text;
|
333
|
+
fnext literal_html_element;
|
336
334
|
}
|
337
335
|
else
|
338
336
|
{
|
@@ -401,14 +399,15 @@
|
|
401
399
|
};
|
402
400
|
*|;
|
403
401
|
|
404
|
-
#
|
405
|
-
#
|
406
|
-
# machine.
|
407
|
-
|
408
|
-
|
409
|
-
callback(id_on_text, data, encoding, mark, ts);
|
402
|
+
# Certain tags in HTML can contain basically anything except for the literal
|
403
|
+
# closing tag. Two examples are script and style tags. As a result of this
|
404
|
+
# we can't use the regular text machine.
|
405
|
+
literal_html_closing_tags = '</script>' | '</style>';
|
406
|
+
literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines;
|
410
407
|
|
411
|
-
|
408
|
+
literal_html_element := |*
|
409
|
+
literal_html_allowed => {
|
410
|
+
callback(id_on_text, data, encoding, ts, te);
|
412
411
|
|
413
412
|
if ( lines > 0 )
|
414
413
|
{
|
@@ -416,13 +415,23 @@
|
|
416
415
|
|
417
416
|
lines = 0;
|
418
417
|
}
|
418
|
+
};
|
419
419
|
|
420
|
-
|
420
|
+
literal_html_allowed %{ mark = p; } literal_html_closing_tags => {
|
421
|
+
callback(id_on_text, data, encoding, ts, mark);
|
422
|
+
|
423
|
+
p = mark - 1;
|
424
|
+
mark = 0;
|
425
|
+
|
426
|
+
if ( lines > 0 )
|
427
|
+
{
|
428
|
+
advance_line(lines);
|
429
|
+
|
430
|
+
lines = 0;
|
431
|
+
}
|
421
432
|
|
422
433
|
fnext main;
|
423
434
|
};
|
424
|
-
|
425
|
-
any $count_newlines;
|
426
435
|
*|;
|
427
436
|
|
428
437
|
# The main machine aka the entry point of Ragel.
|
data/lib/oga/version.rb
CHANGED
data/lib/oga/xml/lexer.rb
CHANGED
@@ -41,12 +41,11 @@ module Oga
|
|
41
41
|
attr_reader :html
|
42
42
|
|
43
43
|
##
|
44
|
-
#
|
45
|
-
# tag.
|
44
|
+
# Names of HTML tags of which the content should be lexed as-is.
|
46
45
|
#
|
47
|
-
# @return [
|
46
|
+
# @return [Array]
|
48
47
|
#
|
49
|
-
|
48
|
+
LITERAL_HTML_ELEMENTS = %w{script style}
|
50
49
|
|
51
50
|
##
|
52
51
|
# @param [String|IO] data The data to lex. This can either be a String or
|
@@ -190,12 +189,12 @@ module Oga
|
|
190
189
|
end
|
191
190
|
|
192
191
|
##
|
193
|
-
# Returns true if the current element
|
192
|
+
# Returns true if the current element's content should be lexed as-is.
|
194
193
|
#
|
195
194
|
# @return [TrueClass|FalseClass]
|
196
195
|
#
|
197
|
-
def inside_html_script?
|
198
|
-
return html? && current_element
|
196
|
+
def literal_html_element?
|
197
|
+
return html? && LITERAL_HTML_ELEMENTS.include?(current_element)
|
199
198
|
end
|
200
199
|
|
201
200
|
##
|
data/lib/oga/xml/text.rb
CHANGED
@@ -12,8 +12,8 @@ module Oga
|
|
12
12
|
node = parent
|
13
13
|
root = root_node
|
14
14
|
|
15
|
-
if root.is_a?(Document) and node.is_a?(Element) \
|
16
|
-
and
|
15
|
+
if root.is_a?(Document) and node.is_a?(Element) and root.html? \
|
16
|
+
and Lexer::LITERAL_HTML_ELEMENTS.include?(node.name)
|
17
17
|
return super
|
18
18
|
else
|
19
19
|
return Entities.encode(super)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oga
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yorick Peterse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: racc
|