oga 0.2.2-java → 0.2.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/changelog.md +9 -0
- data/ext/c/lexer.c +782 -621
- data/ext/c/lexer.rl +5 -5
- data/ext/java/org/liboga/xml/Lexer.java +265 -231
- data/ext/java/org/liboga/xml/Lexer.rl +3 -4
- data/ext/ragel/base_lexer.rl +23 -14
- data/lib/liboga.jar +0 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/lexer.rb +6 -7
- data/lib/oga/xml/text.rb +2 -2
- metadata +2 -2
@@ -187,14 +187,13 @@ public class Lexer extends RubyObject
|
|
187
187
|
}
|
188
188
|
|
189
189
|
/**
|
190
|
-
*
|
191
|
-
* Oga::XML::Lexer#inside_html_script? for more information.
|
190
|
+
* See Oga::XML::Lexer#literal_html_element? for more information.
|
192
191
|
*/
|
193
|
-
public Boolean
|
192
|
+
public Boolean literal_html_element_p()
|
194
193
|
{
|
195
194
|
ThreadContext context = this.runtime.getCurrentContext();
|
196
195
|
|
197
|
-
return this.callMethod(context, "
|
196
|
+
return this.callMethod(context, "literal_html_element?").isTrue();
|
198
197
|
}
|
199
198
|
}
|
200
199
|
|
data/ext/ragel/base_lexer.rl
CHANGED
@@ -328,11 +328,9 @@
|
|
328
328
|
'>' => {
|
329
329
|
callback_simple(id_on_element_open_end);
|
330
330
|
|
331
|
-
if (
|
331
|
+
if ( literal_html_element_p() )
|
332
332
|
{
|
333
|
-
|
334
|
-
|
335
|
-
fnext script_text;
|
333
|
+
fnext literal_html_element;
|
336
334
|
}
|
337
335
|
else
|
338
336
|
{
|
@@ -401,14 +399,15 @@
|
|
401
399
|
};
|
402
400
|
*|;
|
403
401
|
|
404
|
-
#
|
405
|
-
#
|
406
|
-
# machine.
|
407
|
-
|
408
|
-
|
409
|
-
callback(id_on_text, data, encoding, mark, ts);
|
402
|
+
# Certain tags in HTML can contain basically anything except for the literal
|
403
|
+
# closing tag. Two examples are script and style tags. As a result of this
|
404
|
+
# we can't use the regular text machine.
|
405
|
+
literal_html_closing_tags = '</script>' | '</style>';
|
406
|
+
literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines;
|
410
407
|
|
411
|
-
|
408
|
+
literal_html_element := |*
|
409
|
+
literal_html_allowed => {
|
410
|
+
callback(id_on_text, data, encoding, ts, te);
|
412
411
|
|
413
412
|
if ( lines > 0 )
|
414
413
|
{
|
@@ -416,13 +415,23 @@
|
|
416
415
|
|
417
416
|
lines = 0;
|
418
417
|
}
|
418
|
+
};
|
419
419
|
|
420
|
-
|
420
|
+
literal_html_allowed %{ mark = p; } literal_html_closing_tags => {
|
421
|
+
callback(id_on_text, data, encoding, ts, mark);
|
422
|
+
|
423
|
+
p = mark - 1;
|
424
|
+
mark = 0;
|
425
|
+
|
426
|
+
if ( lines > 0 )
|
427
|
+
{
|
428
|
+
advance_line(lines);
|
429
|
+
|
430
|
+
lines = 0;
|
431
|
+
}
|
421
432
|
|
422
433
|
fnext main;
|
423
434
|
};
|
424
|
-
|
425
|
-
any $count_newlines;
|
426
435
|
*|;
|
427
436
|
|
428
437
|
# The main machine aka the entry point of Ragel.
|
data/lib/liboga.jar
CHANGED
Binary file
|
data/lib/oga/version.rb
CHANGED
data/lib/oga/xml/lexer.rb
CHANGED
@@ -41,12 +41,11 @@ module Oga
|
|
41
41
|
attr_reader :html
|
42
42
|
|
43
43
|
##
|
44
|
-
#
|
45
|
-
# tag.
|
44
|
+
# Names of HTML tags whose content should be lexed as-is.
|
46
45
|
#
|
47
|
-
# @return [
|
46
|
+
# @return [Array]
|
48
47
|
#
|
49
|
-
|
48
|
+
LITERAL_HTML_ELEMENTS = %w{script style}
|
50
49
|
|
51
50
|
##
|
52
51
|
# @param [String|IO] data The data to lex. This can either be a String or
|
@@ -190,12 +189,12 @@ module Oga
|
|
190
189
|
end
|
191
190
|
|
192
191
|
##
|
193
|
-
# Returns true if the current element
|
192
|
+
# Returns true if the current element's content should be lexed as-is.
|
194
193
|
#
|
195
194
|
# @return [TrueClass|FalseClass]
|
196
195
|
#
|
197
|
-
def
|
198
|
-
return html? && current_element
|
196
|
+
def literal_html_element?
|
197
|
+
return html? && LITERAL_HTML_ELEMENTS.include?(current_element)
|
199
198
|
end
|
200
199
|
|
201
200
|
##
|
data/lib/oga/xml/text.rb
CHANGED
@@ -12,8 +12,8 @@ module Oga
|
|
12
12
|
node = parent
|
13
13
|
root = root_node
|
14
14
|
|
15
|
-
if root.is_a?(Document) and node.is_a?(Element) \
|
16
|
-
and
|
15
|
+
if root.is_a?(Document) and node.is_a?(Element) and root.html? \
|
16
|
+
and Lexer::LITERAL_HTML_ELEMENTS.include?(node.name)
|
17
17
|
return super
|
18
18
|
else
|
19
19
|
return Entities.encode(super)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oga
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Yorick Peterse
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: racc
|