oga 0.2.2-java → 0.2.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -187,14 +187,13 @@ public class Lexer extends RubyObject
187
187
  }
188
188
 
189
189
  /**
190
- * Returns true if we're in an HTML script tag. See
191
- * Oga::XML::Lexer#inside_html_script? for more information.
190
+ * See Oga::XML::Lexer#literal_html_element? for more information.
192
191
  */
193
- public Boolean inside_html_script_p()
192
+ public Boolean literal_html_element_p()
194
193
  {
195
194
  ThreadContext context = this.runtime.getCurrentContext();
196
195
 
197
- return this.callMethod(context, "inside_html_script?").isTrue();
196
+ return this.callMethod(context, "literal_html_element?").isTrue();
198
197
  }
199
198
  }
200
199
 
@@ -328,11 +328,9 @@
328
328
  '>' => {
329
329
  callback_simple(id_on_element_open_end);
330
330
 
331
- if ( inside_html_script_p() )
331
+ if ( literal_html_element_p() )
332
332
  {
333
- mark = ts + 1;
334
-
335
- fnext script_text;
333
+ fnext literal_html_element;
336
334
  }
337
335
  else
338
336
  {
@@ -401,14 +399,15 @@
401
399
  };
402
400
  *|;
403
401
 
404
- # <script> tags in HTML can contain basically anything except for the
405
- # literal "</script>". As a result of this we can't use the regular text
406
- # machine.
407
- script_text := |*
408
- '</script>' => {
409
- callback(id_on_text, data, encoding, mark, ts);
402
+ # Certain tags in HTML can contain basically anything except for the literal
403
+ # closing tag. Two examples are script and style tags. As a result of this
404
+ # we can't use the regular text machine.
405
+ literal_html_closing_tags = '</script>' | '</style>';
406
+ literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines;
410
407
 
411
- mark = 0;
408
+ literal_html_element := |*
409
+ literal_html_allowed => {
410
+ callback(id_on_text, data, encoding, ts, te);
412
411
 
413
412
  if ( lines > 0 )
414
413
  {
@@ -416,13 +415,23 @@
416
415
 
417
416
  lines = 0;
418
417
  }
418
+ };
419
419
 
420
- callback_simple(id_on_element_end);
420
+ literal_html_allowed %{ mark = p; } literal_html_closing_tags => {
421
+ callback(id_on_text, data, encoding, ts, mark);
422
+
423
+ p = mark - 1;
424
+ mark = 0;
425
+
426
+ if ( lines > 0 )
427
+ {
428
+ advance_line(lines);
429
+
430
+ lines = 0;
431
+ }
421
432
 
422
433
  fnext main;
423
434
  };
424
-
425
- any $count_newlines;
426
435
  *|;
427
436
 
428
437
  # The main machine aka the entry point of Ragel.
Binary file
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end # Oga
@@ -41,12 +41,11 @@ module Oga
41
41
  attr_reader :html
42
42
 
43
43
  ##
44
- # Element name used to determine if a tag being processed is a Javascript
45
- # tag.
44
+ # Names of the HTML tags whose content should be lexed as-is.
46
45
  #
47
- # @return [String]
46
+ # @return [Array]
48
47
  #
49
- SCRIPT_TAG = 'script'.freeze
48
+ LITERAL_HTML_ELEMENTS = %w{script style}
50
49
 
51
50
  ##
52
51
  # @param [String|IO] data The data to lex. This can either be a String or
@@ -190,12 +189,12 @@ module Oga
190
189
  end
191
190
 
192
191
  ##
193
- # Returns true if the current element is the HTML `<script>` element.
192
+ # Returns true if the current element's content should be lexed as-is.
194
193
  #
195
194
  # @return [TrueClass|FalseClass]
196
195
  #
197
- def inside_html_script?
198
- return html? && current_element == SCRIPT_TAG
196
+ def literal_html_element?
197
+ return html? && LITERAL_HTML_ELEMENTS.include?(current_element)
199
198
  end
200
199
 
201
200
  ##
@@ -12,8 +12,8 @@ module Oga
12
12
  node = parent
13
13
  root = root_node
14
14
 
15
- if root.is_a?(Document) and node.is_a?(Element) \
16
- and node.name == Lexer::SCRIPT_TAG and root.html?
15
+ if root.is_a?(Document) and node.is_a?(Element) and root.html? \
16
+ and Lexer::LITERAL_HTML_ELEMENTS.include?(node.name)
17
17
  return super
18
18
  else
19
19
  return Entities.encode(super)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: java
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-03 00:00:00.000000000 Z
11
+ date: 2015-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: racc