oga 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -187,14 +187,13 @@ public class Lexer extends RubyObject
187
187
  }
188
188
 
189
189
  /**
190
- * Returns true if we're in an HTML script tag. See
191
- * Oga::XML::Lexer#inside_html_script? for more information.
190
+ * See Oga::XML::Lexer#literal_html_element? for more information.
192
191
  */
193
- public Boolean inside_html_script_p()
192
+ public Boolean literal_html_element_p()
194
193
  {
195
194
  ThreadContext context = this.runtime.getCurrentContext();
196
195
 
197
- return this.callMethod(context, "inside_html_script?").isTrue();
196
+ return this.callMethod(context, "literal_html_element?").isTrue();
198
197
  }
199
198
  }
200
199
 
@@ -328,11 +328,9 @@
328
328
  '>' => {
329
329
  callback_simple(id_on_element_open_end);
330
330
 
331
- if ( inside_html_script_p() )
331
+ if ( literal_html_element_p() )
332
332
  {
333
- mark = ts + 1;
334
-
335
- fnext script_text;
333
+ fnext literal_html_element;
336
334
  }
337
335
  else
338
336
  {
@@ -401,14 +399,15 @@
401
399
  };
402
400
  *|;
403
401
 
404
- # <script> tags in HTML can contain basically anything except for the
405
- # literal "</script>". As a result of this we can't use the regular text
406
- # machine.
407
- script_text := |*
408
- '</script>' => {
409
- callback(id_on_text, data, encoding, mark, ts);
402
+ # Certain tags in HTML can contain basically anything except for the literal
403
+ # closing tag. Two examples are script and style tags. As a result of this
404
+ # we can't use the regular text machine.
405
+ literal_html_closing_tags = '</script>' | '</style>';
406
+ literal_html_allowed = (any* -- literal_html_closing_tags) $count_newlines;
410
407
 
411
- mark = 0;
408
+ literal_html_element := |*
409
+ literal_html_allowed => {
410
+ callback(id_on_text, data, encoding, ts, te);
412
411
 
413
412
  if ( lines > 0 )
414
413
  {
@@ -416,13 +415,23 @@
416
415
 
417
416
  lines = 0;
418
417
  }
418
+ };
419
419
 
420
- callback_simple(id_on_element_end);
420
+ literal_html_allowed %{ mark = p; } literal_html_closing_tags => {
421
+ callback(id_on_text, data, encoding, ts, mark);
422
+
423
+ p = mark - 1;
424
+ mark = 0;
425
+
426
+ if ( lines > 0 )
427
+ {
428
+ advance_line(lines);
429
+
430
+ lines = 0;
431
+ }
421
432
 
422
433
  fnext main;
423
434
  };
424
-
425
- any $count_newlines;
426
435
  *|;
427
436
 
428
437
  # The main machine aka the entry point of Ragel.
data/lib/oga/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end # Oga
data/lib/oga/xml/lexer.rb CHANGED
@@ -41,12 +41,11 @@ module Oga
41
41
  attr_reader :html
42
42
 
43
43
  ##
44
- # Element name used to determine if a tag being processed is a Javascript
45
- # tag.
44
+ # Names of HTML tags of which the content should be lexed as-is.
46
45
  #
47
- # @return [String]
46
+ # @return [Array]
48
47
  #
49
- SCRIPT_TAG = 'script'.freeze
48
+ LITERAL_HTML_ELEMENTS = %w{script style}
50
49
 
51
50
  ##
52
51
  # @param [String|IO] data The data to lex. This can either be a String or
@@ -190,12 +189,12 @@ module Oga
190
189
  end
191
190
 
192
191
  ##
193
- # Returns true if the current element is the HTML `<script>` element.
192
+ # Returns true if the current element's content should be lexed as-is.
194
193
  #
195
194
  # @return [TrueClass|FalseClass]
196
195
  #
197
- def inside_html_script?
198
- return html? && current_element == SCRIPT_TAG
196
+ def literal_html_element?
197
+ return html? && LITERAL_HTML_ELEMENTS.include?(current_element)
199
198
  end
200
199
 
201
200
  ##
data/lib/oga/xml/text.rb CHANGED
@@ -12,8 +12,8 @@ module Oga
12
12
  node = parent
13
13
  root = root_node
14
14
 
15
- if root.is_a?(Document) and node.is_a?(Element) \
16
- and node.name == Lexer::SCRIPT_TAG and root.html?
15
+ if root.is_a?(Document) and node.is_a?(Element) and root.html? \
16
+ and Lexer::LITERAL_HTML_ELEMENTS.include?(node.name)
17
17
  return super
18
18
  else
19
19
  return Entities.encode(super)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-03 00:00:00.000000000 Z
11
+ date: 2015-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: racc