oga 1.0.3-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -419,18 +419,24 @@
419
419
  any $count_newlines;
420
420
  *|;
421
421
 
422
- # Characters that can be used for unquoted HTML attribute values.
423
- # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
424
- # for more info.
425
- html_unquoted_value =
426
- ^(squote | dquote | whitespace_or_newline)
427
- ^('`' | '=' | '<' | '>' | whitespace_or_newline)+;
428
-
429
422
  # Machine used after matching the "=" of an attribute and just before moving
430
423
  # into the actual attribute value.
431
424
  attribute_pre := |*
432
425
  whitespace_or_newline $count_newlines;
433
426
 
427
+ squote | dquote => {
428
+ fhold;
429
+
430
+ if ( lines > 0 )
431
+ {
432
+ advance_line(lines);
433
+
434
+ lines = 0;
435
+ }
436
+
437
+ fnext quoted_attribute_value;
438
+ };
439
+
434
440
  any => {
435
441
  fhold;
436
442
 
@@ -443,25 +449,33 @@
443
449
 
444
450
  if ( html_p )
445
451
  {
446
- fnext html_attribute_value;
452
+ fnext unquoted_attribute_value;
447
453
  }
454
+ /* XML doesn't support unquoted attribute values */
448
455
  else
449
456
  {
450
- fnext xml_attribute_value;
457
+ fret;
451
458
  }
452
459
  };
453
460
  *|;
454
461
 
455
- # Machine used for processing HTML attribute values.
456
- html_attribute_value := |*
457
- squote | dquote => {
458
- fhold;
459
- fnext xml_attribute_value;
460
- };
461
-
462
- # Unquoted attribute values are lexed as if they were single quoted
463
- # strings.
464
- html_unquoted_value => {
462
+ # Machine for processing unquoted HTML attribute values.
463
+ #
464
+ # The HTML specification describes a set of characters that can be allowed
465
+ # in an unquoted value at https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example.
466
+ #
467
+ # As is always the case with HTML everybody completely ignores this
468
+ # specification and thus every library and browser out there is expected to
469
+ # support input such as `<a href=lol("javascript","is","great")></a>`.
470
+ #
471
+ # Oga too has to support this, thus the only characters it disallows in
472
+ # unquoted attribute values are:
473
+ #
474
+ # * > (used for terminating open tags)
475
+ # * whitespace
476
+ #
477
+ unquoted_attribute_value := |*
478
+ ^('>' | whitespace_or_newline)+ => {
465
479
  callback_simple(id_on_string_squote);
466
480
 
467
481
  callback(id_on_string_body, data, encoding, ts, te);
@@ -472,8 +486,8 @@
472
486
  any => hold_and_return;
473
487
  *|;
474
488
 
475
- # Machine used for processing XML attribute values.
476
- xml_attribute_value := |*
489
+ # Machine used for processing quoted XML/HTML attribute values.
490
+ quoted_attribute_value := |*
477
491
  # The following two actions use "fnext" instead of "fcall". Combined
478
492
  # with "element_head" using "fcall" to jump to this machine this means
479
493
  # we can return back to "element_head" after processing a single string.
Binary file
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '1.0.3'
2
+ VERSION = '1.1.0'
3
3
  end # Oga
@@ -101,7 +101,7 @@ module Oga
101
101
  full_name = name
102
102
  end
103
103
 
104
- enc_value = value ? Entities.encode(value) : nil
104
+ enc_value = value ? Entities.encode_attribute(value) : nil
105
105
 
106
106
  %Q(#{full_name}="#{enc_value}")
107
107
  end
@@ -32,6 +32,20 @@ module Oga
32
32
  '<' => '&lt;',
33
33
  }
34
34
 
35
+ ##
36
+ # Hash containing characters and the corresponding XML entities to use
37
+ # when encoding XML/HTML attribute values.
38
+ #
39
+ # @return [Hash]
40
+ #
41
+ ENCODE_ATTRIBUTE_MAPPING = {
42
+ '&' => '&amp;',
43
+ '>' => '&gt;',
44
+ '<' => '&lt;',
45
+ "'" => '&apos;',
46
+ '"' => '&quot;'
47
+ }
48
+
35
49
  ##
36
50
  # @return [String]
37
51
  #
@@ -56,6 +70,12 @@ module Oga
56
70
  #
57
71
  ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
58
72
 
73
+ ##
74
+ # @return [Regexp]
75
+ #
76
+ ENCODE_ATTRIBUTE_REGEXP =
77
+ Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
78
+
59
79
  ##
60
80
  # Decodes XML entities.
61
81
  #
@@ -87,6 +107,16 @@ module Oga
87
107
  def self.encode(input, mapping = ENCODE_MAPPING)
88
108
  input.gsub(ENCODE_REGEXP, mapping)
89
109
  end
110
+
111
+ ##
112
+ # Encodes special characters in an XML attribute value.
113
+ #
114
+ # @param [String] input
115
+ # @return [String]
116
+ #
117
+ def self.encode_attribute(input)
118
+ input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
119
+ end
90
120
  end # Entities
91
121
  end # XML
92
122
  end # Oga
@@ -499,7 +499,7 @@ module Oga
499
499
  ##
500
500
  # Called on the closing tag of an element.
501
501
  #
502
- # @param [String] ns_name The name of the element (minus namespace
502
+ # @param [String] name The name of the element (minus namespace
503
503
  # prefix). This is not set for self closing tags.
504
504
  #
505
505
  def on_element_end(name = nil)
@@ -152,6 +152,27 @@ module Oga
152
152
  return node_set.delete(self) if node_set
153
153
  end
154
154
 
155
+ ##
156
+ # Replaces the current node with another.
157
+ #
158
+ # @example Replacing with an element
159
+ # element = Oga::XML::Element.new(:name => 'div')
160
+ # some_node.replace(element)
161
+ #
162
+ # @example Replacing with a String
163
+ # some_node.replace('this will replace the current node with a text node')
164
+ #
165
+ # @param [String|Oga::XML::Node] other
166
+ #
167
+ def replace(other)
168
+ if other.is_a?(String)
169
+ other = Text.new(:text => other)
170
+ end
171
+
172
+ before(other)
173
+ remove
174
+ end
175
+
155
176
  ##
156
177
  # Inserts the given node before the current node.
157
178
  #
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: java
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-16 00:00:00.000000000 Z
11
+ date: 2015-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ast