rexml 3.4.1 → 3.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rexml/node.rb CHANGED
@@ -26,7 +26,7 @@ module REXML
26
26
  # REXML::Formatters package for changing the output style.
27
27
  def to_s indent=nil
28
28
  unless indent.nil?
29
- Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated", uplevel: 1)
29
+ Kernel.warn( "#{self.class.name}#to_s(indent) parameter is deprecated", uplevel: 1)
30
30
  f = REXML::Formatters::Pretty.new( indent )
31
31
  f.write( self, rv = "" )
32
32
  else
@@ -68,7 +68,7 @@ module REXML
68
68
  each_recursive {|node|
69
69
  return node if block.call(node)
70
70
  }
71
- return nil
71
+ nil
72
72
  end
73
73
 
74
74
  # Returns the position that +self+ holds in its parent's array, indexed
data/lib/rexml/parsers/baseparser.rb CHANGED
@@ -144,6 +144,7 @@ module REXML
144
144
  PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
145
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
146
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
147
+ EQUAL_PATTERN = /\s*=\s*/um
147
148
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
148
149
  NAME_PATTERN = /#{NAME}/um
149
150
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
@@ -168,6 +169,7 @@ module REXML
168
169
  @entity_expansion_limit = Security.entity_expansion_limit
169
170
  @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
171
  @source.ensure_buffer
172
+ @version = nil
171
173
  end
172
174
 
173
175
  def add_listener( listener )
@@ -206,12 +208,12 @@ module REXML
206
208
 
207
209
  # Returns true if there are no more events
208
210
  def empty?
209
- return (@source.empty? and @stack.empty?)
211
+ (@source.empty? and @stack.empty?)
210
212
  end
211
213
 
212
214
  # Returns true if there are more events. Synonymous with !empty?
213
215
  def has_next?
214
- return !(@source.empty? and @stack.empty?)
216
+ !(@source.empty? and @stack.empty?)
215
217
  end
216
218
 
217
219
  # Push an event back on the head of the stream. This method
@@ -264,6 +266,11 @@ module REXML
264
266
  path = "/" + @tags.join("/")
265
267
  raise ParseException.new("Missing end tag for '#{path}'", @source)
266
268
  end
269
+
270
+ unless @document_status == :in_element
271
+ raise ParseException.new("Malformed XML: No root element", @source)
272
+ end
273
+
267
274
  return [ :end_document ]
268
275
  end
269
276
  return @stack.shift if @stack.size > 0
@@ -277,17 +284,10 @@ module REXML
277
284
  return process_instruction
278
285
  elsif @source.match?("<!", true)
279
286
  if @source.match?("--", true)
280
- md = @source.match(/(.*?)-->/um, true)
281
- if md.nil?
282
- raise REXML::ParseException.new("Unclosed comment", @source)
283
- end
284
- if /--|-\z/.match?(md[1])
285
- raise REXML::ParseException.new("Malformed comment", @source)
286
- end
287
- return [ :comment, md[1] ]
287
+ return [ :comment, process_comment ]
288
288
  elsif @source.match?("DOCTYPE", true)
289
289
  base_error_message = "Malformed DOCTYPE"
290
- unless @source.match?(/\s+/um, true)
290
+ unless @source.skip_spaces
291
291
  if @source.match?(">")
292
292
  message = "#{base_error_message}: name is missing"
293
293
  else
@@ -297,7 +297,7 @@ module REXML
297
297
  raise REXML::ParseException.new(message, @source)
298
298
  end
299
299
  name = parse_name(base_error_message)
300
- @source.match?(/\s*/um, true) # skip spaces
300
+ @source.skip_spaces
301
301
  if @source.match?("[", true)
302
302
  id = [nil, nil, nil]
303
303
  @document_status = :in_doctype
@@ -313,7 +313,7 @@ module REXML
313
313
  # For backward compatibility
314
314
  id[1], id[2] = id[2], nil
315
315
  end
316
- @source.match?(/\s*/um, true) # skip spaces
316
+ @source.skip_spaces
317
317
  if @source.match?("[", true)
318
318
  @document_status = :in_doctype
319
319
  elsif @source.match?(">", true)
@@ -326,7 +326,7 @@ module REXML
326
326
  end
327
327
  args = [:start_doctype, name, *id]
328
328
  if @document_status == :after_doctype
329
- @source.match?(/\s*/um, true)
329
+ @source.skip_spaces
330
330
  @stack << [ :end_doctype ]
331
331
  end
332
332
  return args
@@ -337,7 +337,7 @@ module REXML
337
337
  end
338
338
  end
339
339
  if @document_status == :in_doctype
340
- @source.match?(/\s*/um, true) # skip spaces
340
+ @source.skip_spaces
341
341
  start_position = @source.position
342
342
  if @source.match?("<!", true)
343
343
  if @source.match?("ELEMENT", true)
@@ -398,7 +398,7 @@ module REXML
398
398
  return [ :attlistdecl, element, pairs, contents ]
399
399
  elsif @source.match?("NOTATION", true)
400
400
  base_error_message = "Malformed notation declaration"
401
- unless @source.match?(/\s+/um, true)
401
+ unless @source.skip_spaces
402
402
  if @source.match?(">")
403
403
  message = "#{base_error_message}: name is missing"
404
404
  else
@@ -411,31 +411,28 @@ module REXML
411
411
  id = parse_id(base_error_message,
412
412
  accept_external_id: true,
413
413
  accept_public_id: true)
414
- @source.match?(/\s*/um, true) # skip spaces
414
+ @source.skip_spaces
415
415
  unless @source.match?(">", true)
416
416
  message = "#{base_error_message}: garbage before end >"
417
417
  raise REXML::ParseException.new(message, @source)
418
418
  end
419
419
  return [:notationdecl, name, *id]
420
- elsif md = @source.match(/--(.*?)-->/um, true)
421
- case md[1]
422
- when /--/, /-\z/
423
- raise REXML::ParseException.new("Malformed comment", @source)
424
- end
425
- return [ :comment, md[1] ] if md
420
+ elsif @source.match?("--", true)
421
+ return [ :comment, process_comment ]
422
+ else
423
+ raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
426
424
  end
427
425
  elsif match = @source.match(/(%.*?;)\s*/um, true)
428
426
  return [ :externalentity, match[1] ]
429
427
  elsif @source.match?(/\]\s*>/um, true)
430
428
  @document_status = :after_doctype
431
429
  return [ :end_doctype ]
432
- end
433
- if @document_status == :in_doctype
430
+ else
434
431
  raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
435
432
  end
436
433
  end
437
434
  if @document_status == :after_doctype
438
- @source.match?(/\s*/um, true)
435
+ @source.skip_spaces
439
436
  end
440
437
  begin
441
438
  start_position = @source.position
@@ -460,23 +457,19 @@ module REXML
460
457
  end
461
458
  return [ :end_element, last_tag ]
462
459
  elsif @source.match?("!", true)
463
- md = @source.match(/([^>]*>)/um)
464
460
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
465
- raise REXML::ParseException.new("Malformed node", @source) unless md
466
- if md[0][0] == ?-
467
- md = @source.match(/--(.*?)-->/um, true)
468
-
469
- if md.nil? || /--|-\z/.match?(md[1])
470
- raise REXML::ParseException.new("Malformed comment", @source)
461
+ if @source.match?("--", true)
462
+ return [ :comment, process_comment ]
463
+ elsif @source.match?("[CDATA[", true)
464
+ text = @source.read_until("]]>")
465
+ if text.chomp!("]]>")
466
+ return [ :cdata, text ]
467
+ else
468
+ raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
471
469
  end
472
-
473
- return [ :comment, md[1] ]
474
470
  else
475
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
476
- return [ :cdata, md[1] ] if md
471
+ raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
477
472
  end
478
- raise REXML::ParseException.new( "Declarations can only occur "+
479
- "in the doctype declaration.", @source)
480
473
  elsif @source.match?("?", true)
481
474
  return process_instruction
482
475
  else
@@ -536,7 +529,8 @@ module REXML
536
529
  raise REXML::ParseException.new( "Exception parsing",
537
530
  @source, self, (error ? error : $!) )
538
531
  end
539
- return [ :dummy ]
532
+ # NOTE: The end of the method never runs, because it is unreachable.
533
+ # All branches of code above have explicit unconditional return or raise statements.
540
534
  end
541
535
  private :pull_event
542
536
 
@@ -655,6 +649,10 @@ module REXML
655
649
  true
656
650
  end
657
651
 
652
+ def normalize_xml_declaration_encoding(xml_declaration_encoding)
653
+ /\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
654
+ end
655
+
658
656
  def parse_name(base_error_message)
659
657
  md = @source.match(Private::NAME_PATTERN, true)
660
658
  unless md
@@ -734,39 +732,99 @@ module REXML
734
732
  end
735
733
  end
736
734
 
735
+ def process_comment
736
+ text = @source.read_until("-->")
737
+ unless text.chomp!("-->")
738
+ raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
739
+ end
740
+
741
+ if text.include? "--" or text.end_with?("-")
742
+ raise REXML::ParseException.new("Malformed comment", @source)
743
+ end
744
+ text
745
+ end
746
+
737
747
  def process_instruction
738
748
  name = parse_name("Malformed XML: Invalid processing instruction node")
739
- if @source.match?(/\s+/um, true)
740
- match_data = @source.match(/(.*?)\?>/um, true)
741
- unless match_data
742
- raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
749
+ if name == "xml"
750
+ xml_declaration
751
+ else # PITarget
752
+ if @source.skip_spaces # e.g. <?name content?>
753
+ start_position = @source.position
754
+ content = @source.read_until("?>")
755
+ unless content.chomp!("?>")
756
+ @source.position = start_position
757
+ raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
758
+ end
759
+ else # e.g. <?name?>
760
+ content = nil
761
+ unless @source.match?("?>", true)
762
+ raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
763
+ end
743
764
  end
744
- content = match_data[1]
745
- else
746
- content = nil
765
+ [:processing_instruction, name, content]
766
+ end
767
+ end
768
+
769
+ def xml_declaration
770
+ unless @version.nil?
771
+ raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
772
+ end
773
+ if @document_status
774
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
775
+ end
776
+ unless @source.skip_spaces
777
+ raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
778
+ end
779
+ unless @source.match?("version", true)
780
+ raise ParseException.new("Malformed XML: XML declaration misses version", @source)
781
+ end
782
+ @version = parse_attribute_value_with_equal("xml")
783
+ unless @source.skip_spaces
747
784
  unless @source.match?("?>", true)
748
- raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
785
+ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
749
786
  end
787
+ encoding = normalize_xml_declaration_encoding(@source.encoding)
788
+ return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
750
789
  end
751
- if name == "xml"
752
- if @document_status
753
- raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
754
- end
755
- version = VERSION.match(content)
756
- version = version[1] unless version.nil?
757
- encoding = ENCODING.match(content)
758
- encoding = encoding[1] unless encoding.nil?
759
- if need_source_encoding_update?(encoding)
760
- @source.encoding = encoding
790
+
791
+ if @source.match?("encoding", true)
792
+ encoding = parse_attribute_value_with_equal("xml")
793
+ unless @source.skip_spaces
794
+ unless @source.match?("?>", true)
795
+ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
796
+ end
797
+ if need_source_encoding_update?(encoding)
798
+ @source.encoding = encoding
799
+ end
800
+ encoding ||= normalize_xml_declaration_encoding(@source.encoding)
801
+ return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
761
802
  end
762
- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
763
- encoding = "UTF-16"
803
+ end
804
+
805
+ if @source.match?("standalone", true)
806
+ standalone = parse_attribute_value_with_equal("xml")
807
+ case standalone
808
+ when "yes", "no"
809
+ else
810
+ raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
764
811
  end
765
- standalone = STANDALONE.match(content)
766
- standalone = standalone[1] unless standalone.nil?
767
- return [ :xmldecl, version, encoding, standalone ]
768
812
  end
769
- [:processing_instruction, name, content]
813
+ @source.skip_spaces
814
+ unless @source.match?("?>", true)
815
+ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
816
+ end
817
+
818
+ if need_source_encoding_update?(encoding)
819
+ @source.encoding = encoding
820
+ end
821
+ encoding ||= normalize_xml_declaration_encoding(@source.encoding)
822
+
823
+ # e.g. <?xml version="1.0" ?>
824
+ # <?xml version="1.1" encoding="UTF-8" ?>
825
+ # <?xml version="1.1" standalone="yes"?>
826
+ # <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
827
+ [ :xmldecl, @version, encoding, standalone ]
770
828
  end
771
829
 
772
830
  if StringScanner::Version < "3.1.1"
@@ -788,6 +846,25 @@ module REXML
788
846
  end
789
847
  end
790
848
 
849
+ def parse_attribute_value_with_equal(name)
850
+ unless @source.match?(Private::EQUAL_PATTERN, true)
851
+ message = "Missing attribute equal: <#{name}>"
852
+ raise REXML::ParseException.new(message, @source)
853
+ end
854
+ unless quote = scan_quote
855
+ message = "Missing attribute value start quote: <#{name}>"
856
+ raise REXML::ParseException.new(message, @source)
857
+ end
858
+ start_position = @source.position
859
+ value = @source.read_until(quote)
860
+ unless value.chomp!(quote)
861
+ @source.position = start_position
862
+ message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
863
+ raise REXML::ParseException.new(message, @source)
864
+ end
865
+ value
866
+ end
867
+
791
868
  def parse_attributes(prefixes)
792
869
  attributes = {}
793
870
  expanded_names = {}
@@ -802,23 +879,8 @@ module REXML
802
879
  name = match[1]
803
880
  prefix = match[2]
804
881
  local_part = match[3]
805
-
806
- unless @source.match?(/\s*=\s*/um, true)
807
- message = "Missing attribute equal: <#{name}>"
808
- raise REXML::ParseException.new(message, @source)
809
- end
810
- unless quote = scan_quote
811
- message = "Missing attribute value start quote: <#{name}>"
812
- raise REXML::ParseException.new(message, @source)
813
- end
814
- start_position = @source.position
815
- value = @source.read_until(quote)
816
- unless value.chomp!(quote)
817
- @source.position = start_position
818
- message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
819
- raise REXML::ParseException.new(message, @source)
820
- end
821
- @source.match?(/\s*/um, true)
882
+ value = parse_attribute_value_with_equal(name)
883
+ @source.skip_spaces
822
884
  if prefix == "xmlns"
823
885
  if local_part == "xml"
824
886
  if value != Private::XML_PREFIXED_NAMESPACE
data/lib/rexml/parsers/xpathparser.rb CHANGED
@@ -215,7 +215,7 @@ module REXML
215
215
  else
216
216
  path << yield( parsed )
217
217
  end
218
- return path.squeeze(" ")
218
+ path.squeeze(" ")
219
219
  end
220
220
  # For backward compatibility
221
221
  alias_method :preciate_to_string, :predicate_to_path
@@ -252,7 +252,7 @@ module REXML
252
252
  path = path[1..-1]
253
253
  end
254
254
  end
255
- return RelativeLocationPath( path, parsed ) if path.size > 0
255
+ RelativeLocationPath( path, parsed ) if path.size > 0
256
256
  end
257
257
 
258
258
  #RelativeLocationPath
@@ -388,7 +388,7 @@ module REXML
388
388
  else
389
389
  path = original_path
390
390
  end
391
- return path
391
+ path
392
392
  end
393
393
 
394
394
  # Filters the supplied nodeset on the predicate(s)
@@ -600,7 +600,7 @@ module REXML
600
600
  end
601
601
  rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
602
602
  parsed.concat(n)
603
- return rest
603
+ rest
604
604
  end
605
605
 
606
606
  #| FilterExpr Predicate
data/lib/rexml/quickpath.rb CHANGED
@@ -41,7 +41,7 @@ module REXML
41
41
  else
42
42
  results = filter([element], path)
43
43
  end
44
- return results
44
+ results
45
45
  end
46
46
 
47
47
  # Given an array of nodes it filters the array based on the path. The
@@ -51,18 +51,18 @@ module REXML
51
51
  return elements if path.nil? or path == '' or elements.size == 0
52
52
  case path
53
53
  when /^\/\//u # Descendant
54
- return axe( elements, "descendant-or-self", $' )
54
+ axe( elements, "descendant-or-self", $' )
55
55
  when /^\/?\b(\w[-\w]*)\b::/u # Axe
56
- return axe( elements, $1, $' )
56
+ axe( elements, $1, $' )
57
57
  when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
58
58
  rest = $'
59
59
  results = []
60
60
  elements.each do |element|
61
61
  results |= filter( element.to_a, rest )
62
62
  end
63
- return results
63
+ results
64
64
  when /^\/?(\w[-\w]*)\(/u # / Function
65
- return function( elements, $1, $' )
65
+ function( elements, $1, $' )
66
66
  when Namespace::NAMESPLIT # Element name
67
67
  name = $2
68
68
  ns = $1
@@ -73,21 +73,21 @@ module REXML
73
73
  (element.name == name and
74
74
  element.namespace == Functions.namespace_context[ns])))
75
75
  end
76
- return filter( elements, rest )
76
+ filter( elements, rest )
77
77
  when /^\/\[/u
78
78
  matches = []
79
79
  elements.each do |element|
80
80
  matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
81
81
  end
82
- return matches
82
+ matches
83
83
  when /^\[/u # Predicate
84
- return predicate( elements, path )
84
+ predicate( elements, path )
85
85
  when /^\/?\.\.\./u # Ancestor
86
- return axe( elements, "ancestor", $' )
86
+ axe( elements, "ancestor", $' )
87
87
  when /^\/?\.\./u # Parent
88
- return filter( elements.collect{|e|e.parent}, $' )
88
+ filter( elements.collect{|e|e.parent}, $' )
89
89
  when /^\/?\./u # Self
90
- return filter( elements, $' )
90
+ filter( elements, $' )
91
91
  when /^\*/u # Any
92
92
  results = []
93
93
  elements.each do |element|
@@ -98,9 +98,10 @@ module REXML
98
98
  # results |= filter( children, $' )
99
99
  #end
100
100
  end
101
- return results
101
+ results
102
+ else
103
+ []
102
104
  end
103
- return []
104
105
  end
105
106
 
106
107
  def QuickPath::axe( elements, axe_name, rest )
@@ -138,7 +139,7 @@ module REXML
138
139
  matches = filter(elements.collect{|element|
139
140
  element.previous_sibling}.uniq, rest )
140
141
  end
141
- return matches.uniq
142
+ matches.uniq
142
143
  end
143
144
 
144
145
  OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)'
@@ -200,15 +201,15 @@ module REXML
200
201
  results << element
201
202
  end
202
203
  end
203
- return filter( results, rest )
204
+ filter( results, rest )
204
205
  end
205
206
 
206
207
  def QuickPath::attribute( name )
207
- return Functions.node.attributes[name] if Functions.node.kind_of? Element
208
+ Functions.node.attributes[name] if Functions.node.kind_of? Element
208
209
  end
209
210
 
210
211
  def QuickPath::name()
211
- return Functions.node.name if Functions.node.kind_of? Element
212
+ Functions.node.name if Functions.node.kind_of? Element
212
213
  end
213
214
 
214
215
  def QuickPath::method_missing( id, *args )
@@ -234,7 +235,7 @@ module REXML
234
235
  results << element if Functions.pair[0] == res
235
236
  end
236
237
  end
237
- return results
238
+ results
238
239
  end
239
240
 
240
241
  def QuickPath::parse_args( element, string )
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.4.1"
34
+ VERSION = "3.4.3"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/security.rb CHANGED
@@ -10,7 +10,7 @@ module REXML
10
10
 
11
11
  # Get the entity expansion limit. By default the limit is set to 10000.
12
12
  def self.entity_expansion_limit
13
- return @@entity_expansion_limit
13
+ @@entity_expansion_limit
14
14
  end
15
15
 
16
16
  @@entity_expansion_text_limit = 10_240
@@ -22,7 +22,7 @@ module REXML
22
22
 
23
23
  # Get the entity expansion limit. By default the limit is set to 10240.
24
24
  def self.entity_expansion_text_limit
25
- return @@entity_expansion_text_limit
25
+ @@entity_expansion_text_limit
26
26
  end
27
27
  end
28
28
  end
data/lib/rexml/source.rb CHANGED
@@ -65,9 +65,10 @@ module REXML
65
65
  attr_reader :encoding
66
66
 
67
67
  module Private
68
+ SPACES_PATTERN = /\s+/um
68
69
  SCANNER_RESET_SIZE = 100000
69
70
  PRE_DEFINED_TERM_PATTERNS = {}
70
- pre_defined_terms = ["'", '"', "<"]
71
+ pre_defined_terms = ["'", '"', "<", "]]>", "?>"]
71
72
  if StringScanner::Version < "3.1.1"
72
73
  pre_defined_terms.each do |term|
73
74
  PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
@@ -150,6 +151,10 @@ module REXML
150
151
  end
151
152
  end
152
153
 
154
+ def skip_spaces
155
+ @scanner.skip(Private::SPACES_PATTERN) ? true : false
156
+ end
157
+
153
158
  def position
154
159
  @scanner.pos
155
160
  end
@@ -267,7 +272,7 @@ module REXML
267
272
  @scanner << readline(term)
268
273
  end
269
274
  if str
270
- read if @scanner.eos? and !@source.eof?
275
+ read if @scanner.eos? and @source and !@source.eof?
271
276
  str
272
277
  else
273
278
  rest = @scanner.rest