rexml 3.3.8 → 3.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ module REXML
5
5
  # ID ---> Encoding name
6
6
  attr_reader :encoding
7
7
  def encoding=(encoding)
8
- encoding = encoding.name if encoding.is_a?(Encoding)
8
+ encoding = encoding.name if encoding.is_a?(::Encoding)
9
9
  if encoding.is_a?(String)
10
10
  original_encoding = encoding
11
11
  encoding = find_encoding(encoding)
@@ -13,12 +13,9 @@ module REXML
13
13
  raise ArgumentError, "Bad encoding name #{original_encoding}"
14
14
  end
15
15
  end
16
+ encoding = encoding.upcase if encoding
16
17
  return false if defined?(@encoding) and encoding == @encoding
17
- if encoding
18
- @encoding = encoding.upcase
19
- else
20
- @encoding = 'UTF-8'
21
- end
18
+ @encoding = encoding || "UTF-8"
22
19
  true
23
20
  end
24
21
 
@@ -39,11 +39,11 @@ module REXML
39
39
 
40
40
  def Functions::text( )
41
41
  if @@context[:node].node_type == :element
42
- return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
42
+ @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
43
43
  elsif @@context[:node].node_type == :text
44
- return @@context[:node].value
44
+ @@context[:node].value
45
45
  else
46
- return false
46
+ false
47
47
  end
48
48
  end
49
49
 
@@ -49,7 +49,7 @@ module REXML
49
49
  # See the rexml/formatters package
50
50
  #
51
51
  def write writer, indent=-1, transitive=false, ie_hack=false
52
- Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
52
+ Kernel.warn( "#{self.class.name}#write is deprecated", uplevel: 1)
53
53
  indent(writer, indent)
54
54
  writer << START
55
55
  writer << @target
@@ -42,11 +42,11 @@ module REXML
42
42
  # Compares names optionally WITH namespaces
43
43
  def has_name?( other, ns=nil )
44
44
  if ns
45
- return (namespace() == ns and name() == other)
45
+ namespace() == ns and name() == other
46
46
  elsif other.include? ":"
47
- return fully_expanded_name == other
47
+ fully_expanded_name == other
48
48
  else
49
- return name == other
49
+ name == other
50
50
  end
51
51
  end
52
52
 
@@ -57,7 +57,7 @@ module REXML
57
57
  def fully_expanded_name
58
58
  ns = prefix
59
59
  return "#{ns}:#@name" if ns.size > 0
60
- return @name
60
+ @name
61
61
  end
62
62
  end
63
63
  end
data/lib/rexml/node.rb CHANGED
@@ -26,7 +26,7 @@ module REXML
26
26
  # REXML::Formatters package for changing the output style.
27
27
  def to_s indent=nil
28
28
  unless indent.nil?
29
- Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated", uplevel: 1)
29
+ Kernel.warn( "#{self.class.name}#to_s(indent) parameter is deprecated", uplevel: 1)
30
30
  f = REXML::Formatters::Pretty.new( indent )
31
31
  f.write( self, rv = "" )
32
32
  else
@@ -68,7 +68,7 @@ module REXML
68
68
  each_recursive {|node|
69
69
  return node if block.call(node)
70
70
  }
71
- return nil
71
+ nil
72
72
  end
73
73
 
74
74
  # Returns the position that +self+ holds in its parent's array, indexed
@@ -144,13 +144,14 @@ module REXML
144
144
  PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
145
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
146
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
147
+ EQUAL_PATTERN = /\s*=\s*/um
147
148
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
148
149
  NAME_PATTERN = /#{NAME}/um
149
150
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
150
151
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
151
152
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
153
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
- CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
154
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154
155
  DEFAULT_ENTITIES_PATTERNS = {}
155
156
  default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
157
  default_entities.each do |term|
@@ -167,6 +168,8 @@ module REXML
167
168
  @entity_expansion_count = 0
168
169
  @entity_expansion_limit = Security.entity_expansion_limit
169
170
  @entity_expansion_text_limit = Security.entity_expansion_text_limit
171
+ @source.ensure_buffer
172
+ @version = nil
170
173
  end
171
174
 
172
175
  def add_listener( listener )
@@ -180,6 +183,10 @@ module REXML
180
183
 
181
184
  def stream=( source )
182
185
  @source = SourceFactory.create_from( source )
186
+ reset
187
+ end
188
+
189
+ def reset
183
190
  @closed = nil
184
191
  @have_root = false
185
192
  @document_status = nil
@@ -201,12 +208,12 @@ module REXML
201
208
 
202
209
  # Returns true if there are no more events
203
210
  def empty?
204
- return (@source.empty? and @stack.empty?)
211
+ (@source.empty? and @stack.empty?)
205
212
  end
206
213
 
207
214
  # Returns true if there are more events. Synonymous with !empty?
208
215
  def has_next?
209
- return !(@source.empty? and @stack.empty?)
216
+ !(@source.empty? and @stack.empty?)
210
217
  end
211
218
 
212
219
  # Push an event back on the head of the stream. This method
@@ -259,6 +266,11 @@ module REXML
259
266
  path = "/" + @tags.join("/")
260
267
  raise ParseException.new("Missing end tag for '#{path}'", @source)
261
268
  end
269
+
270
+ unless @document_status == :in_element
271
+ raise ParseException.new("Malformed XML: No root element", @source)
272
+ end
273
+
262
274
  return [ :end_document ]
263
275
  end
264
276
  return @stack.shift if @stack.size > 0
@@ -268,22 +280,15 @@ module REXML
268
280
  @source.ensure_buffer
269
281
  if @document_status == nil
270
282
  start_position = @source.position
271
- if @source.match("<?", true)
283
+ if @source.match?("<?", true)
272
284
  return process_instruction
273
- elsif @source.match("<!", true)
274
- if @source.match("--", true)
275
- md = @source.match(/(.*?)-->/um, true)
276
- if md.nil?
277
- raise REXML::ParseException.new("Unclosed comment", @source)
278
- end
279
- if /--|-\z/.match?(md[1])
280
- raise REXML::ParseException.new("Malformed comment", @source)
281
- end
282
- return [ :comment, md[1] ]
283
- elsif @source.match("DOCTYPE", true)
285
+ elsif @source.match?("<!", true)
286
+ if @source.match?("--", true)
287
+ return [ :comment, process_comment ]
288
+ elsif @source.match?("DOCTYPE", true)
284
289
  base_error_message = "Malformed DOCTYPE"
285
- unless @source.match(/\s+/um, true)
286
- if @source.match(">")
290
+ unless @source.skip_spaces
291
+ if @source.match?(">")
287
292
  message = "#{base_error_message}: name is missing"
288
293
  else
289
294
  message = "#{base_error_message}: invalid name"
@@ -292,10 +297,11 @@ module REXML
292
297
  raise REXML::ParseException.new(message, @source)
293
298
  end
294
299
  name = parse_name(base_error_message)
295
- if @source.match(/\s*\[/um, true)
300
+ @source.skip_spaces
301
+ if @source.match?("[", true)
296
302
  id = [nil, nil, nil]
297
303
  @document_status = :in_doctype
298
- elsif @source.match(/\s*>/um, true)
304
+ elsif @source.match?(">", true)
299
305
  id = [nil, nil, nil]
300
306
  @document_status = :after_doctype
301
307
  @source.ensure_buffer
@@ -307,9 +313,10 @@ module REXML
307
313
  # For backward compatibility
308
314
  id[1], id[2] = id[2], nil
309
315
  end
310
- if @source.match(/\s*\[/um, true)
316
+ @source.skip_spaces
317
+ if @source.match?("[", true)
311
318
  @document_status = :in_doctype
312
- elsif @source.match(/\s*>/um, true)
319
+ elsif @source.match?(">", true)
313
320
  @document_status = :after_doctype
314
321
  @source.ensure_buffer
315
322
  else
@@ -319,7 +326,7 @@ module REXML
319
326
  end
320
327
  args = [:start_doctype, name, *id]
321
328
  if @document_status == :after_doctype
322
- @source.match(/\s*/um, true)
329
+ @source.skip_spaces
323
330
  @stack << [ :end_doctype ]
324
331
  end
325
332
  return args
@@ -330,14 +337,14 @@ module REXML
330
337
  end
331
338
  end
332
339
  if @document_status == :in_doctype
333
- @source.match(/\s*/um, true) # skip spaces
340
+ @source.skip_spaces
334
341
  start_position = @source.position
335
- if @source.match("<!", true)
336
- if @source.match("ELEMENT", true)
342
+ if @source.match?("<!", true)
343
+ if @source.match?("ELEMENT", true)
337
344
  md = @source.match(/(.*?)>/um, true)
338
345
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
339
346
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
340
- elsif @source.match("ENTITY", true)
347
+ elsif @source.match?("ENTITY", true)
341
348
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
342
349
  unless match_data
343
350
  raise REXML::ParseException.new("Malformed entity declaration", @source)
@@ -369,11 +376,11 @@ module REXML
369
376
  end
370
377
  match << '%' if ref
371
378
  return match
372
- elsif @source.match("ATTLIST", true)
379
+ elsif @source.match?("ATTLIST", true)
373
380
  md = @source.match(Private::ATTLISTDECL_END, true)
374
381
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
375
382
  element = md[1]
376
- contents = md[0]
383
+ contents = "<!ATTLIST" + md[0]
377
384
 
378
385
  pairs = {}
379
386
  values = md[0].strip.scan( ATTDEF_RE )
@@ -389,10 +396,10 @@ module REXML
389
396
  end
390
397
  end
391
398
  return [ :attlistdecl, element, pairs, contents ]
392
- elsif @source.match("NOTATION", true)
399
+ elsif @source.match?("NOTATION", true)
393
400
  base_error_message = "Malformed notation declaration"
394
- unless @source.match(/\s+/um, true)
395
- if @source.match(">")
401
+ unless @source.skip_spaces
402
+ if @source.match?(">")
396
403
  message = "#{base_error_message}: name is missing"
397
404
  else
398
405
  message = "#{base_error_message}: invalid name"
@@ -404,39 +411,37 @@ module REXML
404
411
  id = parse_id(base_error_message,
405
412
  accept_external_id: true,
406
413
  accept_public_id: true)
407
- unless @source.match(/\s*>/um, true)
414
+ @source.skip_spaces
415
+ unless @source.match?(">", true)
408
416
  message = "#{base_error_message}: garbage before end >"
409
417
  raise REXML::ParseException.new(message, @source)
410
418
  end
411
419
  return [:notationdecl, name, *id]
412
- elsif md = @source.match(/--(.*?)-->/um, true)
413
- case md[1]
414
- when /--/, /-\z/
415
- raise REXML::ParseException.new("Malformed comment", @source)
416
- end
417
- return [ :comment, md[1] ] if md
420
+ elsif @source.match?("--", true)
421
+ return [ :comment, process_comment ]
422
+ else
423
+ raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
418
424
  end
419
425
  elsif match = @source.match(/(%.*?;)\s*/um, true)
420
426
  return [ :externalentity, match[1] ]
421
- elsif @source.match(/\]\s*>/um, true)
427
+ elsif @source.match?(/\]\s*>/um, true)
422
428
  @document_status = :after_doctype
423
429
  return [ :end_doctype ]
424
- end
425
- if @document_status == :in_doctype
430
+ else
426
431
  raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
427
432
  end
428
433
  end
429
434
  if @document_status == :after_doctype
430
- @source.match(/\s*/um, true)
435
+ @source.skip_spaces
431
436
  end
432
437
  begin
433
438
  start_position = @source.position
434
- if @source.match("<", true)
439
+ if @source.match?("<", true)
435
440
  # :text's read_until may remain only "<" in buffer. In the
436
441
  # case, buffer is empty here. So we need to fill buffer
437
442
  # here explicitly.
438
443
  @source.ensure_buffer
439
- if @source.match("/", true)
444
+ if @source.match?("/", true)
440
445
  @namespaces_restore_stack.pop
441
446
  last_tag = @tags.pop
442
447
  md = @source.match(Private::CLOSE_PATTERN, true)
@@ -451,25 +456,21 @@ module REXML
451
456
  raise REXML::ParseException.new(message, @source)
452
457
  end
453
458
  return [ :end_element, last_tag ]
454
- elsif @source.match("!", true)
455
- md = @source.match(/([^>]*>)/um)
459
+ elsif @source.match?("!", true)
456
460
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
457
- raise REXML::ParseException.new("Malformed node", @source) unless md
458
- if md[0][0] == ?-
459
- md = @source.match(/--(.*?)-->/um, true)
460
-
461
- if md.nil? || /--|-\z/.match?(md[1])
462
- raise REXML::ParseException.new("Malformed comment", @source)
461
+ if @source.match?("--", true)
462
+ return [ :comment, process_comment ]
463
+ elsif @source.match?("[CDATA[", true)
464
+ text = @source.read_until("]]>")
465
+ if text.chomp!("]]>")
466
+ return [ :cdata, text ]
467
+ else
468
+ raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
463
469
  end
464
-
465
- return [ :comment, md[1] ]
466
470
  else
467
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
468
- return [ :cdata, md[1] ] if md
471
+ raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
469
472
  end
470
- raise REXML::ParseException.new( "Declarations can only occur "+
471
- "in the doctype declaration.", @source)
472
- elsif @source.match("?", true)
473
+ elsif @source.match?("?", true)
473
474
  return process_instruction
474
475
  else
475
476
  # Get the next tag
@@ -528,7 +529,8 @@ module REXML
528
529
  raise REXML::ParseException.new( "Exception parsing",
529
530
  @source, self, (error ? error : $!) )
530
531
  end
531
- return [ :dummy ]
532
+ # NOTE: The end of the method never runs, because it is unreachable.
533
+ # All branches of code above have explicit unconditional return or raise statements.
532
534
  end
533
535
  private :pull_event
534
536
 
@@ -569,8 +571,12 @@ module REXML
569
571
  return rv if matches.size == 0
570
572
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
571
573
  m=$1
572
- m = "0#{m}" if m[0] == ?x
573
- [Integer(m)].pack('U*')
574
+ if m.start_with?("x")
575
+ code_point = Integer(m[1..-1], 16)
576
+ else
577
+ code_point = Integer(m, 10)
578
+ end
579
+ [code_point].pack('U*')
574
580
  }
575
581
  matches.collect!{|x|x[0]}.compact!
576
582
  if filter
@@ -643,10 +649,14 @@ module REXML
643
649
  true
644
650
  end
645
651
 
652
+ def normalize_xml_declaration_encoding(xml_declaration_encoding)
653
+ /\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
654
+ end
655
+
646
656
  def parse_name(base_error_message)
647
657
  md = @source.match(Private::NAME_PATTERN, true)
648
658
  unless md
649
- if @source.match(/\S/um)
659
+ if @source.match?(/\S/um)
650
660
  message = "#{base_error_message}: invalid name"
651
661
  else
652
662
  message = "#{base_error_message}: name is missing"
@@ -688,73 +698,171 @@ module REXML
688
698
  accept_public_id:)
689
699
  public = /\A\s*PUBLIC/um
690
700
  system = /\A\s*SYSTEM/um
691
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
692
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
701
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
702
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
693
703
  return "public ID literal is missing"
694
704
  end
695
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
705
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
696
706
  return "invalid public ID literal"
697
707
  end
698
708
  if accept_public_id
699
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
709
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
700
710
  return "system ID literal is missing"
701
711
  end
702
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
712
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
703
713
  return "invalid system literal"
704
714
  end
705
715
  "garbage after system literal"
706
716
  else
707
717
  "garbage after public ID literal"
708
718
  end
709
- elsif accept_external_id and @source.match(/#{system}/um)
710
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
719
+ elsif accept_external_id and @source.match?(/#{system}/um)
720
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
711
721
  return "system literal is missing"
712
722
  end
713
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
723
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
714
724
  return "invalid system literal"
715
725
  end
716
726
  "garbage after system literal"
717
727
  else
718
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
728
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
719
729
  return "invalid ID type"
720
730
  end
721
731
  "ID type is missing"
722
732
  end
723
733
  end
724
734
 
735
+ def process_comment
736
+ text = @source.read_until("-->")
737
+ unless text.chomp!("-->")
738
+ raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
739
+ end
740
+
741
+ if text.include? "--" or text.end_with?("-")
742
+ raise REXML::ParseException.new("Malformed comment", @source)
743
+ end
744
+ text
745
+ end
746
+
725
747
  def process_instruction
726
748
  name = parse_name("Malformed XML: Invalid processing instruction node")
727
- if @source.match(/\s+/um, true)
728
- match_data = @source.match(/(.*?)\?>/um, true)
729
- unless match_data
730
- raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
749
+ if name == "xml"
750
+ xml_declaration
751
+ else # PITarget
752
+ if @source.skip_spaces # e.g. <?name content?>
753
+ start_position = @source.position
754
+ content = @source.read_until("?>")
755
+ unless content.chomp!("?>")
756
+ @source.position = start_position
757
+ raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
758
+ end
759
+ else # e.g. <?name?>
760
+ content = nil
761
+ unless @source.match?("?>", true)
762
+ raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
763
+ end
731
764
  end
732
- content = match_data[1]
733
- else
734
- content = nil
735
- unless @source.match("?>", true)
736
- raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
765
+ [:processing_instruction, name, content]
766
+ end
767
+ end
768
+
769
+ def xml_declaration
770
+ unless @version.nil?
771
+ raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
772
+ end
773
+ if @document_status
774
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
775
+ end
776
+ unless @source.skip_spaces
777
+ raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
778
+ end
779
+ unless @source.match?("version", true)
780
+ raise ParseException.new("Malformed XML: XML declaration misses version", @source)
781
+ end
782
+ @version = parse_attribute_value_with_equal("xml")
783
+ unless @source.skip_spaces
784
+ unless @source.match?("?>", true)
785
+ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
737
786
  end
787
+ encoding = normalize_xml_declaration_encoding(@source.encoding)
788
+ return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
738
789
  end
739
- if name == "xml"
740
- if @document_status
741
- raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
790
+
791
+ if @source.match?("encoding", true)
792
+ encoding = parse_attribute_value_with_equal("xml")
793
+ unless @source.skip_spaces
794
+ unless @source.match?("?>", true)
795
+ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
796
+ end
797
+ if need_source_encoding_update?(encoding)
798
+ @source.encoding = encoding
799
+ end
800
+ encoding ||= normalize_xml_declaration_encoding(@source.encoding)
801
+ return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
742
802
  end
743
- version = VERSION.match(content)
744
- version = version[1] unless version.nil?
745
- encoding = ENCODING.match(content)
746
- encoding = encoding[1] unless encoding.nil?
747
- if need_source_encoding_update?(encoding)
748
- @source.encoding = encoding
803
+ end
804
+
805
+ if @source.match?("standalone", true)
806
+ standalone = parse_attribute_value_with_equal("xml")
807
+ case standalone
808
+ when "yes", "no"
809
+ else
810
+ raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
749
811
  end
750
- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
751
- encoding = "UTF-16"
812
+ end
813
+ @source.skip_spaces
814
+ unless @source.match?("?>", true)
815
+ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
816
+ end
817
+
818
+ if need_source_encoding_update?(encoding)
819
+ @source.encoding = encoding
820
+ end
821
+ encoding ||= normalize_xml_declaration_encoding(@source.encoding)
822
+
823
+ # e.g. <?xml version="1.0" ?>
824
+ # <?xml version="1.1" encoding="UTF-8" ?>
825
+ # <?xml version="1.1" standalone="yes"?>
826
+ # <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
827
+ [ :xmldecl, @version, encoding, standalone ]
828
+ end
829
+
830
+ if StringScanner::Version < "3.1.1"
831
+ def scan_quote
832
+ @source.match(/(['"])/, true)&.[](1)
833
+ end
834
+ else
835
+ def scan_quote
836
+ case @source.peek_byte
837
+ when 34 # '"'.ord
838
+ @source.scan_byte
839
+ '"'
840
+ when 39 # "'".ord
841
+ @source.scan_byte
842
+ "'"
843
+ else
844
+ nil
752
845
  end
753
- standalone = STANDALONE.match(content)
754
- standalone = standalone[1] unless standalone.nil?
755
- return [ :xmldecl, version, encoding, standalone ]
756
846
  end
757
- [:processing_instruction, name, content]
847
+ end
848
+
849
+ def parse_attribute_value_with_equal(name)
850
+ unless @source.match?(Private::EQUAL_PATTERN, true)
851
+ message = "Missing attribute equal: <#{name}>"
852
+ raise REXML::ParseException.new(message, @source)
853
+ end
854
+ unless quote = scan_quote
855
+ message = "Missing attribute value start quote: <#{name}>"
856
+ raise REXML::ParseException.new(message, @source)
857
+ end
858
+ start_position = @source.position
859
+ value = @source.read_until(quote)
860
+ unless value.chomp!(quote)
861
+ @source.position = start_position
862
+ message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
863
+ raise REXML::ParseException.new(message, @source)
864
+ end
865
+ value
758
866
  end
759
867
 
760
868
  def parse_attributes(prefixes)
@@ -762,33 +870,17 @@ module REXML
762
870
  expanded_names = {}
763
871
  closed = false
764
872
  while true
765
- if @source.match(">", true)
873
+ if @source.match?(">", true)
766
874
  return attributes, closed
767
- elsif @source.match("/>", true)
875
+ elsif @source.match?("/>", true)
768
876
  closed = true
769
877
  return attributes, closed
770
878
  elsif match = @source.match(QNAME, true)
771
879
  name = match[1]
772
880
  prefix = match[2]
773
881
  local_part = match[3]
774
-
775
- unless @source.match(/\s*=\s*/um, true)
776
- message = "Missing attribute equal: <#{name}>"
777
- raise REXML::ParseException.new(message, @source)
778
- end
779
- unless match = @source.match(/(['"])/, true)
780
- message = "Missing attribute value start quote: <#{name}>"
781
- raise REXML::ParseException.new(message, @source)
782
- end
783
- quote = match[1]
784
- start_position = @source.position
785
- value = @source.read_until(quote)
786
- unless value.chomp!(quote)
787
- @source.position = start_position
788
- message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
789
- raise REXML::ParseException.new(message, @source)
790
- end
791
- @source.match(/\s*/um, true)
882
+ value = parse_attribute_value_with_equal(name)
883
+ @source.skip_spaces
792
884
  if prefix == "xmlns"
793
885
  if local_part == "xml"
794
886
  if value != Private::XML_PREFIXED_NAMESPACE
@@ -93,6 +93,10 @@ module REXML
93
93
  def unshift token
94
94
  @my_stack.unshift token
95
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
96
100
  end
97
101
 
98
102
  # A parsing event. The contents of the event are accessed as an +Array?,