rexml 3.3.7 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a402bb00d8bf352521fb6ca5354ba92a22d110feedcba40a50e2de5abad277a
4
- data.tar.gz: 51f7b5893eef8d8183eb14c719064368029b18c9909b3454047e308c7425ce5b
3
+ metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
4
+ data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
5
5
  SHA512:
6
- metadata.gz: ff091fe421748562931d65301e66dc1d4d313e1c28cce753bc9f31a1f9bac65c0b4939db70117e47f2c3158daa24b708e2519a98a9638114f4e5a1c0d1265e7c
7
- data.tar.gz: 720bc72a86eacebbe9a990152d4d0dfcde2e50c71b3fbabaaba44dec91b2f6ff7ca6180b86622cf0ffb36355ab5e5d43f8948e67c70ab4fca1f8bf0882a3585d
6
+ metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
7
+ data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
data/NEWS.md CHANGED
@@ -1,5 +1,100 @@
1
1
  # News
2
2
 
3
+ ## 3.4.1 - 2025-02-16 {#version-3-4-1}
4
+
5
+ ### Improvement
6
+
7
+ * Improved performance.
8
+ * GH-226
9
+ * GH-227
10
+ * GH-237
11
+ * Patch by NAITOH Jun
12
+
13
+ ### Fixes
14
+
15
+ * Fixed incorrect serialization of ATTLIST.
16
+ * GH-233
17
+ * GH-234
18
+ * Patch by OlofKalufs
19
+ * Reported by OlofKalufs
20
+
21
+ ### Thanks
22
+
23
+ * NAITOH Jun
24
+
25
+ * OlofKalufs
26
+
27
+ ## 3.4.0 - 2024-12-15 {#version-3-4-0}
28
+
29
+ ### Improvement
30
+
31
+ * Improved performance.
32
+ * GH-216
33
+ * Patch by NAITOH Jun
34
+
35
+ * JRuby: Improved parse performance.
36
+ * GH-219
37
+ * Patch by João Duarte
38
+
39
+ * Added support for reusing pull parser.
40
+ * GH-214
41
+ * GH-220
42
+ * Patch by Dmitry Pogrebnoy
43
+
44
+ * Improved error handling when source is `IO`.
45
+ * GH-221
46
+ * Patch by NAITOH Jun
47
+
48
+ ### Thanks
49
+
50
+ * NAITOH Jun
51
+
52
+ * João Duarte
53
+
54
+ * Dmitry Pogrebnoy
55
+
56
+ ## 3.3.9 - 2024-10-24 {#version-3-3-9}
57
+
58
+ ### Improvements
59
+
60
+ * Improved performance.
61
+ * GH-210
62
+ * Patch by NAITOH Jun.
63
+
64
+ ### Fixes
65
+
66
+ * Fixed a parse bug for text only invalid XML.
67
+ * GH-215
68
+ * Patch by NAITOH Jun.
69
+
70
+ * Fixed a parse bug that `&#0x...;` is accepted as a character
71
+ reference.
72
+
73
+ ### Thanks
74
+
75
+ * NAITOH Jun
76
+
77
+ ## 3.3.8 - 2024-09-29 {#version-3-3-8}
78
+
79
+ ### Improvements
80
+
81
+ * SAX2: Improve parse performance.
82
+ * GH-207
83
+ * Patch by NAITOH Jun.
84
+
85
+ ### Fixes
86
+
87
+ * Fixed a bug that unexpected attribute namespace conflict error for
88
+ the predefined "xml" namespace is reported.
89
+ * GH-208
90
+ * Patch by KITAITI Makoto
91
+
92
+ ### Thanks
93
+
94
+ * NAITOH Jun
95
+
96
+ * KITAITI Makoto
97
+
3
98
  ## 3.3.7 - 2024-09-04 {#version-3-3-7}
4
99
 
5
100
  ### Improvements
@@ -150,12 +150,13 @@ module REXML
150
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
151
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
152
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
- CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154
154
  DEFAULT_ENTITIES_PATTERNS = {}
155
155
  default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
156
  default_entities.each do |term|
157
157
  DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
158
  end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
159
160
  end
160
161
  private_constant :Private
161
162
 
@@ -166,6 +167,7 @@ module REXML
166
167
  @entity_expansion_count = 0
167
168
  @entity_expansion_limit = Security.entity_expansion_limit
168
169
  @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
169
171
  end
170
172
 
171
173
  def add_listener( listener )
@@ -179,13 +181,17 @@ module REXML
179
181
 
180
182
  def stream=( source )
181
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
182
188
  @closed = nil
183
189
  @have_root = false
184
190
  @document_status = nil
185
191
  @tags = []
186
192
  @stack = []
187
193
  @entities = []
188
- @namespaces = {}
194
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
189
195
  @namespaces_restore_stack = []
190
196
  end
191
197
 
@@ -267,10 +273,10 @@ module REXML
267
273
  @source.ensure_buffer
268
274
  if @document_status == nil
269
275
  start_position = @source.position
270
- if @source.match("<?", true)
276
+ if @source.match?("<?", true)
271
277
  return process_instruction
272
- elsif @source.match("<!", true)
273
- if @source.match("--", true)
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
274
280
  md = @source.match(/(.*?)-->/um, true)
275
281
  if md.nil?
276
282
  raise REXML::ParseException.new("Unclosed comment", @source)
@@ -279,10 +285,10 @@ module REXML
279
285
  raise REXML::ParseException.new("Malformed comment", @source)
280
286
  end
281
287
  return [ :comment, md[1] ]
282
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
283
289
  base_error_message = "Malformed DOCTYPE"
284
- unless @source.match(/\s+/um, true)
285
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
286
292
  message = "#{base_error_message}: name is missing"
287
293
  else
288
294
  message = "#{base_error_message}: invalid name"
@@ -291,10 +297,11 @@ module REXML
291
297
  raise REXML::ParseException.new(message, @source)
292
298
  end
293
299
  name = parse_name(base_error_message)
294
- if @source.match(/\s*\[/um, true)
300
+ @source.match?(/\s*/um, true) # skip spaces
301
+ if @source.match?("[", true)
295
302
  id = [nil, nil, nil]
296
303
  @document_status = :in_doctype
297
- elsif @source.match(/\s*>/um, true)
304
+ elsif @source.match?(">", true)
298
305
  id = [nil, nil, nil]
299
306
  @document_status = :after_doctype
300
307
  @source.ensure_buffer
@@ -306,9 +313,10 @@ module REXML
306
313
  # For backward compatibility
307
314
  id[1], id[2] = id[2], nil
308
315
  end
309
- if @source.match(/\s*\[/um, true)
316
+ @source.match?(/\s*/um, true) # skip spaces
317
+ if @source.match?("[", true)
310
318
  @document_status = :in_doctype
311
- elsif @source.match(/\s*>/um, true)
319
+ elsif @source.match?(">", true)
312
320
  @document_status = :after_doctype
313
321
  @source.ensure_buffer
314
322
  else
@@ -318,7 +326,7 @@ module REXML
318
326
  end
319
327
  args = [:start_doctype, name, *id]
320
328
  if @document_status == :after_doctype
321
- @source.match(/\s*/um, true)
329
+ @source.match?(/\s*/um, true)
322
330
  @stack << [ :end_doctype ]
323
331
  end
324
332
  return args
@@ -329,14 +337,14 @@ module REXML
329
337
  end
330
338
  end
331
339
  if @document_status == :in_doctype
332
- @source.match(/\s*/um, true) # skip spaces
340
+ @source.match?(/\s*/um, true) # skip spaces
333
341
  start_position = @source.position
334
- if @source.match("<!", true)
335
- if @source.match("ELEMENT", true)
342
+ if @source.match?("<!", true)
343
+ if @source.match?("ELEMENT", true)
336
344
  md = @source.match(/(.*?)>/um, true)
337
345
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
338
346
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
339
- elsif @source.match("ENTITY", true)
347
+ elsif @source.match?("ENTITY", true)
340
348
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
341
349
  unless match_data
342
350
  raise REXML::ParseException.new("Malformed entity declaration", @source)
@@ -368,11 +376,11 @@ module REXML
368
376
  end
369
377
  match << '%' if ref
370
378
  return match
371
- elsif @source.match("ATTLIST", true)
379
+ elsif @source.match?("ATTLIST", true)
372
380
  md = @source.match(Private::ATTLISTDECL_END, true)
373
381
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
374
382
  element = md[1]
375
- contents = md[0]
383
+ contents = "<!ATTLIST" + md[0]
376
384
 
377
385
  pairs = {}
378
386
  values = md[0].strip.scan( ATTDEF_RE )
@@ -388,10 +396,10 @@ module REXML
388
396
  end
389
397
  end
390
398
  return [ :attlistdecl, element, pairs, contents ]
391
- elsif @source.match("NOTATION", true)
399
+ elsif @source.match?("NOTATION", true)
392
400
  base_error_message = "Malformed notation declaration"
393
- unless @source.match(/\s+/um, true)
394
- if @source.match(">")
401
+ unless @source.match?(/\s+/um, true)
402
+ if @source.match?(">")
395
403
  message = "#{base_error_message}: name is missing"
396
404
  else
397
405
  message = "#{base_error_message}: invalid name"
@@ -403,7 +411,8 @@ module REXML
403
411
  id = parse_id(base_error_message,
404
412
  accept_external_id: true,
405
413
  accept_public_id: true)
406
- unless @source.match(/\s*>/um, true)
414
+ @source.match?(/\s*/um, true) # skip spaces
415
+ unless @source.match?(">", true)
407
416
  message = "#{base_error_message}: garbage before end >"
408
417
  raise REXML::ParseException.new(message, @source)
409
418
  end
@@ -417,7 +426,7 @@ module REXML
417
426
  end
418
427
  elsif match = @source.match(/(%.*?;)\s*/um, true)
419
428
  return [ :externalentity, match[1] ]
420
- elsif @source.match(/\]\s*>/um, true)
429
+ elsif @source.match?(/\]\s*>/um, true)
421
430
  @document_status = :after_doctype
422
431
  return [ :end_doctype ]
423
432
  end
@@ -426,16 +435,16 @@ module REXML
426
435
  end
427
436
  end
428
437
  if @document_status == :after_doctype
429
- @source.match(/\s*/um, true)
438
+ @source.match?(/\s*/um, true)
430
439
  end
431
440
  begin
432
441
  start_position = @source.position
433
- if @source.match("<", true)
442
+ if @source.match?("<", true)
434
443
  # :text's read_until may remain only "<" in buffer. In the
435
444
  # case, buffer is empty here. So we need to fill buffer
436
445
  # here explicitly.
437
446
  @source.ensure_buffer
438
- if @source.match("/", true)
447
+ if @source.match?("/", true)
439
448
  @namespaces_restore_stack.pop
440
449
  last_tag = @tags.pop
441
450
  md = @source.match(Private::CLOSE_PATTERN, true)
@@ -450,7 +459,7 @@ module REXML
450
459
  raise REXML::ParseException.new(message, @source)
451
460
  end
452
461
  return [ :end_element, last_tag ]
453
- elsif @source.match("!", true)
462
+ elsif @source.match?("!", true)
454
463
  md = @source.match(/([^>]*>)/um)
455
464
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
456
465
  raise REXML::ParseException.new("Malformed node", @source) unless md
@@ -468,7 +477,7 @@ module REXML
468
477
  end
469
478
  raise REXML::ParseException.new( "Declarations can only occur "+
470
479
  "in the doctype declaration.", @source)
471
- elsif @source.match("?", true)
480
+ elsif @source.match?("?", true)
472
481
  return process_instruction
473
482
  else
474
483
  # Get the next tag
@@ -568,8 +577,12 @@ module REXML
568
577
  return rv if matches.size == 0
569
578
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
570
579
  m=$1
571
- m = "0#{m}" if m[0] == ?x
572
- [Integer(m)].pack('U*')
580
+ if m.start_with?("x")
581
+ code_point = Integer(m[1..-1], 16)
582
+ else
583
+ code_point = Integer(m, 10)
584
+ end
585
+ [code_point].pack('U*')
573
586
  }
574
587
  matches.collect!{|x|x[0]}.compact!
575
588
  if filter
@@ -645,7 +658,7 @@ module REXML
645
658
  def parse_name(base_error_message)
646
659
  md = @source.match(Private::NAME_PATTERN, true)
647
660
  unless md
648
- if @source.match(/\S/um)
661
+ if @source.match?(/\S/um)
649
662
  message = "#{base_error_message}: invalid name"
650
663
  else
651
664
  message = "#{base_error_message}: name is missing"
@@ -687,34 +700,34 @@ module REXML
687
700
  accept_public_id:)
688
701
  public = /\A\s*PUBLIC/um
689
702
  system = /\A\s*SYSTEM/um
690
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
691
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
703
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
704
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
692
705
  return "public ID literal is missing"
693
706
  end
694
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
707
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
695
708
  return "invalid public ID literal"
696
709
  end
697
710
  if accept_public_id
698
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
711
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
699
712
  return "system ID literal is missing"
700
713
  end
701
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
714
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
702
715
  return "invalid system literal"
703
716
  end
704
717
  "garbage after system literal"
705
718
  else
706
719
  "garbage after public ID literal"
707
720
  end
708
- elsif accept_external_id and @source.match(/#{system}/um)
709
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
721
+ elsif accept_external_id and @source.match?(/#{system}/um)
722
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
710
723
  return "system literal is missing"
711
724
  end
712
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
725
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
713
726
  return "invalid system literal"
714
727
  end
715
728
  "garbage after system literal"
716
729
  else
717
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
730
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
718
731
  return "invalid ID type"
719
732
  end
720
733
  "ID type is missing"
@@ -723,7 +736,7 @@ module REXML
723
736
 
724
737
  def process_instruction
725
738
  name = parse_name("Malformed XML: Invalid processing instruction node")
726
- if @source.match(/\s+/um, true)
739
+ if @source.match?(/\s+/um, true)
727
740
  match_data = @source.match(/(.*?)\?>/um, true)
728
741
  unless match_data
729
742
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
@@ -731,7 +744,7 @@ module REXML
731
744
  content = match_data[1]
732
745
  else
733
746
  content = nil
734
- unless @source.match("?>", true)
747
+ unless @source.match?("?>", true)
735
748
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
736
749
  end
737
750
  end
@@ -756,14 +769,33 @@ module REXML
756
769
  [:processing_instruction, name, content]
757
770
  end
758
771
 
772
+ if StringScanner::Version < "3.1.1"
773
+ def scan_quote
774
+ @source.match(/(['"])/, true)&.[](1)
775
+ end
776
+ else
777
+ def scan_quote
778
+ case @source.peek_byte
779
+ when 34 # '"'.ord
780
+ @source.scan_byte
781
+ '"'
782
+ when 39 # "'".ord
783
+ @source.scan_byte
784
+ "'"
785
+ else
786
+ nil
787
+ end
788
+ end
789
+ end
790
+
759
791
  def parse_attributes(prefixes)
760
792
  attributes = {}
761
793
  expanded_names = {}
762
794
  closed = false
763
795
  while true
764
- if @source.match(">", true)
796
+ if @source.match?(">", true)
765
797
  return attributes, closed
766
- elsif @source.match("/>", true)
798
+ elsif @source.match?("/>", true)
767
799
  closed = true
768
800
  return attributes, closed
769
801
  elsif match = @source.match(QNAME, true)
@@ -771,15 +803,14 @@ module REXML
771
803
  prefix = match[2]
772
804
  local_part = match[3]
773
805
 
774
- unless @source.match(/\s*=\s*/um, true)
806
+ unless @source.match?(/\s*=\s*/um, true)
775
807
  message = "Missing attribute equal: <#{name}>"
776
808
  raise REXML::ParseException.new(message, @source)
777
809
  end
778
- unless match = @source.match(/(['"])/, true)
810
+ unless quote = scan_quote
779
811
  message = "Missing attribute value start quote: <#{name}>"
780
812
  raise REXML::ParseException.new(message, @source)
781
813
  end
782
- quote = match[1]
783
814
  start_position = @source.position
784
815
  value = @source.read_until(quote)
785
816
  unless value.chomp!(quote)
@@ -787,10 +818,10 @@ module REXML
787
818
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
788
819
  raise REXML::ParseException.new(message, @source)
789
820
  end
790
- @source.match(/\s*/um, true)
821
+ @source.match?(/\s*/um, true)
791
822
  if prefix == "xmlns"
792
823
  if local_part == "xml"
793
- if value != "http://www.w3.org/XML/1998/namespace"
824
+ if value != Private::XML_PREFIXED_NAMESPACE
794
825
  msg = "The 'xml' prefix must not be bound to any other namespace "+
795
826
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
796
827
  raise REXML::ParseException.new( msg, @source, self )
@@ -93,6 +93,10 @@ module REXML
93
93
  def unshift token
94
94
  @my_stack.unshift token
95
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
96
100
  end
97
101
 
98
102
  # A parsing event. The contents of the event are accessed as an +Array+,
@@ -259,6 +259,8 @@ module REXML
259
259
  end
260
260
 
261
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
262
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
263
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
264
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.7"
34
+ VERSION = "3.4.1"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -58,8 +68,14 @@ module REXML
58
68
  SCANNER_RESET_SIZE = 100000
59
69
  PRE_DEFINED_TERM_PATTERNS = {}
60
70
  pre_defined_terms = ["'", '"', "<"]
61
- pre_defined_terms.each do |term|
62
- PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
71
+ if StringScanner::Version < "3.1.1"
72
+ pre_defined_terms.each do |term|
73
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
74
+ end
75
+ else
76
+ pre_defined_terms.each do |term|
77
+ PRE_DEFINED_TERM_PATTERNS[term] = term
78
+ end
63
79
  end
64
80
  end
65
81
  private_constant :Private
@@ -77,6 +93,7 @@ module REXML
77
93
  detect_encoding
78
94
  end
79
95
  @line = 0
96
+ @encoded_terms = {}
80
97
  end
81
98
 
82
99
  # The current buffer (what we're going to read next)
@@ -125,6 +142,14 @@ module REXML
125
142
  end
126
143
  end
127
144
 
145
+ def match?(pattern, cons=false)
146
+ if cons
147
+ !@scanner.skip(pattern).nil?
148
+ else
149
+ !@scanner.match?(pattern).nil?
150
+ end
151
+ end
152
+
128
153
  def position
129
154
  @scanner.pos
130
155
  end
@@ -133,6 +158,14 @@ module REXML
133
158
  @scanner.pos = pos
134
159
  end
135
160
 
161
+ def peek_byte
162
+ @scanner.peek_byte
163
+ end
164
+
165
+ def scan_byte
166
+ @scanner.scan_byte
167
+ end
168
+
136
169
  # @return true if the Source is exhausted
137
170
  def empty?
138
171
  @scanner.eos?
@@ -227,7 +260,7 @@ module REXML
227
260
 
228
261
  def read_until(term)
229
262
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
230
- term = encode(term)
263
+ term = @encoded_terms[term] ||= encode(term)
231
264
  until str = @scanner.scan_until(pattern)
232
265
  break if @source.nil?
233
266
  break if @source.eof?
@@ -266,6 +299,23 @@ module REXML
266
299
  md.nil? ? nil : @scanner
267
300
  end
268
301
 
302
+ def match?( pattern, cons=false )
303
+ # To avoid performance issue, we need to increase bytes to read per scan
304
+ min_bytes = 1
305
+ while true
306
+ if cons
307
+ n_matched_bytes = @scanner.skip(pattern)
308
+ else
309
+ n_matched_bytes = @scanner.match?(pattern)
310
+ end
311
+ return true if n_matched_bytes
312
+ return false if pattern.is_a?(String)
313
+ return false if @source.nil?
314
+ return false unless read(nil, min_bytes)
315
+ min_bytes *= 2
316
+ end
317
+ end
318
+
269
319
  def empty?
270
320
  super and ( @source.nil? || @source.eof? )
271
321
  end
@@ -285,7 +335,7 @@ module REXML
285
335
  rescue
286
336
  end
287
337
  @er_source.seek(pos)
288
- rescue IOError
338
+ rescue IOError, SystemCallError
289
339
  pos = -1
290
340
  line = -1
291
341
  end
@@ -294,14 +344,19 @@ module REXML
294
344
 
295
345
  private
296
346
  def readline(term = nil)
297
- str = @source.readline(term || @line_break)
298
347
  if @pending_buffer
348
+ begin
349
+ str = @source.readline(term || @line_break)
350
+ rescue IOError
351
+ end
299
352
  if str.nil?
300
353
  str = @pending_buffer
301
354
  else
302
355
  str = @pending_buffer + str
303
356
  end
304
357
  @pending_buffer = nil
358
+ else
359
+ str = @source.readline(term || @line_break)
305
360
  end
306
361
  return nil if str.nil?
307
362
 
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.7
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-09-04 00:00:00.000000000 Z
10
+ date: 2025-02-16 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: An XML toolkit for Ruby
13
13
  email:
@@ -102,7 +102,7 @@ homepage: https://github.com/ruby/rexml
102
102
  licenses:
103
103
  - BSD-2-Clause
104
104
  metadata:
105
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.7
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
106
106
  rdoc_options:
107
107
  - "--main"
108
108
  - README.md
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
119
  - !ruby/object:Gem::Version
120
120
  version: '0'
121
121
  requirements: []
122
- rubygems_version: 3.6.0.dev
122
+ rubygems_version: 3.6.2
123
123
  specification_version: 4
124
124
  summary: An XML toolkit for Ruby
125
125
  test_files: []