rexml 3.3.6 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
4
- data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
3
+ metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
4
+ data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
5
5
  SHA512:
6
- metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
7
- data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
6
+ metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
7
+ data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
data/NEWS.md CHANGED
@@ -1,5 +1,122 @@
1
1
  # News
2
2
 
3
+ ## 3.4.1 - 2025-02-16 {#version-3-4-1}
4
+
5
+ ### Improvements
6
+
7
+ * Improved performance.
8
+ * GH-226
9
+ * GH-227
10
+ * GH-237
11
+ * Patch by NAITOH Jun
12
+
13
+ ### Fixes
14
+
15
+ * Fixed incorrect serialization of ATTLIST
16
+ * GH-233
17
+ * GH-234
18
+ * Patch by OlofKalufs
19
+ * Reported by OlofKalufs
20
+
21
+ ### Thanks
22
+
23
+ * NAITOH Jun
24
+
25
+ * OlofKalufs
26
+
27
+ ## 3.4.0 - 2024-12-15 {#version-3-4-0}
28
+
29
+ ### Improvements
30
+
31
+ * Improved performance.
32
+ * GH-216
33
+ * Patch by NAITOH Jun
34
+
35
+ * JRuby: Improved parse performance.
36
+ * GH-219
37
+ * Patch by João Duarte
38
+
39
+ * Added support for reusing pull parser.
40
+ * GH-214
41
+ * GH-220
42
+ * Patch by Dmitry Pogrebnoy
43
+
44
+ * Improved error handling when source is `IO`.
45
+ * GH-221
46
+ * Patch by NAITOH Jun
47
+
48
+ ### Thanks
49
+
50
+ * NAITOH Jun
51
+
52
+ * João Duarte
53
+
54
+ * Dmitry Pogrebnoy
55
+
56
+ ## 3.3.9 - 2024-10-24 {#version-3-3-9}
57
+
58
+ ### Improvements
59
+
60
+ * Improved performance.
61
+ * GH-210
62
+ * Patch by NAITOH Jun.
63
+
64
+ ### Fixes
65
+
66
+ * Fixed a parse bug for text only invalid XML.
67
+ * GH-215
68
+ * Patch by NAITOH Jun.
69
+
70
+ * Fixed a parse bug where `&#0x...;` was accepted as a character
71
+ reference.
72
+
73
+ ### Thanks
74
+
75
+ * NAITOH Jun
76
+
77
+ ## 3.3.8 - 2024-09-29 {#version-3-3-8}
78
+
79
+ ### Improvements
80
+
81
+ * SAX2: Improve parse performance.
82
+ * GH-207
83
+ * Patch by NAITOH Jun.
84
+
85
+ ### Fixes
86
+
87
+ * Fixed a bug where an unexpected attribute namespace conflict error was
88
+ reported for the predefined "xml" namespace.
89
+ * GH-208
90
+ * Patch by KITAITI Makoto
91
+
92
+ ### Thanks
93
+
94
+ * NAITOH Jun
95
+
96
+ * KITAITI Makoto
97
+
98
+ ## 3.3.7 - 2024-09-04 {#version-3-3-7}
99
+
100
+ ### Improvements
101
+
102
+ * Added local entity expansion limit methods
103
+ * GH-192
104
+ * GH-202
105
+ * Reported by takuya kodama.
106
+ * Patch by NAITOH Jun.
107
+
108
+ * Removed explicit strscan dependency
109
+ * GH-204
110
+ * Patch by Bo Anderson.
111
+
112
+ ### Thanks
113
+
114
+ * takuya kodama
115
+
116
+ * NAITOH Jun
117
+
118
+ * Bo Anderson
119
+
3
120
  ## 3.3.6 - 2024-08-22 {#version-3-3-6}
4
121
 
5
122
  ### Improvements
@@ -148,8 +148,9 @@ module REXML
148
148
  # have been expanded to their values
149
149
  def value
150
150
  return @unnormalized if @unnormalized
151
- @unnormalized = Text::unnormalize( @normalized, doctype )
152
- @unnormalized
151
+
152
+ @unnormalized = Text::unnormalize(@normalized, doctype,
153
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
153
154
  end
154
155
 
155
156
  # The normalized value of this attribute. That is, the attribute with
@@ -91,6 +91,8 @@ module REXML
91
91
  #
92
92
  def initialize( source = nil, context = {} )
93
93
  @entity_expansion_count = 0
94
+ @entity_expansion_limit = Security.entity_expansion_limit
95
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
94
96
  super()
95
97
  @context = context
96
98
  return if source.nil?
@@ -431,10 +433,12 @@ module REXML
431
433
  end
432
434
 
433
435
  attr_reader :entity_expansion_count
436
+ attr_writer :entity_expansion_limit
437
+ attr_accessor :entity_expansion_text_limit
434
438
 
435
439
  def record_entity_expansion
436
440
  @entity_expansion_count += 1
437
- if @entity_expansion_count > Security.entity_expansion_limit
441
+ if @entity_expansion_count > @entity_expansion_limit
438
442
  raise "number of entity expansions exceeded, processing aborted."
439
443
  end
440
444
  end
data/lib/rexml/entity.rb CHANGED
@@ -71,9 +71,12 @@ module REXML
71
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
72
72
  # &ent; entities.
73
73
  def unnormalized
74
- document.record_entity_expansion unless document.nil?
74
+ document&.record_entity_expansion
75
+
75
76
  return nil if @value.nil?
76
- @unnormalized = Text::unnormalize(@value, parent)
77
+
78
+ @unnormalized = Text::unnormalize(@value, parent,
79
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
77
80
  end
78
81
 
79
82
  #once :unnormalized
@@ -150,12 +150,13 @@ module REXML
150
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
151
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
152
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
- CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154
154
  DEFAULT_ENTITIES_PATTERNS = {}
155
155
  default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
156
  default_entities.each do |term|
157
157
  DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
158
  end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
159
160
  end
160
161
  private_constant :Private
161
162
 
@@ -164,6 +165,9 @@ module REXML
164
165
  @listeners = []
165
166
  @prefixes = Set.new
166
167
  @entity_expansion_count = 0
168
+ @entity_expansion_limit = Security.entity_expansion_limit
169
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
167
171
  end
168
172
 
169
173
  def add_listener( listener )
@@ -172,16 +176,22 @@ module REXML
172
176
 
173
177
  attr_reader :source
174
178
  attr_reader :entity_expansion_count
179
+ attr_writer :entity_expansion_limit
180
+ attr_writer :entity_expansion_text_limit
175
181
 
176
182
  def stream=( source )
177
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
178
188
  @closed = nil
179
189
  @have_root = false
180
190
  @document_status = nil
181
191
  @tags = []
182
192
  @stack = []
183
193
  @entities = []
184
- @namespaces = {}
194
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
185
195
  @namespaces_restore_stack = []
186
196
  end
187
197
 
@@ -263,10 +273,10 @@ module REXML
263
273
  @source.ensure_buffer
264
274
  if @document_status == nil
265
275
  start_position = @source.position
266
- if @source.match("<?", true)
276
+ if @source.match?("<?", true)
267
277
  return process_instruction
268
- elsif @source.match("<!", true)
269
- if @source.match("--", true)
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
270
280
  md = @source.match(/(.*?)-->/um, true)
271
281
  if md.nil?
272
282
  raise REXML::ParseException.new("Unclosed comment", @source)
@@ -275,10 +285,10 @@ module REXML
275
285
  raise REXML::ParseException.new("Malformed comment", @source)
276
286
  end
277
287
  return [ :comment, md[1] ]
278
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
279
289
  base_error_message = "Malformed DOCTYPE"
280
- unless @source.match(/\s+/um, true)
281
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
282
292
  message = "#{base_error_message}: name is missing"
283
293
  else
284
294
  message = "#{base_error_message}: invalid name"
@@ -287,10 +297,11 @@ module REXML
287
297
  raise REXML::ParseException.new(message, @source)
288
298
  end
289
299
  name = parse_name(base_error_message)
290
- if @source.match(/\s*\[/um, true)
300
+ @source.match?(/\s*/um, true) # skip spaces
301
+ if @source.match?("[", true)
291
302
  id = [nil, nil, nil]
292
303
  @document_status = :in_doctype
293
- elsif @source.match(/\s*>/um, true)
304
+ elsif @source.match?(">", true)
294
305
  id = [nil, nil, nil]
295
306
  @document_status = :after_doctype
296
307
  @source.ensure_buffer
@@ -302,9 +313,10 @@ module REXML
302
313
  # For backward compatibility
303
314
  id[1], id[2] = id[2], nil
304
315
  end
305
- if @source.match(/\s*\[/um, true)
316
+ @source.match?(/\s*/um, true) # skip spaces
317
+ if @source.match?("[", true)
306
318
  @document_status = :in_doctype
307
- elsif @source.match(/\s*>/um, true)
319
+ elsif @source.match?(">", true)
308
320
  @document_status = :after_doctype
309
321
  @source.ensure_buffer
310
322
  else
@@ -314,7 +326,7 @@ module REXML
314
326
  end
315
327
  args = [:start_doctype, name, *id]
316
328
  if @document_status == :after_doctype
317
- @source.match(/\s*/um, true)
329
+ @source.match?(/\s*/um, true)
318
330
  @stack << [ :end_doctype ]
319
331
  end
320
332
  return args
@@ -325,14 +337,14 @@ module REXML
325
337
  end
326
338
  end
327
339
  if @document_status == :in_doctype
328
- @source.match(/\s*/um, true) # skip spaces
340
+ @source.match?(/\s*/um, true) # skip spaces
329
341
  start_position = @source.position
330
- if @source.match("<!", true)
331
- if @source.match("ELEMENT", true)
342
+ if @source.match?("<!", true)
343
+ if @source.match?("ELEMENT", true)
332
344
  md = @source.match(/(.*?)>/um, true)
333
345
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
334
346
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
335
- elsif @source.match("ENTITY", true)
347
+ elsif @source.match?("ENTITY", true)
336
348
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
337
349
  unless match_data
338
350
  raise REXML::ParseException.new("Malformed entity declaration", @source)
@@ -364,11 +376,11 @@ module REXML
364
376
  end
365
377
  match << '%' if ref
366
378
  return match
367
- elsif @source.match("ATTLIST", true)
379
+ elsif @source.match?("ATTLIST", true)
368
380
  md = @source.match(Private::ATTLISTDECL_END, true)
369
381
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
370
382
  element = md[1]
371
- contents = md[0]
383
+ contents = "<!ATTLIST" + md[0]
372
384
 
373
385
  pairs = {}
374
386
  values = md[0].strip.scan( ATTDEF_RE )
@@ -384,10 +396,10 @@ module REXML
384
396
  end
385
397
  end
386
398
  return [ :attlistdecl, element, pairs, contents ]
387
- elsif @source.match("NOTATION", true)
399
+ elsif @source.match?("NOTATION", true)
388
400
  base_error_message = "Malformed notation declaration"
389
- unless @source.match(/\s+/um, true)
390
- if @source.match(">")
401
+ unless @source.match?(/\s+/um, true)
402
+ if @source.match?(">")
391
403
  message = "#{base_error_message}: name is missing"
392
404
  else
393
405
  message = "#{base_error_message}: invalid name"
@@ -399,7 +411,8 @@ module REXML
399
411
  id = parse_id(base_error_message,
400
412
  accept_external_id: true,
401
413
  accept_public_id: true)
402
- unless @source.match(/\s*>/um, true)
414
+ @source.match?(/\s*/um, true) # skip spaces
415
+ unless @source.match?(">", true)
403
416
  message = "#{base_error_message}: garbage before end >"
404
417
  raise REXML::ParseException.new(message, @source)
405
418
  end
@@ -413,7 +426,7 @@ module REXML
413
426
  end
414
427
  elsif match = @source.match(/(%.*?;)\s*/um, true)
415
428
  return [ :externalentity, match[1] ]
416
- elsif @source.match(/\]\s*>/um, true)
429
+ elsif @source.match?(/\]\s*>/um, true)
417
430
  @document_status = :after_doctype
418
431
  return [ :end_doctype ]
419
432
  end
@@ -422,16 +435,16 @@ module REXML
422
435
  end
423
436
  end
424
437
  if @document_status == :after_doctype
425
- @source.match(/\s*/um, true)
438
+ @source.match?(/\s*/um, true)
426
439
  end
427
440
  begin
428
441
  start_position = @source.position
429
- if @source.match("<", true)
442
+ if @source.match?("<", true)
430
443
  # :text's read_until may remain only "<" in buffer. In the
431
444
  # case, buffer is empty here. So we need to fill buffer
432
445
  # here explicitly.
433
446
  @source.ensure_buffer
434
- if @source.match("/", true)
447
+ if @source.match?("/", true)
435
448
  @namespaces_restore_stack.pop
436
449
  last_tag = @tags.pop
437
450
  md = @source.match(Private::CLOSE_PATTERN, true)
@@ -446,7 +459,7 @@ module REXML
446
459
  raise REXML::ParseException.new(message, @source)
447
460
  end
448
461
  return [ :end_element, last_tag ]
449
- elsif @source.match("!", true)
462
+ elsif @source.match?("!", true)
450
463
  md = @source.match(/([^>]*>)/um)
451
464
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
452
465
  raise REXML::ParseException.new("Malformed node", @source) unless md
@@ -464,7 +477,7 @@ module REXML
464
477
  end
465
478
  raise REXML::ParseException.new( "Declarations can only occur "+
466
479
  "in the doctype declaration.", @source)
467
- elsif @source.match("?", true)
480
+ elsif @source.match?("?", true)
468
481
  return process_instruction
469
482
  else
470
483
  # Get the next tag
@@ -564,8 +577,12 @@ module REXML
564
577
  return rv if matches.size == 0
565
578
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
566
579
  m=$1
567
- m = "0#{m}" if m[0] == ?x
568
- [Integer(m)].pack('U*')
580
+ if m.start_with?("x")
581
+ code_point = Integer(m[1..-1], 16)
582
+ else
583
+ code_point = Integer(m, 10)
584
+ end
585
+ [code_point].pack('U*')
569
586
  }
570
587
  matches.collect!{|x|x[0]}.compact!
571
588
  if filter
@@ -585,7 +602,7 @@ module REXML
585
602
  end
586
603
  re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
587
604
  rv.gsub!( re, entity_value )
588
- if rv.bytesize > Security.entity_expansion_text_limit
605
+ if rv.bytesize > @entity_expansion_text_limit
589
606
  raise "entity expansion has grown too large"
590
607
  end
591
608
  else
@@ -627,7 +644,7 @@ module REXML
627
644
 
628
645
  def record_entity_expansion(delta=1)
629
646
  @entity_expansion_count += delta
630
- if @entity_expansion_count > Security.entity_expansion_limit
647
+ if @entity_expansion_count > @entity_expansion_limit
631
648
  raise "number of entity expansions exceeded, processing aborted."
632
649
  end
633
650
  end
@@ -641,7 +658,7 @@ module REXML
641
658
  def parse_name(base_error_message)
642
659
  md = @source.match(Private::NAME_PATTERN, true)
643
660
  unless md
644
- if @source.match(/\S/um)
661
+ if @source.match?(/\S/um)
645
662
  message = "#{base_error_message}: invalid name"
646
663
  else
647
664
  message = "#{base_error_message}: name is missing"
@@ -683,34 +700,34 @@ module REXML
683
700
  accept_public_id:)
684
701
  public = /\A\s*PUBLIC/um
685
702
  system = /\A\s*SYSTEM/um
686
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
687
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
703
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
704
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
688
705
  return "public ID literal is missing"
689
706
  end
690
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
707
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
691
708
  return "invalid public ID literal"
692
709
  end
693
710
  if accept_public_id
694
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
711
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
695
712
  return "system ID literal is missing"
696
713
  end
697
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
714
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
698
715
  return "invalid system literal"
699
716
  end
700
717
  "garbage after system literal"
701
718
  else
702
719
  "garbage after public ID literal"
703
720
  end
704
- elsif accept_external_id and @source.match(/#{system}/um)
705
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
721
+ elsif accept_external_id and @source.match?(/#{system}/um)
722
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
706
723
  return "system literal is missing"
707
724
  end
708
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
725
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
709
726
  return "invalid system literal"
710
727
  end
711
728
  "garbage after system literal"
712
729
  else
713
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
730
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
714
731
  return "invalid ID type"
715
732
  end
716
733
  "ID type is missing"
@@ -719,7 +736,7 @@ module REXML
719
736
 
720
737
  def process_instruction
721
738
  name = parse_name("Malformed XML: Invalid processing instruction node")
722
- if @source.match(/\s+/um, true)
739
+ if @source.match?(/\s+/um, true)
723
740
  match_data = @source.match(/(.*?)\?>/um, true)
724
741
  unless match_data
725
742
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
@@ -727,7 +744,7 @@ module REXML
727
744
  content = match_data[1]
728
745
  else
729
746
  content = nil
730
- unless @source.match("?>", true)
747
+ unless @source.match?("?>", true)
731
748
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
732
749
  end
733
750
  end
@@ -752,14 +769,33 @@ module REXML
752
769
  [:processing_instruction, name, content]
753
770
  end
754
771
 
772
+ if StringScanner::Version < "3.1.1"
773
+ def scan_quote
774
+ @source.match(/(['"])/, true)&.[](1)
775
+ end
776
+ else
777
+ def scan_quote
778
+ case @source.peek_byte
779
+ when 34 # '"'.ord
780
+ @source.scan_byte
781
+ '"'
782
+ when 39 # "'".ord
783
+ @source.scan_byte
784
+ "'"
785
+ else
786
+ nil
787
+ end
788
+ end
789
+ end
790
+
755
791
  def parse_attributes(prefixes)
756
792
  attributes = {}
757
793
  expanded_names = {}
758
794
  closed = false
759
795
  while true
760
- if @source.match(">", true)
796
+ if @source.match?(">", true)
761
797
  return attributes, closed
762
- elsif @source.match("/>", true)
798
+ elsif @source.match?("/>", true)
763
799
  closed = true
764
800
  return attributes, closed
765
801
  elsif match = @source.match(QNAME, true)
@@ -767,15 +803,14 @@ module REXML
767
803
  prefix = match[2]
768
804
  local_part = match[3]
769
805
 
770
- unless @source.match(/\s*=\s*/um, true)
806
+ unless @source.match?(/\s*=\s*/um, true)
771
807
  message = "Missing attribute equal: <#{name}>"
772
808
  raise REXML::ParseException.new(message, @source)
773
809
  end
774
- unless match = @source.match(/(['"])/, true)
810
+ unless quote = scan_quote
775
811
  message = "Missing attribute value start quote: <#{name}>"
776
812
  raise REXML::ParseException.new(message, @source)
777
813
  end
778
- quote = match[1]
779
814
  start_position = @source.position
780
815
  value = @source.read_until(quote)
781
816
  unless value.chomp!(quote)
@@ -783,10 +818,10 @@ module REXML
783
818
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
784
819
  raise REXML::ParseException.new(message, @source)
785
820
  end
786
- @source.match(/\s*/um, true)
821
+ @source.match?(/\s*/um, true)
787
822
  if prefix == "xmlns"
788
823
  if local_part == "xml"
789
- if value != "http://www.w3.org/XML/1998/namespace"
824
+ if value != Private::XML_PREFIXED_NAMESPACE
790
825
  msg = "The 'xml' prefix must not be bound to any other namespace "+
791
826
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
792
827
  raise REXML::ParseException.new( msg, @source, self )
@@ -51,6 +51,14 @@ module REXML
51
51
  @parser.entity_expansion_count
52
52
  end
53
53
 
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
54
62
  def each
55
63
  while has_next?
56
64
  yield self.pull
@@ -85,6 +93,10 @@ module REXML
85
93
  def unshift token
86
94
  @my_stack.unshift token
87
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
88
100
  end
89
101
 
90
102
  # A parsing event. The contents of the event are accessed as an +Array+,
@@ -26,6 +26,14 @@ module REXML
26
26
  @parser.entity_expansion_count
27
27
  end
28
28
 
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
29
37
  def add_listener( listener )
30
38
  @parser.add_listener( listener )
31
39
  end
@@ -251,6 +259,8 @@ module REXML
251
259
  end
252
260
 
253
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
254
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
255
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
256
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -18,6 +18,14 @@ module REXML
18
18
  @parser.entity_expansion_count
19
19
  end
20
20
 
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
21
29
  def parse
22
30
  # entity string
23
31
  while true
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.6"
34
+ VERSION = "3.4.1"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -58,8 +68,14 @@ module REXML
58
68
  SCANNER_RESET_SIZE = 100000
59
69
  PRE_DEFINED_TERM_PATTERNS = {}
60
70
  pre_defined_terms = ["'", '"', "<"]
61
- pre_defined_terms.each do |term|
62
- PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
71
+ if StringScanner::Version < "3.1.1"
72
+ pre_defined_terms.each do |term|
73
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
74
+ end
75
+ else
76
+ pre_defined_terms.each do |term|
77
+ PRE_DEFINED_TERM_PATTERNS[term] = term
78
+ end
63
79
  end
64
80
  end
65
81
  private_constant :Private
@@ -77,6 +93,7 @@ module REXML
77
93
  detect_encoding
78
94
  end
79
95
  @line = 0
96
+ @encoded_terms = {}
80
97
  end
81
98
 
82
99
  # The current buffer (what we're going to read next)
@@ -125,6 +142,14 @@ module REXML
125
142
  end
126
143
  end
127
144
 
145
+ def match?(pattern, cons=false)
146
+ if cons
147
+ !@scanner.skip(pattern).nil?
148
+ else
149
+ !@scanner.match?(pattern).nil?
150
+ end
151
+ end
152
+
128
153
  def position
129
154
  @scanner.pos
130
155
  end
@@ -133,6 +158,14 @@ module REXML
133
158
  @scanner.pos = pos
134
159
  end
135
160
 
161
+ def peek_byte
162
+ @scanner.peek_byte
163
+ end
164
+
165
+ def scan_byte
166
+ @scanner.scan_byte
167
+ end
168
+
136
169
  # @return true if the Source is exhausted
137
170
  def empty?
138
171
  @scanner.eos?
@@ -227,7 +260,7 @@ module REXML
227
260
 
228
261
  def read_until(term)
229
262
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
230
- term = encode(term)
263
+ term = @encoded_terms[term] ||= encode(term)
231
264
  until str = @scanner.scan_until(pattern)
232
265
  break if @source.nil?
233
266
  break if @source.eof?
@@ -266,6 +299,23 @@ module REXML
266
299
  md.nil? ? nil : @scanner
267
300
  end
268
301
 
302
+ def match?( pattern, cons=false )
303
+ # To avoid performance issue, we need to increase bytes to read per scan
304
+ min_bytes = 1
305
+ while true
306
+ if cons
307
+ n_matched_bytes = @scanner.skip(pattern)
308
+ else
309
+ n_matched_bytes = @scanner.match?(pattern)
310
+ end
311
+ return true if n_matched_bytes
312
+ return false if pattern.is_a?(String)
313
+ return false if @source.nil?
314
+ return false unless read(nil, min_bytes)
315
+ min_bytes *= 2
316
+ end
317
+ end
318
+
269
319
  def empty?
270
320
  super and ( @source.nil? || @source.eof? )
271
321
  end
@@ -285,7 +335,7 @@ module REXML
285
335
  rescue
286
336
  end
287
337
  @er_source.seek(pos)
288
- rescue IOError
338
+ rescue IOError, SystemCallError
289
339
  pos = -1
290
340
  line = -1
291
341
  end
@@ -294,14 +344,19 @@ module REXML
294
344
 
295
345
  private
296
346
  def readline(term = nil)
297
- str = @source.readline(term || @line_break)
298
347
  if @pending_buffer
348
+ begin
349
+ str = @source.readline(term || @line_break)
350
+ rescue IOError
351
+ end
299
352
  if str.nil?
300
353
  str = @pending_buffer
301
354
  else
302
355
  str = @pending_buffer + str
303
356
  end
304
357
  @pending_buffer = nil
358
+ else
359
+ str = @source.readline(term || @line_break)
305
360
  end
306
361
  return nil if str.nil?
307
362
 
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
@@ -268,7 +243,8 @@ module REXML
268
243
  # u = Text.new( "sean russell", false, nil, true )
269
244
  # u.value #-> "sean russell"
270
245
  def value
271
- @unnormalized ||= Text::unnormalize( @string, doctype )
246
+ @unnormalized ||= Text::unnormalize(@string, doctype,
247
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
272
248
  end
273
249
 
274
250
  # Sets the contents of this text node. This expects the text to be
@@ -411,11 +387,12 @@ module REXML
411
387
  end
412
388
 
413
389
  # Unescapes all possible entities
414
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
390
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
391
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
415
392
  sum = 0
416
393
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
417
394
  s = Text.expand($&, doctype, filter)
418
- if sum + s.bytesize > Security.entity_expansion_text_limit
395
+ if sum + s.bytesize > entity_expansion_text_limit
419
396
  raise "entity expansion has grown too large"
420
397
  else
421
398
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.6
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-08-22 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: '0'
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: '0'
10
+ date: 2025-02-16 00:00:00.000000000 Z
11
+ dependencies: []
26
12
  description: An XML toolkit for Ruby
27
13
  email:
28
14
  - kou@cozmixng.org
@@ -116,7 +102,7 @@ homepage: https://github.com/ruby/rexml
116
102
  licenses:
117
103
  - BSD-2-Clause
118
104
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
120
106
  rdoc_options:
121
107
  - "--main"
122
108
  - README.md
@@ -133,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
119
  - !ruby/object:Gem::Version
134
120
  version: '0'
135
121
  requirements: []
136
- rubygems_version: 3.6.0.dev
122
+ rubygems_version: 3.6.2
137
123
  specification_version: 4
138
124
  summary: An XML toolkit for Ruby
139
125
  test_files: []