rexml 3.3.6 → 3.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
4
- data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
3
+ metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
4
+ data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
5
5
  SHA512:
6
- metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
7
- data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
6
+ metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
7
+ data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
data/NEWS.md CHANGED
@@ -1,5 +1,122 @@
1
1
  # News
2
2
 
3
+ ## 3.4.1 - 2025-02-16 {#version-3-4-1}
4
+
5
+ ### Improvement
6
+
7
+ * Improved performance.
8
+ * GH-226
9
+ * GH-227
10
+ * GH-237
11
+ * Patch by NAITOH Jun
12
+
13
+ ### Fixes
14
+
15
+ * Fixed incorrect serialization of ATTLIST.
16
+ * GH-233
17
+ * GH-234
18
+ * Patch by OlofKalufs
19
+ * Reported by OlofKalufs
20
+
21
+ ### Thanks
22
+
23
+ * NAITOH Jun
24
+
25
+ * OlofKalufs
26
+
27
+ ## 3.4.0 - 2024-12-15 {#version-3-4-0}
28
+
29
+ ### Improvements
30
+
31
+ * Improved performance.
32
+ * GH-216
33
+ * Patch by NAITOH Jun
34
+
35
+ * JRuby: Improved parse performance.
36
+ * GH-219
37
+ * Patch by João Duarte
38
+
39
+ * Added support for reusing pull parser.
40
+ * GH-214
41
+ * GH-220
42
+ * Patch by Dmitry Pogrebnoy
43
+
44
+ * Improved error handling when source is `IO`.
45
+ * GH-221
46
+ * Patch by NAITOH Jun
47
+
48
+ ### Thanks
49
+
50
+ * NAITOH Jun
51
+
52
+ * João Duarte
53
+
54
+ * Dmitry Pogrebnoy
55
+
56
+ ## 3.3.9 - 2024-10-24 {#version-3-3-9}
57
+
58
+ ### Improvements
59
+
60
+ * Improved performance.
61
+ * GH-210
62
+ * Patch by NAITOH Jun.
63
+
64
+ ### Fixes
65
+
66
+ * Fixed a parse bug for text only invalid XML.
67
+ * GH-215
68
+ * Patch by NAITOH Jun.
69
+
70
+ * Fixed a parse bug that `&#0x...;` is accepted as a character
71
+ reference.
72
+
73
+ ### Thanks
74
+
75
+ * NAITOH Jun
76
+
77
+ ## 3.3.8 - 2024-09-29 {#version-3-3-8}
78
+
79
+ ### Improvements
80
+
81
+ * SAX2: Improve parse performance.
82
+ * GH-207
83
+ * Patch by NAITOH Jun.
84
+
85
+ ### Fixes
86
+
87
+ * Fixed a bug that unexpected attribute namespace conflict error for
88
+ the predefined "xml" namespace is reported.
89
+ * GH-208
90
+ * Patch by KITAITI Makoto
91
+
92
+ ### Thanks
93
+
94
+ * NAITOH Jun
95
+
96
+ * KITAITI Makoto
97
+
98
+ ## 3.3.7 - 2024-09-04 {#version-3-3-7}
99
+
100
+ ### Improvements
101
+
102
+ * Added local entity expansion limit methods
103
+ * GH-192
104
+ * GH-202
105
+ * Reported by takuya kodama.
106
+ * Patch by NAITOH Jun.
107
+
108
+ * Removed explicit strscan dependency
109
+ * GH-204
110
+ * Patch by Bo Anderson.
111
+
112
+ ### Thanks
113
+
114
+ * takuya kodama
115
+
116
+ * NAITOH Jun
117
+
118
+ * Bo Anderson
119
+
3
120
  ## 3.3.6 - 2024-08-22 {#version-3-3-6}
4
121
 
5
122
  ### Improvements
@@ -148,8 +148,9 @@ module REXML
148
148
  # have been expanded to their values
149
149
  def value
150
150
  return @unnormalized if @unnormalized
151
- @unnormalized = Text::unnormalize( @normalized, doctype )
152
- @unnormalized
151
+
152
+ @unnormalized = Text::unnormalize(@normalized, doctype,
153
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
153
154
  end
154
155
 
155
156
  # The normalized value of this attribute. That is, the attribute with
@@ -91,6 +91,8 @@ module REXML
91
91
  #
92
92
  def initialize( source = nil, context = {} )
93
93
  @entity_expansion_count = 0
94
+ @entity_expansion_limit = Security.entity_expansion_limit
95
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
94
96
  super()
95
97
  @context = context
96
98
  return if source.nil?
@@ -431,10 +433,12 @@ module REXML
431
433
  end
432
434
 
433
435
  attr_reader :entity_expansion_count
436
+ attr_writer :entity_expansion_limit
437
+ attr_accessor :entity_expansion_text_limit
434
438
 
435
439
  def record_entity_expansion
436
440
  @entity_expansion_count += 1
437
- if @entity_expansion_count > Security.entity_expansion_limit
441
+ if @entity_expansion_count > @entity_expansion_limit
438
442
  raise "number of entity expansions exceeded, processing aborted."
439
443
  end
440
444
  end
data/lib/rexml/entity.rb CHANGED
@@ -71,9 +71,12 @@ module REXML
71
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
72
72
  # &ent; entities.
73
73
  def unnormalized
74
- document.record_entity_expansion unless document.nil?
74
+ document&.record_entity_expansion
75
+
75
76
  return nil if @value.nil?
76
- @unnormalized = Text::unnormalize(@value, parent)
77
+
78
+ @unnormalized = Text::unnormalize(@value, parent,
79
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
77
80
  end
78
81
 
79
82
  #once :unnormalized
@@ -150,12 +150,13 @@ module REXML
150
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
151
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152
152
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153
- CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
153
+ CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154
154
  DEFAULT_ENTITIES_PATTERNS = {}
155
155
  default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156
156
  default_entities.each do |term|
157
157
  DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
158
158
  end
159
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
159
160
  end
160
161
  private_constant :Private
161
162
 
@@ -164,6 +165,9 @@ module REXML
164
165
  @listeners = []
165
166
  @prefixes = Set.new
166
167
  @entity_expansion_count = 0
168
+ @entity_expansion_limit = Security.entity_expansion_limit
169
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
170
+ @source.ensure_buffer
167
171
  end
168
172
 
169
173
  def add_listener( listener )
@@ -172,16 +176,22 @@ module REXML
172
176
 
173
177
  attr_reader :source
174
178
  attr_reader :entity_expansion_count
179
+ attr_writer :entity_expansion_limit
180
+ attr_writer :entity_expansion_text_limit
175
181
 
176
182
  def stream=( source )
177
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
178
188
  @closed = nil
179
189
  @have_root = false
180
190
  @document_status = nil
181
191
  @tags = []
182
192
  @stack = []
183
193
  @entities = []
184
- @namespaces = {}
194
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
185
195
  @namespaces_restore_stack = []
186
196
  end
187
197
 
@@ -263,10 +273,10 @@ module REXML
263
273
  @source.ensure_buffer
264
274
  if @document_status == nil
265
275
  start_position = @source.position
266
- if @source.match("<?", true)
276
+ if @source.match?("<?", true)
267
277
  return process_instruction
268
- elsif @source.match("<!", true)
269
- if @source.match("--", true)
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
270
280
  md = @source.match(/(.*?)-->/um, true)
271
281
  if md.nil?
272
282
  raise REXML::ParseException.new("Unclosed comment", @source)
@@ -275,10 +285,10 @@ module REXML
275
285
  raise REXML::ParseException.new("Malformed comment", @source)
276
286
  end
277
287
  return [ :comment, md[1] ]
278
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
279
289
  base_error_message = "Malformed DOCTYPE"
280
- unless @source.match(/\s+/um, true)
281
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
282
292
  message = "#{base_error_message}: name is missing"
283
293
  else
284
294
  message = "#{base_error_message}: invalid name"
@@ -287,10 +297,11 @@ module REXML
287
297
  raise REXML::ParseException.new(message, @source)
288
298
  end
289
299
  name = parse_name(base_error_message)
290
- if @source.match(/\s*\[/um, true)
300
+ @source.match?(/\s*/um, true) # skip spaces
301
+ if @source.match?("[", true)
291
302
  id = [nil, nil, nil]
292
303
  @document_status = :in_doctype
293
- elsif @source.match(/\s*>/um, true)
304
+ elsif @source.match?(">", true)
294
305
  id = [nil, nil, nil]
295
306
  @document_status = :after_doctype
296
307
  @source.ensure_buffer
@@ -302,9 +313,10 @@ module REXML
302
313
  # For backward compatibility
303
314
  id[1], id[2] = id[2], nil
304
315
  end
305
- if @source.match(/\s*\[/um, true)
316
+ @source.match?(/\s*/um, true) # skip spaces
317
+ if @source.match?("[", true)
306
318
  @document_status = :in_doctype
307
- elsif @source.match(/\s*>/um, true)
319
+ elsif @source.match?(">", true)
308
320
  @document_status = :after_doctype
309
321
  @source.ensure_buffer
310
322
  else
@@ -314,7 +326,7 @@ module REXML
314
326
  end
315
327
  args = [:start_doctype, name, *id]
316
328
  if @document_status == :after_doctype
317
- @source.match(/\s*/um, true)
329
+ @source.match?(/\s*/um, true)
318
330
  @stack << [ :end_doctype ]
319
331
  end
320
332
  return args
@@ -325,14 +337,14 @@ module REXML
325
337
  end
326
338
  end
327
339
  if @document_status == :in_doctype
328
- @source.match(/\s*/um, true) # skip spaces
340
+ @source.match?(/\s*/um, true) # skip spaces
329
341
  start_position = @source.position
330
- if @source.match("<!", true)
331
- if @source.match("ELEMENT", true)
342
+ if @source.match?("<!", true)
343
+ if @source.match?("ELEMENT", true)
332
344
  md = @source.match(/(.*?)>/um, true)
333
345
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
334
346
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
335
- elsif @source.match("ENTITY", true)
347
+ elsif @source.match?("ENTITY", true)
336
348
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
337
349
  unless match_data
338
350
  raise REXML::ParseException.new("Malformed entity declaration", @source)
@@ -364,11 +376,11 @@ module REXML
364
376
  end
365
377
  match << '%' if ref
366
378
  return match
367
- elsif @source.match("ATTLIST", true)
379
+ elsif @source.match?("ATTLIST", true)
368
380
  md = @source.match(Private::ATTLISTDECL_END, true)
369
381
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
370
382
  element = md[1]
371
- contents = md[0]
383
+ contents = "<!ATTLIST" + md[0]
372
384
 
373
385
  pairs = {}
374
386
  values = md[0].strip.scan( ATTDEF_RE )
@@ -384,10 +396,10 @@ module REXML
384
396
  end
385
397
  end
386
398
  return [ :attlistdecl, element, pairs, contents ]
387
- elsif @source.match("NOTATION", true)
399
+ elsif @source.match?("NOTATION", true)
388
400
  base_error_message = "Malformed notation declaration"
389
- unless @source.match(/\s+/um, true)
390
- if @source.match(">")
401
+ unless @source.match?(/\s+/um, true)
402
+ if @source.match?(">")
391
403
  message = "#{base_error_message}: name is missing"
392
404
  else
393
405
  message = "#{base_error_message}: invalid name"
@@ -399,7 +411,8 @@ module REXML
399
411
  id = parse_id(base_error_message,
400
412
  accept_external_id: true,
401
413
  accept_public_id: true)
402
- unless @source.match(/\s*>/um, true)
414
+ @source.match?(/\s*/um, true) # skip spaces
415
+ unless @source.match?(">", true)
403
416
  message = "#{base_error_message}: garbage before end >"
404
417
  raise REXML::ParseException.new(message, @source)
405
418
  end
@@ -413,7 +426,7 @@ module REXML
413
426
  end
414
427
  elsif match = @source.match(/(%.*?;)\s*/um, true)
415
428
  return [ :externalentity, match[1] ]
416
- elsif @source.match(/\]\s*>/um, true)
429
+ elsif @source.match?(/\]\s*>/um, true)
417
430
  @document_status = :after_doctype
418
431
  return [ :end_doctype ]
419
432
  end
@@ -422,16 +435,16 @@ module REXML
422
435
  end
423
436
  end
424
437
  if @document_status == :after_doctype
425
- @source.match(/\s*/um, true)
438
+ @source.match?(/\s*/um, true)
426
439
  end
427
440
  begin
428
441
  start_position = @source.position
429
- if @source.match("<", true)
442
+ if @source.match?("<", true)
430
443
  # :text's read_until may remain only "<" in buffer. In the
431
444
  # case, buffer is empty here. So we need to fill buffer
432
445
  # here explicitly.
433
446
  @source.ensure_buffer
434
- if @source.match("/", true)
447
+ if @source.match?("/", true)
435
448
  @namespaces_restore_stack.pop
436
449
  last_tag = @tags.pop
437
450
  md = @source.match(Private::CLOSE_PATTERN, true)
@@ -446,7 +459,7 @@ module REXML
446
459
  raise REXML::ParseException.new(message, @source)
447
460
  end
448
461
  return [ :end_element, last_tag ]
449
- elsif @source.match("!", true)
462
+ elsif @source.match?("!", true)
450
463
  md = @source.match(/([^>]*>)/um)
451
464
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
452
465
  raise REXML::ParseException.new("Malformed node", @source) unless md
@@ -464,7 +477,7 @@ module REXML
464
477
  end
465
478
  raise REXML::ParseException.new( "Declarations can only occur "+
466
479
  "in the doctype declaration.", @source)
467
- elsif @source.match("?", true)
480
+ elsif @source.match?("?", true)
468
481
  return process_instruction
469
482
  else
470
483
  # Get the next tag
@@ -564,8 +577,12 @@ module REXML
564
577
  return rv if matches.size == 0
565
578
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
566
579
  m=$1
567
- m = "0#{m}" if m[0] == ?x
568
- [Integer(m)].pack('U*')
580
+ if m.start_with?("x")
581
+ code_point = Integer(m[1..-1], 16)
582
+ else
583
+ code_point = Integer(m, 10)
584
+ end
585
+ [code_point].pack('U*')
569
586
  }
570
587
  matches.collect!{|x|x[0]}.compact!
571
588
  if filter
@@ -585,7 +602,7 @@ module REXML
585
602
  end
586
603
  re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
587
604
  rv.gsub!( re, entity_value )
588
- if rv.bytesize > Security.entity_expansion_text_limit
605
+ if rv.bytesize > @entity_expansion_text_limit
589
606
  raise "entity expansion has grown too large"
590
607
  end
591
608
  else
@@ -627,7 +644,7 @@ module REXML
627
644
 
628
645
  def record_entity_expansion(delta=1)
629
646
  @entity_expansion_count += delta
630
- if @entity_expansion_count > Security.entity_expansion_limit
647
+ if @entity_expansion_count > @entity_expansion_limit
631
648
  raise "number of entity expansions exceeded, processing aborted."
632
649
  end
633
650
  end
@@ -641,7 +658,7 @@ module REXML
641
658
  def parse_name(base_error_message)
642
659
  md = @source.match(Private::NAME_PATTERN, true)
643
660
  unless md
644
- if @source.match(/\S/um)
661
+ if @source.match?(/\S/um)
645
662
  message = "#{base_error_message}: invalid name"
646
663
  else
647
664
  message = "#{base_error_message}: name is missing"
@@ -683,34 +700,34 @@ module REXML
683
700
  accept_public_id:)
684
701
  public = /\A\s*PUBLIC/um
685
702
  system = /\A\s*SYSTEM/um
686
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
687
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
703
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
704
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
688
705
  return "public ID literal is missing"
689
706
  end
690
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
707
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
691
708
  return "invalid public ID literal"
692
709
  end
693
710
  if accept_public_id
694
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
711
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
695
712
  return "system ID literal is missing"
696
713
  end
697
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
714
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
698
715
  return "invalid system literal"
699
716
  end
700
717
  "garbage after system literal"
701
718
  else
702
719
  "garbage after public ID literal"
703
720
  end
704
- elsif accept_external_id and @source.match(/#{system}/um)
705
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
721
+ elsif accept_external_id and @source.match?(/#{system}/um)
722
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
706
723
  return "system literal is missing"
707
724
  end
708
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
725
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
709
726
  return "invalid system literal"
710
727
  end
711
728
  "garbage after system literal"
712
729
  else
713
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
730
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
714
731
  return "invalid ID type"
715
732
  end
716
733
  "ID type is missing"
@@ -719,7 +736,7 @@ module REXML
719
736
 
720
737
  def process_instruction
721
738
  name = parse_name("Malformed XML: Invalid processing instruction node")
722
- if @source.match(/\s+/um, true)
739
+ if @source.match?(/\s+/um, true)
723
740
  match_data = @source.match(/(.*?)\?>/um, true)
724
741
  unless match_data
725
742
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
@@ -727,7 +744,7 @@ module REXML
727
744
  content = match_data[1]
728
745
  else
729
746
  content = nil
730
- unless @source.match("?>", true)
747
+ unless @source.match?("?>", true)
731
748
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
732
749
  end
733
750
  end
@@ -752,14 +769,33 @@ module REXML
752
769
  [:processing_instruction, name, content]
753
770
  end
754
771
 
772
+ if StringScanner::Version < "3.1.1"
773
+ def scan_quote
774
+ @source.match(/(['"])/, true)&.[](1)
775
+ end
776
+ else
777
+ def scan_quote
778
+ case @source.peek_byte
779
+ when 34 # '"'.ord
780
+ @source.scan_byte
781
+ '"'
782
+ when 39 # "'".ord
783
+ @source.scan_byte
784
+ "'"
785
+ else
786
+ nil
787
+ end
788
+ end
789
+ end
790
+
755
791
  def parse_attributes(prefixes)
756
792
  attributes = {}
757
793
  expanded_names = {}
758
794
  closed = false
759
795
  while true
760
- if @source.match(">", true)
796
+ if @source.match?(">", true)
761
797
  return attributes, closed
762
- elsif @source.match("/>", true)
798
+ elsif @source.match?("/>", true)
763
799
  closed = true
764
800
  return attributes, closed
765
801
  elsif match = @source.match(QNAME, true)
@@ -767,15 +803,14 @@ module REXML
767
803
  prefix = match[2]
768
804
  local_part = match[3]
769
805
 
770
- unless @source.match(/\s*=\s*/um, true)
806
+ unless @source.match?(/\s*=\s*/um, true)
771
807
  message = "Missing attribute equal: <#{name}>"
772
808
  raise REXML::ParseException.new(message, @source)
773
809
  end
774
- unless match = @source.match(/(['"])/, true)
810
+ unless quote = scan_quote
775
811
  message = "Missing attribute value start quote: <#{name}>"
776
812
  raise REXML::ParseException.new(message, @source)
777
813
  end
778
- quote = match[1]
779
814
  start_position = @source.position
780
815
  value = @source.read_until(quote)
781
816
  unless value.chomp!(quote)
@@ -783,10 +818,10 @@ module REXML
783
818
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
784
819
  raise REXML::ParseException.new(message, @source)
785
820
  end
786
- @source.match(/\s*/um, true)
821
+ @source.match?(/\s*/um, true)
787
822
  if prefix == "xmlns"
788
823
  if local_part == "xml"
789
- if value != "http://www.w3.org/XML/1998/namespace"
824
+ if value != Private::XML_PREFIXED_NAMESPACE
790
825
  msg = "The 'xml' prefix must not be bound to any other namespace "+
791
826
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
792
827
  raise REXML::ParseException.new( msg, @source, self )
@@ -51,6 +51,14 @@ module REXML
51
51
  @parser.entity_expansion_count
52
52
  end
53
53
 
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
54
62
  def each
55
63
  while has_next?
56
64
  yield self.pull
@@ -85,6 +93,10 @@ module REXML
85
93
  def unshift token
86
94
  @my_stack.unshift token
87
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
88
100
  end
89
101
 
90
102
  # A parsing event. The contents of the event are accessed as an +Array+,
@@ -26,6 +26,14 @@ module REXML
26
26
  @parser.entity_expansion_count
27
27
  end
28
28
 
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
29
37
  def add_listener( listener )
30
38
  @parser.add_listener( listener )
31
39
  end
@@ -251,6 +259,8 @@ module REXML
251
259
  end
252
260
 
253
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
254
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
255
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
256
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -18,6 +18,14 @@ module REXML
18
18
  @parser.entity_expansion_count
19
19
  end
20
20
 
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
21
29
  def parse
22
30
  # entity string
23
31
  while true
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.6"
34
+ VERSION = "3.4.1"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -58,8 +68,14 @@ module REXML
58
68
  SCANNER_RESET_SIZE = 100000
59
69
  PRE_DEFINED_TERM_PATTERNS = {}
60
70
  pre_defined_terms = ["'", '"', "<"]
61
- pre_defined_terms.each do |term|
62
- PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
71
+ if StringScanner::Version < "3.1.1"
72
+ pre_defined_terms.each do |term|
73
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
74
+ end
75
+ else
76
+ pre_defined_terms.each do |term|
77
+ PRE_DEFINED_TERM_PATTERNS[term] = term
78
+ end
63
79
  end
64
80
  end
65
81
  private_constant :Private
@@ -77,6 +93,7 @@ module REXML
77
93
  detect_encoding
78
94
  end
79
95
  @line = 0
96
+ @encoded_terms = {}
80
97
  end
81
98
 
82
99
  # The current buffer (what we're going to read next)
@@ -125,6 +142,14 @@ module REXML
125
142
  end
126
143
  end
127
144
 
145
+ def match?(pattern, cons=false)
146
+ if cons
147
+ !@scanner.skip(pattern).nil?
148
+ else
149
+ !@scanner.match?(pattern).nil?
150
+ end
151
+ end
152
+
128
153
  def position
129
154
  @scanner.pos
130
155
  end
@@ -133,6 +158,14 @@ module REXML
133
158
  @scanner.pos = pos
134
159
  end
135
160
 
161
+ def peek_byte
162
+ @scanner.peek_byte
163
+ end
164
+
165
+ def scan_byte
166
+ @scanner.scan_byte
167
+ end
168
+
136
169
  # @return true if the Source is exhausted
137
170
  def empty?
138
171
  @scanner.eos?
@@ -227,7 +260,7 @@ module REXML
227
260
 
228
261
  def read_until(term)
229
262
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
230
- term = encode(term)
263
+ term = @encoded_terms[term] ||= encode(term)
231
264
  until str = @scanner.scan_until(pattern)
232
265
  break if @source.nil?
233
266
  break if @source.eof?
@@ -266,6 +299,23 @@ module REXML
266
299
  md.nil? ? nil : @scanner
267
300
  end
268
301
 
302
+ def match?( pattern, cons=false )
303
+ # To avoid performance issue, we need to increase bytes to read per scan
304
+ min_bytes = 1
305
+ while true
306
+ if cons
307
+ n_matched_bytes = @scanner.skip(pattern)
308
+ else
309
+ n_matched_bytes = @scanner.match?(pattern)
310
+ end
311
+ return true if n_matched_bytes
312
+ return false if pattern.is_a?(String)
313
+ return false if @source.nil?
314
+ return false unless read(nil, min_bytes)
315
+ min_bytes *= 2
316
+ end
317
+ end
318
+
269
319
  def empty?
270
320
  super and ( @source.nil? || @source.eof? )
271
321
  end
@@ -285,7 +335,7 @@ module REXML
285
335
  rescue
286
336
  end
287
337
  @er_source.seek(pos)
288
- rescue IOError
338
+ rescue IOError, SystemCallError
289
339
  pos = -1
290
340
  line = -1
291
341
  end
@@ -294,14 +344,19 @@ module REXML
294
344
 
295
345
  private
296
346
  def readline(term = nil)
297
- str = @source.readline(term || @line_break)
298
347
  if @pending_buffer
348
+ begin
349
+ str = @source.readline(term || @line_break)
350
+ rescue IOError
351
+ end
299
352
  if str.nil?
300
353
  str = @pending_buffer
301
354
  else
302
355
  str = @pending_buffer + str
303
356
  end
304
357
  @pending_buffer = nil
358
+ else
359
+ str = @source.readline(term || @line_break)
305
360
  end
306
361
  return nil if str.nil?
307
362
 
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
@@ -268,7 +243,8 @@ module REXML
268
243
  # u = Text.new( "sean russell", false, nil, true )
269
244
  # u.value #-> "sean russell"
270
245
  def value
271
- @unnormalized ||= Text::unnormalize( @string, doctype )
246
+ @unnormalized ||= Text::unnormalize(@string, doctype,
247
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
272
248
  end
273
249
 
274
250
  # Sets the contents of this text node. This expects the text to be
@@ -411,11 +387,12 @@ module REXML
411
387
  end
412
388
 
413
389
  # Unescapes all possible entities
414
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
390
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
391
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
415
392
  sum = 0
416
393
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
417
394
  s = Text.expand($&, doctype, filter)
418
- if sum + s.bytesize > Security.entity_expansion_text_limit
395
+ if sum + s.bytesize > entity_expansion_text_limit
419
396
  raise "entity expansion has grown too large"
420
397
  else
421
398
  sum += s.bytesize
metadata CHANGED
@@ -1,28 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.6
4
+ version: 3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-08-22 00:00:00.000000000 Z
11
- dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: strscan
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - ">="
17
- - !ruby/object:Gem::Version
18
- version: '0'
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: '0'
10
+ date: 2025-02-16 00:00:00.000000000 Z
11
+ dependencies: []
26
12
  description: An XML toolkit for Ruby
27
13
  email:
28
14
  - kou@cozmixng.org
@@ -116,7 +102,7 @@ homepage: https://github.com/ruby/rexml
116
102
  licenses:
117
103
  - BSD-2-Clause
118
104
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
105
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
120
106
  rdoc_options:
121
107
  - "--main"
122
108
  - README.md
@@ -133,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
119
  - !ruby/object:Gem::Version
134
120
  version: '0'
135
121
  requirements: []
136
- rubygems_version: 3.6.0.dev
122
+ rubygems_version: 3.6.2
137
123
  specification_version: 4
138
124
  summary: An XML toolkit for Ruby
139
125
  test_files: []