rexml 3.3.2 → 3.3.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
4
- data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
3
+ metadata.gz: 8e2ee370ff6c1ab70149f6743a12ddf1eeae2c2af3c20f8cb7c6e56ff9699eec
4
+ data.tar.gz: 158254197a12b1038b9b5e116c9abc89a329ef97acda8031399a56d3aee45fe9
5
5
  SHA512:
6
- metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
7
- data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
6
+ metadata.gz: 6b805e28e50ef71bbc5d0349fdd4ec57ec4811bba94fe4c3f8aa17bedb81971da48e98205c53a8eadd18f07b69a2f68c8200529d546aef4187f9f3e903670857
7
+ data.tar.gz: df3e369135f9b156475772a77702a91d45b8ee64ad49f608b2b33dc63d7b07dd271d7ac458d0b5e944e613798a0940231282997a747c4838e3e5c3afaf60253b
data/NEWS.md CHANGED
@@ -1,5 +1,70 @@
1
1
  # News
2
2
 
3
+ ## 3.3.5 - 2024-08-12 {#version-3-3-5}
4
+
5
+ ### Fixes
6
+
7
+ * Fixed a bug that `REXML::Security.entity_expansion_text_limit`
8
+ check has wrong text size calculation in SAX and pull parsers.
9
+ * GH-193
10
+ * GH-195
11
+ * Reported by Viktor Ivarsson.
12
+ * Patch by NAITOH Jun.
13
+
14
+ ### Thanks
15
+
16
+ * Viktor Ivarsson
17
+
18
+ * NAITOH Jun
19
+
20
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
21
+
22
+ ### Fixes
23
+
24
+ * Fixed a bug that `REXML::Security` isn't defined when
25
+ `REXML::Parsers::StreamParser` is used and
26
+ `rexml/parsers/streamparser` is only required.
27
+ * GH-189
28
+ * Patch by takuya kodama.
29
+
30
+ ### Thanks
31
+
32
+ * takuya kodama
33
+
34
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
35
+
36
+ ### Improvements
37
+
38
+ * Added support for detecting invalid XML that has unsupported
39
+ content before root element
40
+ * GH-184
41
+ * Patch by NAITOH Jun.
42
+
43
+ * Added support for `REXML::Security.entity_expansion_limit=` and
44
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
45
+ parsers
46
+ * GH-187
47
+ * Patch by NAITOH Jun.
48
+
49
+ * Added more tests for invalid XMLs.
50
+ * GH-183
51
+ * Patch by Watson.
52
+
53
+ * Added more performance tests.
54
+ * Patch by Watson.
55
+
56
+ * Improved parse performance.
57
+ * GH-186
58
+ * Patch by tomoya ishida.
59
+
60
+ ### Thanks
61
+
62
+ * NAITOH Jun
63
+
64
+ * Watson
65
+
66
+ * tomoya ishida
67
+
3
68
  ## 3.3.2 - 2024-07-16 {#version-3-3-2}
4
69
 
5
70
  ### Improvements
@@ -15,6 +80,9 @@
15
80
  * GH-172
16
81
  * GH-173
17
82
  * GH-174
83
+ * GH-175
84
+ * GH-176
85
+ * GH-177
18
86
  * Patch by Watson.
19
87
 
20
88
  * Added support for raising a parse exception when an XML has extra
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
@@ -124,19 +125,10 @@ module REXML
124
125
  }
125
126
 
126
127
  module Private
127
- # Terminal requires two or more letters.
128
- INSTRUCTION_TERM = "?>"
129
- COMMENT_TERM = "-->"
130
- CDATA_TERM = "]]>"
131
- DOCTYPE_TERM = "]>"
132
- # Read to the end of DOCTYPE because there is no proper ENTITY termination
133
- ENTITY_TERM = DOCTYPE_TERM
134
-
135
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
136
128
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
137
129
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
138
130
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
139
- NAME_PATTERN = /\s*#{NAME}/um
131
+ NAME_PATTERN = /#{NAME}/um
140
132
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
141
133
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
142
134
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -154,6 +146,7 @@ module REXML
154
146
  self.stream = source
155
147
  @listeners = []
156
148
  @prefixes = Set.new
149
+ @entity_expansion_count = 0
157
150
  end
158
151
 
159
152
  def add_listener( listener )
@@ -161,6 +154,7 @@ module REXML
161
154
  end
162
155
 
163
156
  attr_reader :source
157
+ attr_reader :entity_expansion_count
164
158
 
165
159
  def stream=( source )
166
160
  @source = SourceFactory.create_from( source )
@@ -248,10 +242,10 @@ module REXML
248
242
  if @document_status == nil
249
243
  start_position = @source.position
250
244
  if @source.match("<?", true)
251
- return process_instruction(start_position)
245
+ return process_instruction
252
246
  elsif @source.match("<!", true)
253
247
  if @source.match("--", true)
254
- md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
248
+ md = @source.match(/(.*?)-->/um, true)
255
249
  if md.nil?
256
250
  raise REXML::ParseException.new("Unclosed comment", @source)
257
251
  end
@@ -318,7 +312,11 @@ module REXML
318
312
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
319
313
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
320
314
  elsif @source.match("ENTITY", true)
321
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
315
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
316
+ unless match_data
317
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
318
+ end
319
+ match = [:entitydecl, *match_data.captures.compact]
322
320
  ref = false
323
321
  if match[1] == '%'
324
322
  ref = true
@@ -383,14 +381,14 @@ module REXML
383
381
  raise REXML::ParseException.new(message, @source)
384
382
  end
385
383
  return [:notationdecl, name, *id]
386
- elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
384
+ elsif md = @source.match(/--(.*?)-->/um, true)
387
385
  case md[1]
388
386
  when /--/, /-\z/
389
387
  raise REXML::ParseException.new("Malformed comment", @source)
390
388
  end
391
389
  return [ :comment, md[1] ] if md
392
390
  end
393
- elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
391
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
394
392
  return [ :externalentity, match[1] ]
395
393
  elsif @source.match(/\]\s*>/um, true)
396
394
  @document_status = :after_doctype
@@ -430,7 +428,7 @@ module REXML
430
428
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
431
429
  raise REXML::ParseException.new("Malformed node", @source) unless md
432
430
  if md[0][0] == ?-
433
- md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
431
+ md = @source.match(/--(.*?)-->/um, true)
434
432
 
435
433
  if md.nil? || /--|-\z/.match?(md[1])
436
434
  raise REXML::ParseException.new("Malformed comment", @source)
@@ -438,13 +436,13 @@ module REXML
438
436
 
439
437
  return [ :comment, md[1] ]
440
438
  else
441
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
439
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
442
440
  return [ :cdata, md[1] ] if md
443
441
  end
444
442
  raise REXML::ParseException.new( "Declarations can only occur "+
445
443
  "in the doctype declaration.", @source)
446
444
  elsif @source.match("?", true)
447
- return process_instruction(start_position)
445
+ return process_instruction
448
446
  else
449
447
  # Get the next tag
450
448
  md = @source.match(Private::TAG_PATTERN, true)
@@ -482,11 +480,15 @@ module REXML
482
480
  if text.chomp!("<")
483
481
  @source.position -= "<".bytesize
484
482
  end
485
- if @tags.empty? and @have_root
483
+ if @tags.empty?
486
484
  unless /\A\s*\z/.match?(text)
487
- raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
485
+ if @have_root
486
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
487
+ else
488
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
489
+ end
488
490
  end
489
- return pull_event
491
+ return pull_event if @have_root
490
492
  end
491
493
  return [ :text, text ]
492
494
  end
@@ -505,7 +507,9 @@ module REXML
505
507
  def entity( reference, entities )
506
508
  value = nil
507
509
  value = entities[ reference ] if entities
508
- if not value
510
+ if value
511
+ record_entity_expansion
512
+ else
509
513
  value = DEFAULT_ENTITIES[ reference ]
510
514
  value = value[2] if value
511
515
  end
@@ -550,6 +554,9 @@ module REXML
550
554
  if entity_value
551
555
  re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
552
556
  rv.gsub!( re, entity_value )
557
+ if rv.bytesize > Security.entity_expansion_text_limit
558
+ raise "entity expansion has grown too large"
559
+ end
553
560
  else
554
561
  er = DEFAULT_ENTITIES[entity_reference]
555
562
  rv.gsub!( er[0], er[2] ) if er
@@ -562,6 +569,14 @@ module REXML
562
569
  end
563
570
 
564
571
  private
572
+
573
+ def record_entity_expansion
574
+ @entity_expansion_count += 1
575
+ if @entity_expansion_count > Security.entity_expansion_limit
576
+ raise "number of entity expansions exceeded, processing aborted."
577
+ end
578
+ end
579
+
565
580
  def need_source_encoding_update?(xml_declaration_encoding)
566
581
  return false if xml_declaration_encoding.nil?
567
582
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -571,14 +586,14 @@ module REXML
571
586
  def parse_name(base_error_message)
572
587
  md = @source.match(Private::NAME_PATTERN, true)
573
588
  unless md
574
- if @source.match(/\s*\S/um)
589
+ if @source.match(/\S/um)
575
590
  message = "#{base_error_message}: invalid name"
576
591
  else
577
592
  message = "#{base_error_message}: name is missing"
578
593
  end
579
594
  raise REXML::ParseException.new(message, @source)
580
595
  end
581
- md[1]
596
+ md[0]
582
597
  end
583
598
 
584
599
  def parse_id(base_error_message,
@@ -647,18 +662,24 @@ module REXML
647
662
  end
648
663
  end
649
664
 
650
- def process_instruction(start_position)
651
- match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
652
- unless match_data
653
- message = "Invalid processing instruction node"
654
- @source.position = start_position
655
- raise REXML::ParseException.new(message, @source)
665
+ def process_instruction
666
+ name = parse_name("Malformed XML: Invalid processing instruction node")
667
+ if @source.match(/\s+/um, true)
668
+ match_data = @source.match(/(.*?)\?>/um, true)
669
+ unless match_data
670
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
671
+ end
672
+ content = match_data[1]
673
+ else
674
+ content = nil
675
+ unless @source.match("?>", true)
676
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
677
+ end
656
678
  end
657
- if match_data[1] == "xml"
679
+ if name == "xml"
658
680
  if @document_status
659
681
  raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
660
682
  end
661
- content = match_data[2]
662
683
  version = VERSION.match(content)
663
684
  version = version[1] unless version.nil?
664
685
  encoding = ENCODING.match(content)
@@ -673,7 +694,7 @@ module REXML
673
694
  standalone = standalone[1] unless standalone.nil?
674
695
  return [ :xmldecl, version, encoding, standalone ]
675
696
  end
676
- [:processing_instruction, match_data[1], match_data[2]]
697
+ [:processing_instruction, name, content]
677
698
  end
678
699
 
679
700
  def parse_attributes(prefixes, curr_ns)
@@ -47,6 +47,10 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
50
54
  def each
51
55
  while has_next?
52
56
  yield self.pull
@@ -22,6 +22,10 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
25
29
  def add_listener( listener )
26
30
  @parser.add_listener( listener )
27
31
  end
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.2"
34
+ VERSION = "3.3.5"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -117,7 +117,7 @@ module REXML
117
117
  def ensure_buffer
118
118
  end
119
119
 
120
- def match(pattern, cons=false, term: nil)
120
+ def match(pattern, cons=false)
121
121
  if cons
122
122
  @scanner.scan(pattern).nil? ? nil : @scanner
123
123
  else
@@ -204,10 +204,20 @@ module REXML
204
204
  end
205
205
  end
206
206
 
207
- def read(term = nil)
207
+ def read(term = nil, min_bytes = 1)
208
208
  term = encode(term) if term
209
209
  begin
210
- @scanner << readline(term)
210
+ str = readline(term)
211
+ @scanner << str
212
+ read_bytes = str.bytesize
213
+ begin
214
+ while read_bytes < min_bytes
215
+ str = readline(term)
216
+ @scanner << str
217
+ read_bytes += str.bytesize
218
+ end
219
+ rescue IOError
220
+ end
211
221
  true
212
222
  rescue Exception, NameError
213
223
  @source = nil
@@ -237,10 +247,9 @@ module REXML
237
247
  read if @scanner.eos? && @source
238
248
  end
239
249
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
- def match( pattern, cons=false, term: nil )
250
+ def match( pattern, cons=false )
251
+ # To avoid performance issue, we need to increase bytes to read per scan
252
+ min_bytes = 1
244
253
  while true
245
254
  if cons
246
255
  md = @scanner.scan(pattern)
@@ -250,7 +259,8 @@ module REXML
250
259
  break if md
251
260
  return nil if pattern.is_a?(String)
252
261
  return nil if @source.nil?
253
- return nil unless read(term)
262
+ return nil unless read(nil, min_bytes)
263
+ min_bytes *= 2
254
264
  end
255
265
 
256
266
  md.nil? ? nil : @scanner
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.2
4
+ version: 3.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-07-16 00:00:00.000000000 Z
10
+ date: 2024-08-12 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.5
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md