rexml 3.3.2 → 3.3.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
4
- data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
3
+ metadata.gz: 5e5e2317fb4a12cc855de221be85a9d62c2966c4997ead5a4ede3600561d5ede
4
+ data.tar.gz: a2b8f326e706211d00a9a8446b84ebd658c9cb82a4f7c98e5760ed2b10d8866c
5
5
  SHA512:
6
- metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
7
- data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
6
+ metadata.gz: 2d26167dc282f9ff928b263927a9f003bddb6591a938b43dfddcd8a2fe2c1ddb4f931f09ec52dd3bf1912953365dcaafafb359bdd6dba1f9ca33a55bbc62ec5b
7
+ data.tar.gz: b3216114c5978079b102a6492cd0d8afde5eaf0af5ebc803873dc7a9ad4dc9afa785000c923f296b88c3b5c663a543348f65a3734801149f792518a1bcb5844c
data/NEWS.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # News
2
2
 
3
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
4
+
5
+ ### Improvements
6
+
7
+ * Added support for detecting invalid XML that has unsupported
8
+ content before root element
9
+ * GH-184
10
+ * Patch by NAITOH Jun.
11
+
12
+ * Added support for `REXML::Security.entity_expansion_limit=` and
13
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
14
+ parsers
15
+ * GH-187
16
+ * Patch by NAITOH Jun.
17
+
18
+ * Added more tests for invalid XMLs.
19
+ * GH-183
20
+ * Patch by Watson.
21
+
22
+ * Added more performance tests.
23
+ * Patch by Watson.
24
+
25
+ * Improved parse performance.
26
+ * GH-186
27
+ * Patch by tomoya ishida.
28
+
29
+ ### Thanks
30
+
31
+ * NAITOH Jun
32
+
33
+ * Watson
34
+
35
+ * tomoya ishida
36
+
3
37
  ## 3.3.2 - 2024-07-16 {#version-3-3-2}
4
38
 
5
39
  ### Improvements
@@ -15,6 +49,9 @@
15
49
  * GH-172
16
50
  * GH-173
17
51
  * GH-174
52
+ * GH-175
53
+ * GH-176
54
+ * GH-177
18
55
  * Patch by Watson.
19
56
 
20
57
  * Added support for raising a parse exception when an XML has extra
@@ -124,19 +124,10 @@ module REXML
124
124
  }
125
125
 
126
126
  module Private
127
- # Terminal requires two or more letters.
128
- INSTRUCTION_TERM = "?>"
129
- COMMENT_TERM = "-->"
130
- CDATA_TERM = "]]>"
131
- DOCTYPE_TERM = "]>"
132
- # Read to the end of DOCTYPE because there is no proper ENTITY termination
133
- ENTITY_TERM = DOCTYPE_TERM
134
-
135
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
136
127
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
137
128
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
138
129
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
139
- NAME_PATTERN = /\s*#{NAME}/um
130
+ NAME_PATTERN = /#{NAME}/um
140
131
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
141
132
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
142
133
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -154,6 +145,7 @@ module REXML
154
145
  self.stream = source
155
146
  @listeners = []
156
147
  @prefixes = Set.new
148
+ @entity_expansion_count = 0
157
149
  end
158
150
 
159
151
  def add_listener( listener )
@@ -161,6 +153,7 @@ module REXML
161
153
  end
162
154
 
163
155
  attr_reader :source
156
+ attr_reader :entity_expansion_count
164
157
 
165
158
  def stream=( source )
166
159
  @source = SourceFactory.create_from( source )
@@ -248,10 +241,10 @@ module REXML
248
241
  if @document_status == nil
249
242
  start_position = @source.position
250
243
  if @source.match("<?", true)
251
- return process_instruction(start_position)
244
+ return process_instruction
252
245
  elsif @source.match("<!", true)
253
246
  if @source.match("--", true)
254
- md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
247
+ md = @source.match(/(.*?)-->/um, true)
255
248
  if md.nil?
256
249
  raise REXML::ParseException.new("Unclosed comment", @source)
257
250
  end
@@ -318,7 +311,11 @@ module REXML
318
311
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
319
312
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
320
313
  elsif @source.match("ENTITY", true)
321
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
314
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
315
+ unless match_data
316
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
317
+ end
318
+ match = [:entitydecl, *match_data.captures.compact]
322
319
  ref = false
323
320
  if match[1] == '%'
324
321
  ref = true
@@ -383,14 +380,14 @@ module REXML
383
380
  raise REXML::ParseException.new(message, @source)
384
381
  end
385
382
  return [:notationdecl, name, *id]
386
- elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
383
+ elsif md = @source.match(/--(.*?)-->/um, true)
387
384
  case md[1]
388
385
  when /--/, /-\z/
389
386
  raise REXML::ParseException.new("Malformed comment", @source)
390
387
  end
391
388
  return [ :comment, md[1] ] if md
392
389
  end
393
- elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
390
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
394
391
  return [ :externalentity, match[1] ]
395
392
  elsif @source.match(/\]\s*>/um, true)
396
393
  @document_status = :after_doctype
@@ -430,7 +427,7 @@ module REXML
430
427
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
431
428
  raise REXML::ParseException.new("Malformed node", @source) unless md
432
429
  if md[0][0] == ?-
433
- md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
430
+ md = @source.match(/--(.*?)-->/um, true)
434
431
 
435
432
  if md.nil? || /--|-\z/.match?(md[1])
436
433
  raise REXML::ParseException.new("Malformed comment", @source)
@@ -438,13 +435,13 @@ module REXML
438
435
 
439
436
  return [ :comment, md[1] ]
440
437
  else
441
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
438
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
442
439
  return [ :cdata, md[1] ] if md
443
440
  end
444
441
  raise REXML::ParseException.new( "Declarations can only occur "+
445
442
  "in the doctype declaration.", @source)
446
443
  elsif @source.match("?", true)
447
- return process_instruction(start_position)
444
+ return process_instruction
448
445
  else
449
446
  # Get the next tag
450
447
  md = @source.match(Private::TAG_PATTERN, true)
@@ -482,11 +479,15 @@ module REXML
482
479
  if text.chomp!("<")
483
480
  @source.position -= "<".bytesize
484
481
  end
485
- if @tags.empty? and @have_root
482
+ if @tags.empty?
486
483
  unless /\A\s*\z/.match?(text)
487
- raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
484
+ if @have_root
485
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
486
+ else
487
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
488
+ end
488
489
  end
489
- return pull_event
490
+ return pull_event if @have_root
490
491
  end
491
492
  return [ :text, text ]
492
493
  end
@@ -505,7 +506,9 @@ module REXML
505
506
  def entity( reference, entities )
506
507
  value = nil
507
508
  value = entities[ reference ] if entities
508
- if not value
509
+ if value
510
+ record_entity_expansion
511
+ else
509
512
  value = DEFAULT_ENTITIES[ reference ]
510
513
  value = value[2] if value
511
514
  end
@@ -544,12 +547,17 @@ module REXML
544
547
  }
545
548
  matches.collect!{|x|x[0]}.compact!
546
549
  if matches.size > 0
550
+ sum = 0
547
551
  matches.each do |entity_reference|
548
552
  unless filter and filter.include?(entity_reference)
549
553
  entity_value = entity( entity_reference, entities )
550
554
  if entity_value
551
555
  re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
552
556
  rv.gsub!( re, entity_value )
557
+ sum += rv.bytesize
558
+ if sum > Security.entity_expansion_text_limit
559
+ raise "entity expansion has grown too large"
560
+ end
553
561
  else
554
562
  er = DEFAULT_ENTITIES[entity_reference]
555
563
  rv.gsub!( er[0], er[2] ) if er
@@ -562,6 +570,14 @@ module REXML
562
570
  end
563
571
 
564
572
  private
573
+
574
+ def record_entity_expansion
575
+ @entity_expansion_count += 1
576
+ if @entity_expansion_count > Security.entity_expansion_limit
577
+ raise "number of entity expansions exceeded, processing aborted."
578
+ end
579
+ end
580
+
565
581
  def need_source_encoding_update?(xml_declaration_encoding)
566
582
  return false if xml_declaration_encoding.nil?
567
583
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -571,14 +587,14 @@ module REXML
571
587
  def parse_name(base_error_message)
572
588
  md = @source.match(Private::NAME_PATTERN, true)
573
589
  unless md
574
- if @source.match(/\s*\S/um)
590
+ if @source.match(/\S/um)
575
591
  message = "#{base_error_message}: invalid name"
576
592
  else
577
593
  message = "#{base_error_message}: name is missing"
578
594
  end
579
595
  raise REXML::ParseException.new(message, @source)
580
596
  end
581
- md[1]
597
+ md[0]
582
598
  end
583
599
 
584
600
  def parse_id(base_error_message,
@@ -647,18 +663,24 @@ module REXML
647
663
  end
648
664
  end
649
665
 
650
- def process_instruction(start_position)
651
- match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
652
- unless match_data
653
- message = "Invalid processing instruction node"
654
- @source.position = start_position
655
- raise REXML::ParseException.new(message, @source)
666
+ def process_instruction
667
+ name = parse_name("Malformed XML: Invalid processing instruction node")
668
+ if @source.match(/\s+/um, true)
669
+ match_data = @source.match(/(.*?)\?>/um, true)
670
+ unless match_data
671
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
672
+ end
673
+ content = match_data[1]
674
+ else
675
+ content = nil
676
+ unless @source.match("?>", true)
677
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
678
+ end
656
679
  end
657
- if match_data[1] == "xml"
680
+ if name == "xml"
658
681
  if @document_status
659
682
  raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
660
683
  end
661
- content = match_data[2]
662
684
  version = VERSION.match(content)
663
685
  version = version[1] unless version.nil?
664
686
  encoding = ENCODING.match(content)
@@ -673,7 +695,7 @@ module REXML
673
695
  standalone = standalone[1] unless standalone.nil?
674
696
  return [ :xmldecl, version, encoding, standalone ]
675
697
  end
676
- [:processing_instruction, match_data[1], match_data[2]]
698
+ [:processing_instruction, name, content]
677
699
  end
678
700
 
679
701
  def parse_attributes(prefixes, curr_ns)
@@ -47,6 +47,10 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
50
54
  def each
51
55
  while has_next?
52
56
  yield self.pull
@@ -22,6 +22,10 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
25
29
  def add_listener( listener )
26
30
  @parser.add_listener( listener )
27
31
  end
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.2"
34
+ VERSION = "3.3.3"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -117,7 +117,7 @@ module REXML
117
117
  def ensure_buffer
118
118
  end
119
119
 
120
- def match(pattern, cons=false, term: nil)
120
+ def match(pattern, cons=false)
121
121
  if cons
122
122
  @scanner.scan(pattern).nil? ? nil : @scanner
123
123
  else
@@ -204,10 +204,20 @@ module REXML
204
204
  end
205
205
  end
206
206
 
207
- def read(term = nil)
207
+ def read(term = nil, min_bytes = 1)
208
208
  term = encode(term) if term
209
209
  begin
210
- @scanner << readline(term)
210
+ str = readline(term)
211
+ @scanner << str
212
+ read_bytes = str.bytesize
213
+ begin
214
+ while read_bytes < min_bytes
215
+ str = readline(term)
216
+ @scanner << str
217
+ read_bytes += str.bytesize
218
+ end
219
+ rescue IOError
220
+ end
211
221
  true
212
222
  rescue Exception, NameError
213
223
  @source = nil
@@ -237,10 +247,9 @@ module REXML
237
247
  read if @scanner.eos? && @source
238
248
  end
239
249
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
- def match( pattern, cons=false, term: nil )
250
+ def match( pattern, cons=false )
251
+ # To avoid performance issue, we need to increase bytes to read per scan
252
+ min_bytes = 1
244
253
  while true
245
254
  if cons
246
255
  md = @scanner.scan(pattern)
@@ -250,7 +259,8 @@ module REXML
250
259
  break if md
251
260
  return nil if pattern.is_a?(String)
252
261
  return nil if @source.nil?
253
- return nil unless read(term)
262
+ return nil unless read(nil, min_bytes)
263
+ min_bytes *= 2
254
264
  end
255
265
 
256
266
  md.nil? ? nil : @scanner
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.2
4
+ version: 3.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-07-16 00:00:00.000000000 Z
10
+ date: 2024-08-01 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.3
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md