rexml 3.3.1 → 3.3.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
4
- data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
3
+ metadata.gz: e47ba1209ca1ca2ae0584348378fcefe05de5dc277273d434a37d62e04c676b3
4
+ data.tar.gz: 867f9e01423f83063aac7c59e07670c88c20f527f676e28cdf9d098248293c56
5
5
  SHA512:
6
- metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
7
- data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
6
+ metadata.gz: d87d9cd9384218f3a9bd65870cef99e057022c83bae434318daeab781444378ea830ce46ae20879954f2ae54a7a00cc54eac2839784b989612315ddef909c809
7
+ data.tar.gz: 1e61927c65b9a058626d0ab19c7f5af0d49169d896e76402e0152476cc772dabf41b8f7a135040b12f5c46eac933de8e60d21fdea8388ed7342be8cc6f9114e9
data/NEWS.md CHANGED
@@ -1,5 +1,104 @@
1
1
  # News
2
2
 
3
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
4
+
5
+ ### Fixes
6
+
7
+ * Fixed a bug where `REXML::Security` isn't defined when
8
+ `REXML::Parsers::StreamParser` is used and
9
+ only `rexml/parsers/streamparser` is required.
10
+ * GH-189
11
+ * Patch by takuya kodama.
12
+
13
+ ### Thanks
14
+
15
+ * takuya kodama
16
+
17
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
18
+
19
+ ### Improvements
20
+
21
+ * Added support for detecting invalid XML that has unsupported
22
+ content before the root element.
23
+ * GH-184
24
+ * Patch by NAITOH Jun.
25
+
26
+ * Added support for `REXML::Security.entity_expansion_limit=` and
27
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
28
+ parsers
29
+ * GH-187
30
+ * Patch by NAITOH Jun.
31
+
32
+ * Added more tests for invalid XMLs.
33
+ * GH-183
34
+ * Patch by Watson.
35
+
36
+ * Added more performance tests.
37
+ * Patch by Watson.
38
+
39
+ * Improved parse performance.
40
+ * GH-186
41
+ * Patch by tomoya ishida.
42
+
43
+ ### Thanks
44
+
45
+ * NAITOH Jun
46
+
47
+ * Watson
48
+
49
+ * tomoya ishida
50
+
51
+ ## 3.3.2 - 2024-07-16 {#version-3-3-2}
52
+
53
+ ### Improvements
54
+
55
+ * Improved parse performance.
56
+ * GH-160
57
+ * Patch by NAITOH Jun.
58
+
59
+ * Improved parse performance.
60
+ * GH-169
61
+ * GH-170
62
+ * GH-171
63
+ * GH-172
64
+ * GH-173
65
+ * GH-174
66
+ * GH-175
67
+ * GH-176
68
+ * GH-177
69
+ * Patch by Watson.
70
+
71
+ * Added support for raising a parse exception when an XML has extra
72
+ content after the root element.
73
+ * GH-161
74
+ * Patch by NAITOH Jun.
75
+
76
+ * Added support for raising a parse exception when an XML
77
+ declaration exists in the wrong position.
78
+ * GH-162
79
+ * Patch by NAITOH Jun.
80
+
81
+ * Removed a needless space after the XML declaration in pretty print mode.
82
+ * GH-164
83
+ * Patch by NAITOH Jun.
84
+
85
+ * Stopped emitting `:text` events after the root element.
86
+ * GH-167
87
+ * Patch by NAITOH Jun.
88
+
89
+ ### Fixes
90
+
91
+ * Fixed a bug where the SAX2 parser doesn't expand predefined entities for the
92
+ `characters` callback.
93
+ * GH-168
94
+ * Patch by NAITOH Jun.
95
+
96
+ ### Thanks
97
+
98
+ * NAITOH Jun
99
+
100
+ * Watson
101
+
3
102
  ## 3.3.1 - 2024-06-25 {#version-3-3-1}
4
103
 
5
104
  ### Improvements
@@ -111,7 +111,7 @@ module REXML
111
111
  # itself, then we don't need a carriage return... which makes this
112
112
  # logic more complex.
113
113
  node.children.each { |child|
114
- next if child == node.children[-1] and child.instance_of?(Text)
114
+ next if child.instance_of?(Text)
115
115
  unless child == node.children[0] or child.instance_of?(Text) or
116
116
  (child == node.children[1] and !node.children[0].writethis)
117
117
  output << "\n"
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
@@ -124,11 +125,10 @@ module REXML
124
125
  }
125
126
 
126
127
  module Private
127
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
128
128
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
129
129
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
130
130
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
131
- NAME_PATTERN = /\s*#{NAME}/um
131
+ NAME_PATTERN = /#{NAME}/um
132
132
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
133
133
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
134
134
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -146,6 +146,7 @@ module REXML
146
146
  self.stream = source
147
147
  @listeners = []
148
148
  @prefixes = Set.new
149
+ @entity_expansion_count = 0
149
150
  end
150
151
 
151
152
  def add_listener( listener )
@@ -153,10 +154,12 @@ module REXML
153
154
  end
154
155
 
155
156
  attr_reader :source
157
+ attr_reader :entity_expansion_count
156
158
 
157
159
  def stream=( source )
158
160
  @source = SourceFactory.create_from( source )
159
161
  @closed = nil
162
+ @have_root = false
160
163
  @document_status = nil
161
164
  @tags = []
162
165
  @stack = []
@@ -239,7 +242,7 @@ module REXML
239
242
  if @document_status == nil
240
243
  start_position = @source.position
241
244
  if @source.match("<?", true)
242
- return process_instruction(start_position)
245
+ return process_instruction
243
246
  elsif @source.match("<!", true)
244
247
  if @source.match("--", true)
245
248
  md = @source.match(/(.*?)-->/um, true)
@@ -309,7 +312,11 @@ module REXML
309
312
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
310
313
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
311
314
  elsif @source.match("ENTITY", true)
312
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
315
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
316
+ unless match_data
317
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
318
+ end
319
+ match = [:entitydecl, *match_data.captures.compact]
313
320
  ref = false
314
321
  if match[1] == '%'
315
322
  ref = true
@@ -341,7 +348,7 @@ module REXML
341
348
  contents = md[0]
342
349
 
343
350
  pairs = {}
344
- values = md[0].scan( ATTDEF_RE )
351
+ values = md[0].strip.scan( ATTDEF_RE )
345
352
  values.each do |attdef|
346
353
  unless attdef[3] == "#IMPLIED"
347
354
  attdef.compact!
@@ -435,7 +442,7 @@ module REXML
435
442
  raise REXML::ParseException.new( "Declarations can only occur "+
436
443
  "in the doctype declaration.", @source)
437
444
  elsif @source.match("?", true)
438
- return process_instruction(start_position)
445
+ return process_instruction
439
446
  else
440
447
  # Get the next tag
441
448
  md = @source.match(Private::TAG_PATTERN, true)
@@ -460,8 +467,12 @@ module REXML
460
467
  @closed = tag
461
468
  @nsstack.shift
462
469
  else
470
+ if @tags.empty? and @have_root
471
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
472
+ end
463
473
  @tags.push( tag )
464
474
  end
475
+ @have_root = true
465
476
  return [ :start_element, tag, attributes ]
466
477
  end
467
478
  else
@@ -469,6 +480,16 @@ module REXML
469
480
  if text.chomp!("<")
470
481
  @source.position -= "<".bytesize
471
482
  end
483
+ if @tags.empty?
484
+ unless /\A\s*\z/.match?(text)
485
+ if @have_root
486
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
487
+ else
488
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
489
+ end
490
+ end
491
+ return pull_event if @have_root
492
+ end
472
493
  return [ :text, text ]
473
494
  end
474
495
  rescue REXML::UndefinedNamespaceException
@@ -486,7 +507,9 @@ module REXML
486
507
  def entity( reference, entities )
487
508
  value = nil
488
509
  value = entities[ reference ] if entities
489
- if not value
510
+ if value
511
+ record_entity_expansion
512
+ else
490
513
  value = DEFAULT_ENTITIES[ reference ]
491
514
  value = value[2] if value
492
515
  end
@@ -511,7 +534,11 @@ module REXML
511
534
 
512
535
  # Unescapes all possible entities
513
536
  def unnormalize( string, entities=nil, filter=nil )
514
- rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
537
+ if string.include?("\r")
538
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
539
+ else
540
+ rv = string.dup
541
+ end
515
542
  matches = rv.scan( REFERENCE_RE )
516
543
  return rv if matches.size == 0
517
544
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
@@ -521,12 +548,17 @@ module REXML
521
548
  }
522
549
  matches.collect!{|x|x[0]}.compact!
523
550
  if matches.size > 0
551
+ sum = 0
524
552
  matches.each do |entity_reference|
525
553
  unless filter and filter.include?(entity_reference)
526
554
  entity_value = entity( entity_reference, entities )
527
555
  if entity_value
528
556
  re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
529
557
  rv.gsub!( re, entity_value )
558
+ sum += rv.bytesize
559
+ if sum > Security.entity_expansion_text_limit
560
+ raise "entity expansion has grown too large"
561
+ end
530
562
  else
531
563
  er = DEFAULT_ENTITIES[entity_reference]
532
564
  rv.gsub!( er[0], er[2] ) if er
@@ -539,6 +571,14 @@ module REXML
539
571
  end
540
572
 
541
573
  private
574
+
575
+ def record_entity_expansion
576
+ @entity_expansion_count += 1
577
+ if @entity_expansion_count > Security.entity_expansion_limit
578
+ raise "number of entity expansions exceeded, processing aborted."
579
+ end
580
+ end
581
+
542
582
  def need_source_encoding_update?(xml_declaration_encoding)
543
583
  return false if xml_declaration_encoding.nil?
544
584
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -548,14 +588,14 @@ module REXML
548
588
  def parse_name(base_error_message)
549
589
  md = @source.match(Private::NAME_PATTERN, true)
550
590
  unless md
551
- if @source.match(/\s*\S/um)
591
+ if @source.match(/\S/um)
552
592
  message = "#{base_error_message}: invalid name"
553
593
  else
554
594
  message = "#{base_error_message}: name is missing"
555
595
  end
556
596
  raise REXML::ParseException.new(message, @source)
557
597
  end
558
- md[1]
598
+ md[0]
559
599
  end
560
600
 
561
601
  def parse_id(base_error_message,
@@ -624,15 +664,24 @@ module REXML
624
664
  end
625
665
  end
626
666
 
627
- def process_instruction(start_position)
628
- match_data = @source.match(Private::INSTRUCTION_END, true)
629
- unless match_data
630
- message = "Invalid processing instruction node"
631
- @source.position = start_position
632
- raise REXML::ParseException.new(message, @source)
667
+ def process_instruction
668
+ name = parse_name("Malformed XML: Invalid processing instruction node")
669
+ if @source.match(/\s+/um, true)
670
+ match_data = @source.match(/(.*?)\?>/um, true)
671
+ unless match_data
672
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
673
+ end
674
+ content = match_data[1]
675
+ else
676
+ content = nil
677
+ unless @source.match("?>", true)
678
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
679
+ end
633
680
  end
634
- if @document_status.nil? and match_data[1] == "xml"
635
- content = match_data[2]
681
+ if name == "xml"
682
+ if @document_status
683
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
684
+ end
636
685
  version = VERSION.match(content)
637
686
  version = version[1] unless version.nil?
638
687
  encoding = ENCODING.match(content)
@@ -647,7 +696,7 @@ module REXML
647
696
  standalone = standalone[1] unless standalone.nil?
648
697
  return [ :xmldecl, version, encoding, standalone ]
649
698
  end
650
- [:processing_instruction, match_data[1], match_data[2]]
699
+ [:processing_instruction, name, content]
651
700
  end
652
701
 
653
702
  def parse_attributes(prefixes, curr_ns)
@@ -47,6 +47,10 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
50
54
  def each
51
55
  while has_next?
52
56
  yield self.pull
@@ -22,6 +22,10 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
25
29
  def add_listener( listener )
26
30
  @parser.add_listener( listener )
27
31
  end
@@ -157,25 +161,8 @@ module REXML
157
161
  end
158
162
  end
159
163
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
164
+ unnormalized = @parser.unnormalize( event[1], @entities )
165
+ handle( :characters, unnormalized )
179
166
  when :entitydecl
180
167
  handle_entitydecl( event )
181
168
  when :processing_instruction, :comment, :attlistdecl,
@@ -36,8 +36,8 @@ module REXML
36
36
  @listener.tag_end( event[1] )
37
37
  @tag_stack.pop
38
38
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
39
+ unnormalized = @parser.unnormalize( event[1] )
40
+ @listener.text( unnormalized )
41
41
  when :processing_instruction
42
42
  @listener.instruction( *event[1,2] )
43
43
  when :start_doctype
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.1"
34
+ VERSION = "3.3.4"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -204,10 +204,20 @@ module REXML
204
204
  end
205
205
  end
206
206
 
207
- def read(term = nil)
207
+ def read(term = nil, min_bytes = 1)
208
208
  term = encode(term) if term
209
209
  begin
210
- @scanner << readline(term)
210
+ str = readline(term)
211
+ @scanner << str
212
+ read_bytes = str.bytesize
213
+ begin
214
+ while read_bytes < min_bytes
215
+ str = readline(term)
216
+ @scanner << str
217
+ read_bytes += str.bytesize
218
+ end
219
+ rescue IOError
220
+ end
211
221
  true
212
222
  rescue Exception, NameError
213
223
  @source = nil
@@ -237,10 +247,9 @@ module REXML
237
247
  read if @scanner.eos? && @source
238
248
  end
239
249
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
250
  def match( pattern, cons=false )
251
+ # To avoid performance issue, we need to increase bytes to read per scan
252
+ min_bytes = 1
244
253
  while true
245
254
  if cons
246
255
  md = @scanner.scan(pattern)
@@ -250,7 +259,8 @@ module REXML
250
259
  break if md
251
260
  return nil if pattern.is_a?(String)
252
261
  return nil if @source.nil?
253
- return nil unless read
262
+ return nil unless read(nil, min_bytes)
263
+ min_bytes *= 2
254
264
  end
255
265
 
256
266
  md.nil? ? nil : @scanner
data/lib/rexml/text.rb CHANGED
@@ -151,25 +151,45 @@ module REXML
151
151
  end
152
152
  end
153
153
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while (index = string.index(/<|&/, pos))
156
+ if string[index] == "<"
157
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158
+ end
159
+
160
+ unless (end_index = string.index(/[^\s];/, index + 1))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
+ end
163
+
164
+ value = string[(index + 1)..end_index]
165
+ if /\s/.match?(value)
166
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167
+ end
168
+
169
+ if value[0] == "#"
170
+ character_reference = value[1..-1]
171
+
172
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173
+ if character_reference[0] == "x" || character_reference[-1] == "x"
174
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
175
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
177
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
  end
179
+
180
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184
+ end
185
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
187
  end
188
+
189
+ pos = end_index + 1
172
190
  end
191
+
192
+ string
173
193
  end
174
194
 
175
195
  def node_type
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-06-25 00:00:00.000000000 Z
10
+ date: 2024-08-01 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.4
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md