rexml 3.3.2 → 3.3.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
4
- data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
3
+ metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
4
+ data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
5
5
  SHA512:
6
- metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
7
- data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
6
+ metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
7
+ data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
data/NEWS.md CHANGED
@@ -1,5 +1,109 @@
1
1
  # News
2
2
 
3
+ ## 3.3.6 - 2024-08-22 {#version-3-3-6}
4
+
5
+ ### Improvements
6
+
7
+ * Removed duplicated entity expansions for performance.
8
+ * GH-194
9
+ * Patch by Viktor Ivarsson.
10
+
11
+ * Improved namespace conflicted attribute check performance. It was
12
+ too slow for deep elements.
13
+ * Reported by l33thaxor.
14
+
15
+ ### Fixes
16
+
17
+ * Fixed a bug that default entity expansions are counted for
18
+ security check. Default entity expansions should not be counted
19
+ because they don't have a security risk.
20
+ * GH-198
21
+ * GH-199
22
+ * Patch by Viktor Ivarsson.
23
+
24
+ * Fixed a parser bug that parameter entity references in internal
25
+ subsets are expanded. It's not allowed in the XML specification.
26
+ * GH-191
27
+ * Patch by NAITOH Jun.
28
+
29
+ * Fixed a stream parser bug that user-defined entity references in
30
+ text aren't expanded.
31
+ * GH-200
32
+ * Patch by NAITOH Jun.
33
+
34
+ ### Thanks
35
+
36
+ * Viktor Ivarsson
37
+
38
+ * NAITOH Jun
39
+
40
+ * l33thaxor
41
+
42
+ ## 3.3.5 - 2024-08-12 {#version-3-3-5}
43
+
44
+ ### Fixes
45
+
46
+ * Fixed a bug that `REXML::Security.entity_expansion_text_limit`
47
+ check has wrong text size calculation in SAX and pull parsers.
48
+ * GH-193
49
+ * GH-195
50
+ * Reported by Viktor Ivarsson.
51
+ * Patch by NAITOH Jun.
52
+
53
+ ### Thanks
54
+
55
+ * Viktor Ivarsson
56
+
57
+ * NAITOH Jun
58
+
59
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
60
+
61
+ ### Fixes
62
+
63
+ * Fixed a bug that `REXML::Security` isn't defined when
64
+ `REXML::Parsers::StreamParser` is used and
65
+ `rexml/parsers/streamparser` is only required.
66
+ * GH-189
67
+ * Patch by takuya kodama.
68
+
69
+ ### Thanks
70
+
71
+ * takuya kodama
72
+
73
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
74
+
75
+ ### Improvements
76
+
77
+ * Added support for detecting invalid XML that has unsupported
78
+ content before root element.
79
+ * GH-184
80
+ * Patch by NAITOH Jun.
81
+
82
+ * Added support for `REXML::Security.entity_expansion_limit=` and
83
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
84
+ parsers.
85
+ * GH-187
86
+ * Patch by NAITOH Jun.
87
+
88
+ * Added more tests for invalid XMLs.
89
+ * GH-183
90
+ * Patch by Watson.
91
+
92
+ * Added more performance tests.
93
+ * Patch by Watson.
94
+
95
+ * Improved parse performance.
96
+ * GH-186
97
+ * Patch by tomoya ishida.
98
+
99
+ ### Thanks
100
+
101
+ * NAITOH Jun
102
+
103
+ * Watson
104
+
105
+ * tomoya ishida
106
+
3
107
  ## 3.3.2 - 2024-07-16 {#version-3-3-2}
4
108
 
5
109
  ### Improvements
@@ -15,6 +119,9 @@
15
119
  * GH-172
16
120
  * GH-173
17
121
  * GH-174
122
+ * GH-175
123
+ * GH-176
124
+ * GH-177
18
125
  * Patch by Watson.
19
126
 
20
127
  * Added support for raising a parse exception when an XML has extra
data/lib/rexml/element.rb CHANGED
@@ -441,9 +441,14 @@ module REXML
441
441
  # Related: #root_node, #document.
442
442
  #
443
443
  def root
444
- return elements[1] if self.kind_of? Document
445
- return self if parent.kind_of? Document or parent.nil?
446
- return parent.root
444
+ target = self
445
+ while target
446
+ return target.elements[1] if target.kind_of? Document
447
+ parent = target.parent
448
+ return target if parent.kind_of? Document or parent.nil?
449
+ target = parent
450
+ end
451
+ nil
447
452
  end
448
453
 
449
454
  # :call-seq:
@@ -619,8 +624,12 @@ module REXML
619
624
  else
620
625
  prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
621
626
  end
622
- ns = attributes[ prefix ]
623
- ns = parent.namespace(prefix) if ns.nil? and parent
627
+ ns = nil
628
+ target = self
629
+ while ns.nil? and target
630
+ ns = target.attributes[prefix]
631
+ target = target.parent
632
+ end
624
633
  ns = '' if ns.nil? and prefix == 'xmlns'
625
634
  return ns
626
635
  end
@@ -2375,17 +2384,6 @@ module REXML
2375
2384
  elsif old_attr.kind_of? Hash
2376
2385
  old_attr[value.prefix] = value
2377
2386
  elsif old_attr.prefix != value.prefix
2378
- # Check for conflicting namespaces
2379
- if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
2380
- old_namespace = old_attr.namespace
2381
- new_namespace = value.namespace
2382
- if old_namespace == new_namespace
2383
- raise ParseException.new(
2384
- "Namespace conflict in adding attribute \"#{value.name}\": "+
2385
- "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
2386
- "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
2387
- end
2388
- end
2389
2387
  store value.name, {old_attr.prefix => old_attr,
2390
2388
  value.prefix => value}
2391
2389
  else
data/lib/rexml/entity.rb CHANGED
@@ -12,6 +12,7 @@ module REXML
12
12
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
13
13
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
14
14
  PEREFERENCE = "%#{NAME};"
15
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
15
16
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
16
17
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
17
18
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ module REXML
19
20
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
20
21
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
21
22
 
22
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
23
24
 
24
25
  # Create a new entity. Simple entities can be constructed by passing a
25
26
  # name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,11 @@ module REXML
68
69
  end
69
70
 
70
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
71
- # all entities -- both %ent; and &ent; entities. This differs from
72
- # +value()+ in that +value+ only replaces %ent; entities.
72
+ # &ent; entities.
73
73
  def unnormalized
74
74
  document.record_entity_expansion unless document.nil?
75
- v = value()
76
- return nil if v.nil?
77
- @unnormalized = Text::unnormalize(v, parent)
78
- @unnormalized
75
+ return nil if @value.nil?
76
+ @unnormalized = Text::unnormalize(@value, parent)
79
77
  end
80
78
 
81
79
  #once :unnormalized
@@ -121,46 +119,6 @@ module REXML
121
119
  write rv
122
120
  rv
123
121
  end
124
-
125
- PEREFERENCE_RE = /#{PEREFERENCE}/um
126
- # Returns the value of this entity. At the moment, only internal entities
127
- # are processed. If the value contains internal references (IE,
128
- # %blah;), those are replaced with their values. IE, if the doctype
129
- # contains:
130
- # <!ENTITY % foo "bar">
131
- # <!ENTITY yada "nanoo %foo; nanoo>
132
- # then:
133
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
- def value
135
- @resolved_value ||= resolve_value
136
- end
137
-
138
- def parent=(other)
139
- @resolved_value = nil
140
- super
141
- end
142
-
143
- private
144
- def resolve_value
145
- return nil if @value.nil?
146
- return @value unless @value.match?(PEREFERENCE_RE)
147
-
148
- matches = @value.scan(PEREFERENCE_RE)
149
- rv = @value.clone
150
- if @parent
151
- sum = 0
152
- matches.each do |entity_reference|
153
- entity_value = @parent.entity( entity_reference[0] )
154
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
- raise "entity expansion has grown too large"
156
- else
157
- sum += entity_value.bytesize
158
- end
159
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160
- end
161
- end
162
- rv
163
- end
164
122
  end
165
123
 
166
124
  # This is a set of entity constants -- the ones defined in the XML
@@ -1,12 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
10
27
  if StringScanner::Version < "3.0.8"
11
28
  module StringScannerCaptures
12
29
  refine StringScanner do
@@ -124,19 +141,11 @@ module REXML
124
141
  }
125
142
 
126
143
  module Private
127
- # Terminal requires two or more letters.
128
- INSTRUCTION_TERM = "?>"
129
- COMMENT_TERM = "-->"
130
- CDATA_TERM = "]]>"
131
- DOCTYPE_TERM = "]>"
132
- # Read to the end of DOCTYPE because there is no proper ENTITY termination
133
- ENTITY_TERM = DOCTYPE_TERM
134
-
135
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
136
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
137
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
138
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
139
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
140
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
141
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
142
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -154,6 +163,7 @@ module REXML
154
163
  self.stream = source
155
164
  @listeners = []
156
165
  @prefixes = Set.new
166
+ @entity_expansion_count = 0
157
167
  end
158
168
 
159
169
  def add_listener( listener )
@@ -161,6 +171,7 @@ module REXML
161
171
  end
162
172
 
163
173
  attr_reader :source
174
+ attr_reader :entity_expansion_count
164
175
 
165
176
  def stream=( source )
166
177
  @source = SourceFactory.create_from( source )
@@ -170,7 +181,8 @@ module REXML
170
181
  @tags = []
171
182
  @stack = []
172
183
  @entities = []
173
- @nsstack = []
184
+ @namespaces = {}
185
+ @namespaces_restore_stack = []
174
186
  end
175
187
 
176
188
  def position
@@ -238,6 +250,10 @@ module REXML
238
250
  if @document_status == :in_doctype
239
251
  raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
240
252
  end
253
+ unless @tags.empty?
254
+ path = "/" + @tags.join("/")
255
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
256
+ end
241
257
  return [ :end_document ]
242
258
  end
243
259
  return @stack.shift if @stack.size > 0
@@ -248,10 +264,10 @@ module REXML
248
264
  if @document_status == nil
249
265
  start_position = @source.position
250
266
  if @source.match("<?", true)
251
- return process_instruction(start_position)
267
+ return process_instruction
252
268
  elsif @source.match("<!", true)
253
269
  if @source.match("--", true)
254
- md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
270
+ md = @source.match(/(.*?)-->/um, true)
255
271
  if md.nil?
256
272
  raise REXML::ParseException.new("Unclosed comment", @source)
257
273
  end
@@ -270,7 +286,6 @@ module REXML
270
286
  @source.position = start_position
271
287
  raise REXML::ParseException.new(message, @source)
272
288
  end
273
- @nsstack.unshift(Set.new)
274
289
  name = parse_name(base_error_message)
275
290
  if @source.match(/\s*\[/um, true)
276
291
  id = [nil, nil, nil]
@@ -318,7 +333,11 @@ module REXML
318
333
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
319
334
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
320
335
  elsif @source.match("ENTITY", true)
321
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
336
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
337
+ unless match_data
338
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
339
+ end
340
+ match = [:entitydecl, *match_data.captures.compact]
322
341
  ref = false
323
342
  if match[1] == '%'
324
343
  ref = true
@@ -336,6 +355,8 @@ module REXML
336
355
  match[4] = match[4][1..-2] # HREF
337
356
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
338
357
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
358
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
359
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
339
360
  else
340
361
  match[2] = match[2][1..-2]
341
362
  match.pop if match.size == 4
@@ -358,7 +379,7 @@ module REXML
358
379
  val = attdef[4] if val == "#FIXED "
359
380
  pairs[attdef[0]] = val
360
381
  if attdef[0] =~ /^xmlns:(.*)/
361
- @nsstack[0] << $1
382
+ @namespaces[$1] = val
362
383
  end
363
384
  end
364
385
  end
@@ -383,14 +404,14 @@ module REXML
383
404
  raise REXML::ParseException.new(message, @source)
384
405
  end
385
406
  return [:notationdecl, name, *id]
386
- elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
407
+ elsif md = @source.match(/--(.*?)-->/um, true)
387
408
  case md[1]
388
409
  when /--/, /-\z/
389
410
  raise REXML::ParseException.new("Malformed comment", @source)
390
411
  end
391
412
  return [ :comment, md[1] ] if md
392
413
  end
393
- elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
414
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
394
415
  return [ :externalentity, match[1] ]
395
416
  elsif @source.match(/\]\s*>/um, true)
396
417
  @document_status = :after_doctype
@@ -411,7 +432,7 @@ module REXML
411
432
  # here explicitly.
412
433
  @source.ensure_buffer
413
434
  if @source.match("/", true)
414
- @nsstack.shift
435
+ @namespaces_restore_stack.pop
415
436
  last_tag = @tags.pop
416
437
  md = @source.match(Private::CLOSE_PATTERN, true)
417
438
  if md and !last_tag
@@ -430,7 +451,7 @@ module REXML
430
451
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
431
452
  raise REXML::ParseException.new("Malformed node", @source) unless md
432
453
  if md[0][0] == ?-
433
- md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
454
+ md = @source.match(/--(.*?)-->/um, true)
434
455
 
435
456
  if md.nil? || /--|-\z/.match?(md[1])
436
457
  raise REXML::ParseException.new("Malformed comment", @source)
@@ -438,13 +459,13 @@ module REXML
438
459
 
439
460
  return [ :comment, md[1] ]
440
461
  else
441
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
462
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
442
463
  return [ :cdata, md[1] ] if md
443
464
  end
444
465
  raise REXML::ParseException.new( "Declarations can only occur "+
445
466
  "in the doctype declaration.", @source)
446
467
  elsif @source.match("?", true)
447
- return process_instruction(start_position)
468
+ return process_instruction
448
469
  else
449
470
  # Get the next tag
450
471
  md = @source.match(Private::TAG_PATTERN, true)
@@ -456,18 +477,18 @@ module REXML
456
477
  @document_status = :in_element
457
478
  @prefixes.clear
458
479
  @prefixes << md[2] if md[2]
459
- @nsstack.unshift(curr_ns=Set.new)
460
- attributes, closed = parse_attributes(@prefixes, curr_ns)
480
+ push_namespaces_restore
481
+ attributes, closed = parse_attributes(@prefixes)
461
482
  # Verify that all of the prefixes have been defined
462
483
  for prefix in @prefixes
463
- unless @nsstack.find{|k| k.member?(prefix)}
484
+ unless @namespaces.key?(prefix)
464
485
  raise UndefinedNamespaceException.new(prefix,@source,self)
465
486
  end
466
487
  end
467
488
 
468
489
  if closed
469
490
  @closed = tag
470
- @nsstack.shift
491
+ pop_namespaces_restore
471
492
  else
472
493
  if @tags.empty? and @have_root
473
494
  raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
@@ -482,11 +503,15 @@ module REXML
482
503
  if text.chomp!("<")
483
504
  @source.position -= "<".bytesize
484
505
  end
485
- if @tags.empty? and @have_root
506
+ if @tags.empty?
486
507
  unless /\A\s*\z/.match?(text)
487
- raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
508
+ if @have_root
509
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
510
+ else
511
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
512
+ end
488
513
  end
489
- return pull_event
514
+ return pull_event if @have_root
490
515
  end
491
516
  return [ :text, text ]
492
517
  end
@@ -503,13 +528,13 @@ module REXML
503
528
  private :pull_event
504
529
 
505
530
  def entity( reference, entities )
506
- value = nil
507
- value = entities[ reference ] if entities
508
- if not value
509
- value = DEFAULT_ENTITIES[ reference ]
510
- value = value[2] if value
511
- end
512
- unnormalize( value, entities ) if value
531
+ return unless entities
532
+
533
+ value = entities[ reference ]
534
+ return if value.nil?
535
+
536
+ record_entity_expansion
537
+ unnormalize( value, entities )
513
538
  end
514
539
 
515
540
  # Escapes all possible entities
@@ -543,17 +568,29 @@ module REXML
543
568
  [Integer(m)].pack('U*')
544
569
  }
545
570
  matches.collect!{|x|x[0]}.compact!
571
+ if filter
572
+ matches.reject! do |entity_reference|
573
+ filter.include?(entity_reference)
574
+ end
575
+ end
546
576
  if matches.size > 0
547
- matches.each do |entity_reference|
548
- unless filter and filter.include?(entity_reference)
549
- entity_value = entity( entity_reference, entities )
550
- if entity_value
551
- re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
552
- rv.gsub!( re, entity_value )
553
- else
554
- er = DEFAULT_ENTITIES[entity_reference]
555
- rv.gsub!( er[0], er[2] ) if er
577
+ matches.tally.each do |entity_reference, n|
578
+ entity_expansion_count_before = @entity_expansion_count
579
+ entity_value = entity( entity_reference, entities )
580
+ if entity_value
581
+ if n > 1
582
+ entity_expansion_count_delta =
583
+ @entity_expansion_count - entity_expansion_count_before
584
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
585
+ end
586
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
587
+ rv.gsub!( re, entity_value )
588
+ if rv.bytesize > Security.entity_expansion_text_limit
589
+ raise "entity expansion has grown too large"
556
590
  end
591
+ else
592
+ er = DEFAULT_ENTITIES[entity_reference]
593
+ rv.gsub!( er[0], er[2] ) if er
557
594
  end
558
595
  end
559
596
  rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
@@ -562,6 +599,39 @@ module REXML
562
599
  end
563
600
 
564
601
  private
602
+ def add_namespace(prefix, uri)
603
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
604
+ if uri.nil?
605
+ @namespaces.delete(prefix)
606
+ else
607
+ @namespaces[prefix] = uri
608
+ end
609
+ end
610
+
611
+ def push_namespaces_restore
612
+ namespaces_restore = {}
613
+ @namespaces_restore_stack.push(namespaces_restore)
614
+ namespaces_restore
615
+ end
616
+
617
+ def pop_namespaces_restore
618
+ namespaces_restore = @namespaces_restore_stack.pop
619
+ namespaces_restore.each do |prefix, uri|
620
+ if uri.nil?
621
+ @namespaces.delete(prefix)
622
+ else
623
+ @namespaces[prefix] = uri
624
+ end
625
+ end
626
+ end
627
+
628
+ def record_entity_expansion(delta=1)
629
+ @entity_expansion_count += delta
630
+ if @entity_expansion_count > Security.entity_expansion_limit
631
+ raise "number of entity expansions exceeded, processing aborted."
632
+ end
633
+ end
634
+
565
635
  def need_source_encoding_update?(xml_declaration_encoding)
566
636
  return false if xml_declaration_encoding.nil?
567
637
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -571,14 +641,14 @@ module REXML
571
641
  def parse_name(base_error_message)
572
642
  md = @source.match(Private::NAME_PATTERN, true)
573
643
  unless md
574
- if @source.match(/\s*\S/um)
644
+ if @source.match(/\S/um)
575
645
  message = "#{base_error_message}: invalid name"
576
646
  else
577
647
  message = "#{base_error_message}: name is missing"
578
648
  end
579
649
  raise REXML::ParseException.new(message, @source)
580
650
  end
581
- md[1]
651
+ md[0]
582
652
  end
583
653
 
584
654
  def parse_id(base_error_message,
@@ -647,18 +717,24 @@ module REXML
647
717
  end
648
718
  end
649
719
 
650
- def process_instruction(start_position)
651
- match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
652
- unless match_data
653
- message = "Invalid processing instruction node"
654
- @source.position = start_position
655
- raise REXML::ParseException.new(message, @source)
720
+ def process_instruction
721
+ name = parse_name("Malformed XML: Invalid processing instruction node")
722
+ if @source.match(/\s+/um, true)
723
+ match_data = @source.match(/(.*?)\?>/um, true)
724
+ unless match_data
725
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
726
+ end
727
+ content = match_data[1]
728
+ else
729
+ content = nil
730
+ unless @source.match("?>", true)
731
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
732
+ end
656
733
  end
657
- if match_data[1] == "xml"
734
+ if name == "xml"
658
735
  if @document_status
659
736
  raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
660
737
  end
661
- content = match_data[2]
662
738
  version = VERSION.match(content)
663
739
  version = version[1] unless version.nil?
664
740
  encoding = ENCODING.match(content)
@@ -673,11 +749,12 @@ module REXML
673
749
  standalone = standalone[1] unless standalone.nil?
674
750
  return [ :xmldecl, version, encoding, standalone ]
675
751
  end
676
- [:processing_instruction, match_data[1], match_data[2]]
752
+ [:processing_instruction, name, content]
677
753
  end
678
754
 
679
- def parse_attributes(prefixes, curr_ns)
755
+ def parse_attributes(prefixes)
680
756
  attributes = {}
757
+ expanded_names = {}
681
758
  closed = false
682
759
  while true
683
760
  if @source.match(">", true)
@@ -719,7 +796,7 @@ module REXML
719
796
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
720
797
  raise REXML::ParseException.new( msg, @source, self)
721
798
  end
722
- curr_ns << local_part
799
+ add_namespace(local_part, value)
723
800
  elsif prefix
724
801
  prefixes << prefix unless prefix == "xml"
725
802
  end
@@ -729,6 +806,20 @@ module REXML
729
806
  raise REXML::ParseException.new(msg, @source, self)
730
807
  end
731
808
 
809
+ unless prefix == "xmlns"
810
+ uri = @namespaces[prefix]
811
+ expanded_name = [uri, local_part]
812
+ existing_prefix = expanded_names[expanded_name]
813
+ if existing_prefix
814
+ message = "Namespace conflict in adding attribute " +
815
+ "\"#{local_part}\": " +
816
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
817
+ "prefix \"#{prefix}\" = \"#{uri}\""
818
+ raise REXML::ParseException.new(message, @source, self)
819
+ end
820
+ expanded_names[expanded_name] = prefix
821
+ end
822
+
732
823
  attributes[name] = value
733
824
  else
734
825
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,10 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
50
54
  def each
51
55
  while has_next?
52
56
  yield self.pull
@@ -22,6 +22,10 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
25
29
  def add_listener( listener )
26
30
  @parser.add_listener( listener )
27
31
  end
@@ -7,36 +7,33 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
17
21
  def parse
18
22
  # entity string
19
23
  while true
20
24
  event = @parser.pull
21
25
  case event[0]
22
26
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
27
  return
29
28
  when :start_element
30
- @tag_stack << event[1]
31
29
  attrs = event[2].each do |n, v|
32
30
  event[2][n] = @parser.unnormalize( v )
33
31
  end
34
32
  @listener.tag_start( event[1], attrs )
35
33
  when :end_element
36
34
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
35
  when :text
39
- unnormalized = @parser.unnormalize( event[1] )
36
+ unnormalized = @parser.unnormalize( event[1], @entities )
40
37
  @listener.text( unnormalized )
41
38
  when :processing_instruction
42
39
  @listener.instruction( *event[1,2] )
@@ -48,6 +45,7 @@ module REXML
48
45
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
46
  @listener.send( event[0].to_s, *event[1..-1] )
50
47
  when :entitydecl, :notationdecl
48
+ @entities[ event[1] ] = event[2] if event.size == 3
51
49
  @listener.send( event[0].to_s, event[1..-1] )
52
50
  when :externalentity
53
51
  entity_reference = event[1]
@@ -15,7 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
18
  entities = nil
20
19
  begin
21
20
  while true
@@ -23,19 +22,13 @@ module REXML
23
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
24
23
  case event[0]
25
24
  when :end_document
26
- unless tag_stack.empty?
27
- raise ParseException.new("No close tag for #{@build_context.xpath}",
28
- @parser.source, @parser)
29
- end
30
25
  return
31
26
  when :start_element
32
- tag_stack.push(event[1])
33
27
  el = @build_context = @build_context.add_element( event[1] )
34
28
  event[2].each do |key, value|
35
29
  el.attributes[key]=Attribute.new(key,value,self)
36
30
  end
37
31
  when :end_element
38
- tag_stack.pop
39
32
  @build_context = @build_context.parent
40
33
  when :text
41
34
  if @build_context[-1].instance_of? Text
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.2"
34
+ VERSION = "3.3.6"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -117,7 +117,7 @@ module REXML
117
117
  def ensure_buffer
118
118
  end
119
119
 
120
- def match(pattern, cons=false, term: nil)
120
+ def match(pattern, cons=false)
121
121
  if cons
122
122
  @scanner.scan(pattern).nil? ? nil : @scanner
123
123
  else
@@ -204,10 +204,20 @@ module REXML
204
204
  end
205
205
  end
206
206
 
207
- def read(term = nil)
207
+ def read(term = nil, min_bytes = 1)
208
208
  term = encode(term) if term
209
209
  begin
210
- @scanner << readline(term)
210
+ str = readline(term)
211
+ @scanner << str
212
+ read_bytes = str.bytesize
213
+ begin
214
+ while read_bytes < min_bytes
215
+ str = readline(term)
216
+ @scanner << str
217
+ read_bytes += str.bytesize
218
+ end
219
+ rescue IOError
220
+ end
211
221
  true
212
222
  rescue Exception, NameError
213
223
  @source = nil
@@ -237,10 +247,9 @@ module REXML
237
247
  read if @scanner.eos? && @source
238
248
  end
239
249
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
- def match( pattern, cons=false, term: nil )
250
+ def match( pattern, cons=false )
251
+ # To avoid a performance issue, we need to increase the number of bytes to read per scan
252
+ min_bytes = 1
244
253
  while true
245
254
  if cons
246
255
  md = @scanner.scan(pattern)
@@ -250,7 +259,8 @@ module REXML
250
259
  break if md
251
260
  return nil if pattern.is_a?(String)
252
261
  return nil if @source.nil?
253
- return nil unless read(term)
262
+ return nil unless read(nil, min_bytes)
263
+ min_bytes *= 2
254
264
  end
255
265
 
256
266
  md.nil? ? nil : @scanner
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.2
4
+ version: 3.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-07-16 00:00:00.000000000 Z
10
+ date: 2024-08-22 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md