rexml 3.3.5 → 3.3.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e2ee370ff6c1ab70149f6743a12ddf1eeae2c2af3c20f8cb7c6e56ff9699eec
4
- data.tar.gz: 158254197a12b1038b9b5e116c9abc89a329ef97acda8031399a56d3aee45fe9
3
+ metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
4
+ data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
5
5
  SHA512:
6
- metadata.gz: 6b805e28e50ef71bbc5d0349fdd4ec57ec4811bba94fe4c3f8aa17bedb81971da48e98205c53a8eadd18f07b69a2f68c8200529d546aef4187f9f3e903670857
7
- data.tar.gz: df3e369135f9b156475772a77702a91d45b8ee64ad49f608b2b33dc63d7b07dd271d7ac458d0b5e944e613798a0940231282997a747c4838e3e5c3afaf60253b
6
+ metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
7
+ data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
data/NEWS.md CHANGED
@@ -1,5 +1,44 @@
1
1
  # News
2
2
 
3
+ ## 3.3.6 - 2024-08-22 {#version-3-3-6}
4
+
5
+ ### Improvements
6
+
7
+ * Removed duplicated entity expansions for performance.
8
+ * GH-194
9
+ * Patch by Viktor Ivarsson.
10
+
11
+ * Improved the performance of the conflicting-namespace attribute check. It was
12
+ too slow for deep elements.
13
+ * Reported by l33thaxor.
14
+
15
+ ### Fixes
16
+
17
+ * Fixed a bug where default entity expansions were counted for the
18
+ security check. Default entity expansions should not be counted
19
+ because they don't have a security risk.
20
+ * GH-198
21
+ * GH-199
22
+ * Patch by Viktor Ivarsson.
23
+
24
+ * Fixed a parser bug that parameter entity references in internal
25
+ subsets are expanded. It's not allowed in the XML specification.
26
+ * GH-191
27
+ * Patch by NAITOH Jun.
28
+
29
+ * Fixed a stream parser bug that user-defined entity references in
30
+ text aren't expanded.
31
+ * GH-200
32
+ * Patch by NAITOH Jun.
33
+
34
+ ### Thanks
35
+
36
+ * Viktor Ivarsson
37
+
38
+ * NAITOH Jun
39
+
40
+ * l33thaxor
41
+
3
42
  ## 3.3.5 - 2024-08-12 {#version-3-3-5}
4
43
 
5
44
  ### Fixes
data/lib/rexml/element.rb CHANGED
@@ -441,9 +441,14 @@ module REXML
441
441
  # Related: #root_node, #document.
442
442
  #
443
443
  def root
444
- return elements[1] if self.kind_of? Document
445
- return self if parent.kind_of? Document or parent.nil?
446
- return parent.root
444
+ target = self
445
+ while target
446
+ return target.elements[1] if target.kind_of? Document
447
+ parent = target.parent
448
+ return target if parent.kind_of? Document or parent.nil?
449
+ target = parent
450
+ end
451
+ nil
447
452
  end
448
453
 
449
454
  # :call-seq:
@@ -619,8 +624,12 @@ module REXML
619
624
  else
620
625
  prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
621
626
  end
622
- ns = attributes[ prefix ]
623
- ns = parent.namespace(prefix) if ns.nil? and parent
627
+ ns = nil
628
+ target = self
629
+ while ns.nil? and target
630
+ ns = target.attributes[prefix]
631
+ target = target.parent
632
+ end
624
633
  ns = '' if ns.nil? and prefix == 'xmlns'
625
634
  return ns
626
635
  end
@@ -2375,17 +2384,6 @@ module REXML
2375
2384
  elsif old_attr.kind_of? Hash
2376
2385
  old_attr[value.prefix] = value
2377
2386
  elsif old_attr.prefix != value.prefix
2378
- # Check for conflicting namespaces
2379
- if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
2380
- old_namespace = old_attr.namespace
2381
- new_namespace = value.namespace
2382
- if old_namespace == new_namespace
2383
- raise ParseException.new(
2384
- "Namespace conflict in adding attribute \"#{value.name}\": "+
2385
- "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
2386
- "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
2387
- end
2388
- end
2389
2387
  store value.name, {old_attr.prefix => old_attr,
2390
2388
  value.prefix => value}
2391
2389
  else
data/lib/rexml/entity.rb CHANGED
@@ -12,6 +12,7 @@ module REXML
12
12
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
13
13
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
14
14
  PEREFERENCE = "%#{NAME};"
15
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
15
16
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
16
17
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
17
18
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ module REXML
19
20
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
20
21
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
21
22
 
22
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
23
24
 
24
25
  # Create a new entity. Simple entities can be constructed by passing a
25
26
  # name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,11 @@ module REXML
68
69
  end
69
70
 
70
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
71
- # all entities -- both %ent; and &ent; entities. This differs from
72
- # +value()+ in that +value+ only replaces %ent; entities.
72
+ # &ent; entities.
73
73
  def unnormalized
74
74
  document.record_entity_expansion unless document.nil?
75
- v = value()
76
- return nil if v.nil?
77
- @unnormalized = Text::unnormalize(v, parent)
78
- @unnormalized
75
+ return nil if @value.nil?
76
+ @unnormalized = Text::unnormalize(@value, parent)
79
77
  end
80
78
 
81
79
  #once :unnormalized
@@ -121,46 +119,6 @@ module REXML
121
119
  write rv
122
120
  rv
123
121
  end
124
-
125
- PEREFERENCE_RE = /#{PEREFERENCE}/um
126
- # Returns the value of this entity. At the moment, only internal entities
127
- # are processed. If the value contains internal references (IE,
128
- # %blah;), those are replaced with their values. IE, if the doctype
129
- # contains:
130
- # <!ENTITY % foo "bar">
131
- # <!ENTITY yada "nanoo %foo; nanoo>
132
- # then:
133
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
- def value
135
- @resolved_value ||= resolve_value
136
- end
137
-
138
- def parent=(other)
139
- @resolved_value = nil
140
- super
141
- end
142
-
143
- private
144
- def resolve_value
145
- return nil if @value.nil?
146
- return @value unless @value.match?(PEREFERENCE_RE)
147
-
148
- matches = @value.scan(PEREFERENCE_RE)
149
- rv = @value.clone
150
- if @parent
151
- sum = 0
152
- matches.each do |entity_reference|
153
- entity_value = @parent.entity( entity_reference[0] )
154
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
- raise "entity expansion has grown too large"
156
- else
157
- sum += entity_value.bytesize
158
- end
159
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160
- end
161
- end
162
- rv
163
- end
164
122
  end
165
123
 
166
124
  # This is a set of entity constants -- the ones defined in the XML
@@ -8,6 +8,22 @@ require "strscan"
8
8
 
9
9
  module REXML
10
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
11
27
  if StringScanner::Version < "3.0.8"
12
28
  module StringScannerCaptures
13
29
  refine StringScanner do
@@ -125,6 +141,7 @@ module REXML
125
141
  }
126
142
 
127
143
  module Private
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
128
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
129
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
130
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
@@ -164,7 +181,8 @@ module REXML
164
181
  @tags = []
165
182
  @stack = []
166
183
  @entities = []
167
- @nsstack = []
184
+ @namespaces = {}
185
+ @namespaces_restore_stack = []
168
186
  end
169
187
 
170
188
  def position
@@ -232,6 +250,10 @@ module REXML
232
250
  if @document_status == :in_doctype
233
251
  raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
234
252
  end
253
+ unless @tags.empty?
254
+ path = "/" + @tags.join("/")
255
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
256
+ end
235
257
  return [ :end_document ]
236
258
  end
237
259
  return @stack.shift if @stack.size > 0
@@ -264,7 +286,6 @@ module REXML
264
286
  @source.position = start_position
265
287
  raise REXML::ParseException.new(message, @source)
266
288
  end
267
- @nsstack.unshift(Set.new)
268
289
  name = parse_name(base_error_message)
269
290
  if @source.match(/\s*\[/um, true)
270
291
  id = [nil, nil, nil]
@@ -334,6 +355,8 @@ module REXML
334
355
  match[4] = match[4][1..-2] # HREF
335
356
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
336
357
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
358
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
359
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
337
360
  else
338
361
  match[2] = match[2][1..-2]
339
362
  match.pop if match.size == 4
@@ -356,7 +379,7 @@ module REXML
356
379
  val = attdef[4] if val == "#FIXED "
357
380
  pairs[attdef[0]] = val
358
381
  if attdef[0] =~ /^xmlns:(.*)/
359
- @nsstack[0] << $1
382
+ @namespaces[$1] = val
360
383
  end
361
384
  end
362
385
  end
@@ -409,7 +432,7 @@ module REXML
409
432
  # here explicitly.
410
433
  @source.ensure_buffer
411
434
  if @source.match("/", true)
412
- @nsstack.shift
435
+ @namespaces_restore_stack.pop
413
436
  last_tag = @tags.pop
414
437
  md = @source.match(Private::CLOSE_PATTERN, true)
415
438
  if md and !last_tag
@@ -454,18 +477,18 @@ module REXML
454
477
  @document_status = :in_element
455
478
  @prefixes.clear
456
479
  @prefixes << md[2] if md[2]
457
- @nsstack.unshift(curr_ns=Set.new)
458
- attributes, closed = parse_attributes(@prefixes, curr_ns)
480
+ push_namespaces_restore
481
+ attributes, closed = parse_attributes(@prefixes)
459
482
  # Verify that all of the prefixes have been defined
460
483
  for prefix in @prefixes
461
- unless @nsstack.find{|k| k.member?(prefix)}
484
+ unless @namespaces.key?(prefix)
462
485
  raise UndefinedNamespaceException.new(prefix,@source,self)
463
486
  end
464
487
  end
465
488
 
466
489
  if closed
467
490
  @closed = tag
468
- @nsstack.shift
491
+ pop_namespaces_restore
469
492
  else
470
493
  if @tags.empty? and @have_root
471
494
  raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
@@ -505,15 +528,13 @@ module REXML
505
528
  private :pull_event
506
529
 
507
530
  def entity( reference, entities )
508
- value = nil
509
- value = entities[ reference ] if entities
510
- if value
511
- record_entity_expansion
512
- else
513
- value = DEFAULT_ENTITIES[ reference ]
514
- value = value[2] if value
515
- end
516
- unnormalize( value, entities ) if value
531
+ return unless entities
532
+
533
+ value = entities[ reference ]
534
+ return if value.nil?
535
+
536
+ record_entity_expansion
537
+ unnormalize( value, entities )
517
538
  end
518
539
 
519
540
  # Escapes all possible entities
@@ -547,20 +568,29 @@ module REXML
547
568
  [Integer(m)].pack('U*')
548
569
  }
549
570
  matches.collect!{|x|x[0]}.compact!
571
+ if filter
572
+ matches.reject! do |entity_reference|
573
+ filter.include?(entity_reference)
574
+ end
575
+ end
550
576
  if matches.size > 0
551
- matches.each do |entity_reference|
552
- unless filter and filter.include?(entity_reference)
553
- entity_value = entity( entity_reference, entities )
554
- if entity_value
555
- re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
556
- rv.gsub!( re, entity_value )
557
- if rv.bytesize > Security.entity_expansion_text_limit
558
- raise "entity expansion has grown too large"
559
- end
560
- else
561
- er = DEFAULT_ENTITIES[entity_reference]
562
- rv.gsub!( er[0], er[2] ) if er
577
+ matches.tally.each do |entity_reference, n|
578
+ entity_expansion_count_before = @entity_expansion_count
579
+ entity_value = entity( entity_reference, entities )
580
+ if entity_value
581
+ if n > 1
582
+ entity_expansion_count_delta =
583
+ @entity_expansion_count - entity_expansion_count_before
584
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
563
585
  end
586
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
587
+ rv.gsub!( re, entity_value )
588
+ if rv.bytesize > Security.entity_expansion_text_limit
589
+ raise "entity expansion has grown too large"
590
+ end
591
+ else
592
+ er = DEFAULT_ENTITIES[entity_reference]
593
+ rv.gsub!( er[0], er[2] ) if er
564
594
  end
565
595
  end
566
596
  rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
@@ -569,9 +599,34 @@ module REXML
569
599
  end
570
600
 
571
601
  private
602
+ def add_namespace(prefix, uri)
603
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
604
+ if uri.nil?
605
+ @namespaces.delete(prefix)
606
+ else
607
+ @namespaces[prefix] = uri
608
+ end
609
+ end
610
+
611
+ def push_namespaces_restore
612
+ namespaces_restore = {}
613
+ @namespaces_restore_stack.push(namespaces_restore)
614
+ namespaces_restore
615
+ end
572
616
 
573
- def record_entity_expansion
574
- @entity_expansion_count += 1
617
+ def pop_namespaces_restore
618
+ namespaces_restore = @namespaces_restore_stack.pop
619
+ namespaces_restore.each do |prefix, uri|
620
+ if uri.nil?
621
+ @namespaces.delete(prefix)
622
+ else
623
+ @namespaces[prefix] = uri
624
+ end
625
+ end
626
+ end
627
+
628
+ def record_entity_expansion(delta=1)
629
+ @entity_expansion_count += delta
575
630
  if @entity_expansion_count > Security.entity_expansion_limit
576
631
  raise "number of entity expansions exceeded, processing aborted."
577
632
  end
@@ -697,8 +752,9 @@ module REXML
697
752
  [:processing_instruction, name, content]
698
753
  end
699
754
 
700
- def parse_attributes(prefixes, curr_ns)
755
+ def parse_attributes(prefixes)
701
756
  attributes = {}
757
+ expanded_names = {}
702
758
  closed = false
703
759
  while true
704
760
  if @source.match(">", true)
@@ -740,7 +796,7 @@ module REXML
740
796
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
741
797
  raise REXML::ParseException.new( msg, @source, self)
742
798
  end
743
- curr_ns << local_part
799
+ add_namespace(local_part, value)
744
800
  elsif prefix
745
801
  prefixes << prefix unless prefix == "xml"
746
802
  end
@@ -750,6 +806,20 @@ module REXML
750
806
  raise REXML::ParseException.new(msg, @source, self)
751
807
  end
752
808
 
809
+ unless prefix == "xmlns"
810
+ uri = @namespaces[prefix]
811
+ expanded_name = [uri, local_part]
812
+ existing_prefix = expanded_names[expanded_name]
813
+ if existing_prefix
814
+ message = "Namespace conflict in adding attribute " +
815
+ "\"#{local_part}\": " +
816
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
817
+ "prefix \"#{prefix}\" = \"#{uri}\""
818
+ raise REXML::ParseException.new(message, @source, self)
819
+ end
820
+ expanded_names[expanded_name] = prefix
821
+ end
822
+
753
823
  attributes[name] = value
754
824
  else
755
825
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -7,36 +7,33 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
17
21
  def parse
18
22
  # entity string
19
23
  while true
20
24
  event = @parser.pull
21
25
  case event[0]
22
26
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
27
  return
29
28
  when :start_element
30
- @tag_stack << event[1]
31
29
  attrs = event[2].each do |n, v|
32
30
  event[2][n] = @parser.unnormalize( v )
33
31
  end
34
32
  @listener.tag_start( event[1], attrs )
35
33
  when :end_element
36
34
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
35
  when :text
39
- unnormalized = @parser.unnormalize( event[1] )
36
+ unnormalized = @parser.unnormalize( event[1], @entities )
40
37
  @listener.text( unnormalized )
41
38
  when :processing_instruction
42
39
  @listener.instruction( *event[1,2] )
@@ -48,6 +45,7 @@ module REXML
48
45
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
46
  @listener.send( event[0].to_s, *event[1..-1] )
50
47
  when :entitydecl, :notationdecl
48
+ @entities[ event[1] ] = event[2] if event.size == 3
51
49
  @listener.send( event[0].to_s, event[1..-1] )
52
50
  when :externalentity
53
51
  entity_reference = event[1]
@@ -15,7 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
18
  entities = nil
20
19
  begin
21
20
  while true
@@ -23,19 +22,13 @@ module REXML
23
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
24
23
  case event[0]
25
24
  when :end_document
26
- unless tag_stack.empty?
27
- raise ParseException.new("No close tag for #{@build_context.xpath}",
28
- @parser.source, @parser)
29
- end
30
25
  return
31
26
  when :start_element
32
- tag_stack.push(event[1])
33
27
  el = @build_context = @build_context.add_element( event[1] )
34
28
  event[2].each do |key, value|
35
29
  el.attributes[key]=Attribute.new(key,value,self)
36
30
  end
37
31
  when :end_element
38
- tag_stack.pop
39
32
  @build_context = @build_context.parent
40
33
  when :text
41
34
  if @build_context[-1].instance_of? Text
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.5"
34
+ VERSION = "3.3.6"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.5
4
+ version: 3.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-08-12 00:00:00.000000000 Z
10
+ date: 2024-08-22 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.5
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md