rexml 3.3.1 → 3.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
4
- data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
3
+ metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
4
+ data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
5
5
  SHA512:
6
- metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
7
- data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
6
+ metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
7
+ data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
data/NEWS.md CHANGED
@@ -1,5 +1,160 @@
1
1
  # News
2
2
 
3
+ ## 3.3.6 - 2024-08-22 {#version-3-3-6}
4
+
5
+ ### Improvements
6
+
7
+ * Removed duplicated entity expansions for performance.
8
+ * GH-194
9
+ * Patch by Viktor Ivarsson.
10
+
11
+ * Improved the performance of the namespace conflict check for attributes. It was
12
+ too slow for deep elements.
13
+ * Reported by l33thaxor.
14
+
15
+ ### Fixes
16
+
17
+ * Fixed a bug where default entity expansions were counted toward the
18
+ security check. Default entity expansions should not be counted
19
+ because they don't have a security risk.
20
+ * GH-198
21
+ * GH-199
22
+ * Patch by Viktor Ivarsson.
23
+
24
+ * Fixed a parser bug that parameter entity references in internal
25
+ subsets are expanded. It's not allowed in the XML specification.
26
+ * GH-191
27
+ * Patch by NAITOH Jun.
28
+
29
+ * Fixed a stream parser bug that user-defined entity references in
30
+ text aren't expanded.
31
+ * GH-200
32
+ * Patch by NAITOH Jun.
33
+
34
+ ### Thanks
35
+
36
+ * Viktor Ivarsson
37
+
38
+ * NAITOH Jun
39
+
40
+ * l33thaxor
41
+
42
+ ## 3.3.5 - 2024-08-12 {#version-3-3-5}
43
+
44
+ ### Fixes
45
+
46
+ * Fixed a bug that `REXML::Security.entity_expansion_text_limit`
47
+ check has wrong text size calculation in SAX and pull parsers.
48
+ * GH-193
49
+ * GH-195
50
+ * Reported by Viktor Ivarsson.
51
+ * Patch by NAITOH Jun.
52
+
53
+ ### Thanks
54
+
55
+ * Viktor Ivarsson
56
+
57
+ * NAITOH Jun
58
+
59
+ ## 3.3.4 - 2024-08-01 {#version-3-3-4}
60
+
61
+ ### Fixes
62
+
63
+ * Fixed a bug that `REXML::Security` isn't defined when
64
+ `REXML::Parsers::StreamParser` is used and
65
+ `rexml/parsers/streamparser` is only required.
66
+ * GH-189
67
+ * Patch by takuya kodama.
68
+
69
+ ### Thanks
70
+
71
+ * takuya kodama
72
+
73
+ ## 3.3.3 - 2024-08-01 {#version-3-3-3}
74
+
75
+ ### Improvements
76
+
77
+ * Added support for detecting invalid XML that has unsupported
78
+ content before the root element.
79
+ * GH-184
80
+ * Patch by NAITOH Jun.
81
+
82
+ * Added support for `REXML::Security.entity_expansion_limit=` and
83
+ `REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
84
+ parsers
85
+ * GH-187
86
+ * Patch by NAITOH Jun.
87
+
88
+ * Added more tests for invalid XMLs.
89
+ * GH-183
90
+ * Patch by Watson.
91
+
92
+ * Added more performance tests.
93
+ * Patch by Watson.
94
+
95
+ * Improved parse performance.
96
+ * GH-186
97
+ * Patch by tomoya ishida.
98
+
99
+ ### Thanks
100
+
101
+ * NAITOH Jun
102
+
103
+ * Watson
104
+
105
+ * tomoya ishida
106
+
107
+ ## 3.3.2 - 2024-07-16 {#version-3-3-2}
108
+
109
+ ### Improvements
110
+
111
+ * Improved parse performance.
112
+ * GH-160
113
+ * Patch by NAITOH Jun.
114
+
115
+ * Improved parse performance.
116
+ * GH-169
117
+ * GH-170
118
+ * GH-171
119
+ * GH-172
120
+ * GH-173
121
+ * GH-174
122
+ * GH-175
123
+ * GH-176
124
+ * GH-177
125
+ * Patch by Watson.
126
+
127
+ * Added support for raising a parse exception when an XML has extra
128
+ content after the root element.
129
+ * GH-161
130
+ * Patch by NAITOH Jun.
131
+
132
+ * Added support for raising a parse exception when an XML
133
+ declaration exists in wrong position.
134
+ * GH-162
135
+ * Patch by NAITOH Jun.
136
+
137
+ * Removed a needless space after the XML declaration in pretty print mode.
138
+ * GH-164
139
+ * Patch by NAITOH Jun.
140
+
141
+ * Stopped emitting `:text` events after the root element.
142
+ * GH-167
143
+ * Patch by NAITOH Jun.
144
+
145
+ ### Fixes
146
+
147
+ * Fixed a bug that SAX2 parser doesn't expand predefined entities for
148
+ `characters` callback.
149
+ * GH-168
150
+ * Patch by NAITOH Jun.
151
+
152
+ ### Thanks
153
+
154
+ * NAITOH Jun
155
+
156
+ * Watson
157
+
3
158
  ## 3.3.1 - 2024-06-25 {#version-3-3-1}
4
159
 
5
160
  ### Improvements
data/lib/rexml/element.rb CHANGED
@@ -441,9 +441,14 @@ module REXML
441
441
  # Related: #root_node, #document.
442
442
  #
443
443
  def root
444
- return elements[1] if self.kind_of? Document
445
- return self if parent.kind_of? Document or parent.nil?
446
- return parent.root
444
+ target = self
445
+ while target
446
+ return target.elements[1] if target.kind_of? Document
447
+ parent = target.parent
448
+ return target if parent.kind_of? Document or parent.nil?
449
+ target = parent
450
+ end
451
+ nil
447
452
  end
448
453
 
449
454
  # :call-seq:
@@ -619,8 +624,12 @@ module REXML
619
624
  else
620
625
  prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
621
626
  end
622
- ns = attributes[ prefix ]
623
- ns = parent.namespace(prefix) if ns.nil? and parent
627
+ ns = nil
628
+ target = self
629
+ while ns.nil? and target
630
+ ns = target.attributes[prefix]
631
+ target = target.parent
632
+ end
624
633
  ns = '' if ns.nil? and prefix == 'xmlns'
625
634
  return ns
626
635
  end
@@ -2375,17 +2384,6 @@ module REXML
2375
2384
  elsif old_attr.kind_of? Hash
2376
2385
  old_attr[value.prefix] = value
2377
2386
  elsif old_attr.prefix != value.prefix
2378
- # Check for conflicting namespaces
2379
- if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
2380
- old_namespace = old_attr.namespace
2381
- new_namespace = value.namespace
2382
- if old_namespace == new_namespace
2383
- raise ParseException.new(
2384
- "Namespace conflict in adding attribute \"#{value.name}\": "+
2385
- "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
2386
- "prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
2387
- end
2388
- end
2389
2387
  store value.name, {old_attr.prefix => old_attr,
2390
2388
  value.prefix => value}
2391
2389
  else
data/lib/rexml/entity.rb CHANGED
@@ -12,6 +12,7 @@ module REXML
12
12
  EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
13
13
  NDATADECL = "\\s+NDATA\\s+#{NAME}"
14
14
  PEREFERENCE = "%#{NAME};"
15
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
15
16
  ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
16
17
  PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
17
18
  ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ module REXML
19
20
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
20
21
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
21
22
 
22
- attr_reader :name, :external, :ref, :ndata, :pubid
23
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
23
24
 
24
25
  # Create a new entity. Simple entities can be constructed by passing a
25
26
  # name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,11 @@ module REXML
68
69
  end
69
70
 
70
71
  # Evaluates to the unnormalized value of this entity; that is, replacing
71
- # all entities -- both %ent; and &ent; entities. This differs from
72
- # +value()+ in that +value+ only replaces %ent; entities.
72
+ # &ent; entities.
73
73
  def unnormalized
74
74
  document.record_entity_expansion unless document.nil?
75
- v = value()
76
- return nil if v.nil?
77
- @unnormalized = Text::unnormalize(v, parent)
78
- @unnormalized
75
+ return nil if @value.nil?
76
+ @unnormalized = Text::unnormalize(@value, parent)
79
77
  end
80
78
 
81
79
  #once :unnormalized
@@ -121,46 +119,6 @@ module REXML
121
119
  write rv
122
120
  rv
123
121
  end
124
-
125
- PEREFERENCE_RE = /#{PEREFERENCE}/um
126
- # Returns the value of this entity. At the moment, only internal entities
127
- # are processed. If the value contains internal references (IE,
128
- # %blah;), those are replaced with their values. IE, if the doctype
129
- # contains:
130
- # <!ENTITY % foo "bar">
131
- # <!ENTITY yada "nanoo %foo; nanoo>
132
- # then:
133
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
- def value
135
- @resolved_value ||= resolve_value
136
- end
137
-
138
- def parent=(other)
139
- @resolved_value = nil
140
- super
141
- end
142
-
143
- private
144
- def resolve_value
145
- return nil if @value.nil?
146
- return @value unless @value.match?(PEREFERENCE_RE)
147
-
148
- matches = @value.scan(PEREFERENCE_RE)
149
- rv = @value.clone
150
- if @parent
151
- sum = 0
152
- matches.each do |entity_reference|
153
- entity_value = @parent.entity( entity_reference[0] )
154
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
- raise "entity expansion has grown too large"
156
- else
157
- sum += entity_value.bytesize
158
- end
159
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
160
- end
161
- end
162
- rv
163
- end
164
122
  end
165
123
 
166
124
  # This is a set of entity constants -- the ones defined in the XML
@@ -111,7 +111,7 @@ module REXML
111
111
  # itself, then we don't need a carriage return... which makes this
112
112
  # logic more complex.
113
113
  node.children.each { |child|
114
- next if child == node.children[-1] and child.instance_of?(Text)
114
+ next if child.instance_of?(Text)
115
115
  unless child == node.children[0] or child.instance_of?(Text) or
116
116
  (child == node.children[1] and !node.children[0].writethis)
117
117
  output << "\n"
@@ -1,12 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
  require_relative '../parseexception'
3
3
  require_relative '../undefinednamespaceexception'
4
+ require_relative '../security'
4
5
  require_relative '../source'
5
6
  require 'set'
6
7
  require "strscan"
7
8
 
8
9
  module REXML
9
10
  module Parsers
11
+ unless [].respond_to?(:tally)
12
+ module EnumerableTally
13
+ refine Enumerable do
14
+ def tally
15
+ counts = {}
16
+ each do |item|
17
+ counts[item] ||= 0
18
+ counts[item] += 1
19
+ end
20
+ counts
21
+ end
22
+ end
23
+ end
24
+ using EnumerableTally
25
+ end
26
+
10
27
  if StringScanner::Version < "3.0.8"
11
28
  module StringScannerCaptures
12
29
  refine StringScanner do
@@ -124,11 +141,11 @@ module REXML
124
141
  }
125
142
 
126
143
  module Private
127
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
144
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
128
145
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
129
146
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
130
147
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
131
- NAME_PATTERN = /\s*#{NAME}/um
148
+ NAME_PATTERN = /#{NAME}/um
132
149
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
133
150
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
134
151
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -146,6 +163,7 @@ module REXML
146
163
  self.stream = source
147
164
  @listeners = []
148
165
  @prefixes = Set.new
166
+ @entity_expansion_count = 0
149
167
  end
150
168
 
151
169
  def add_listener( listener )
@@ -153,15 +171,18 @@ module REXML
153
171
  end
154
172
 
155
173
  attr_reader :source
174
+ attr_reader :entity_expansion_count
156
175
 
157
176
  def stream=( source )
158
177
  @source = SourceFactory.create_from( source )
159
178
  @closed = nil
179
+ @have_root = false
160
180
  @document_status = nil
161
181
  @tags = []
162
182
  @stack = []
163
183
  @entities = []
164
- @nsstack = []
184
+ @namespaces = {}
185
+ @namespaces_restore_stack = []
165
186
  end
166
187
 
167
188
  def position
@@ -229,6 +250,10 @@ module REXML
229
250
  if @document_status == :in_doctype
230
251
  raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
231
252
  end
253
+ unless @tags.empty?
254
+ path = "/" + @tags.join("/")
255
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
256
+ end
232
257
  return [ :end_document ]
233
258
  end
234
259
  return @stack.shift if @stack.size > 0
@@ -239,7 +264,7 @@ module REXML
239
264
  if @document_status == nil
240
265
  start_position = @source.position
241
266
  if @source.match("<?", true)
242
- return process_instruction(start_position)
267
+ return process_instruction
243
268
  elsif @source.match("<!", true)
244
269
  if @source.match("--", true)
245
270
  md = @source.match(/(.*?)-->/um, true)
@@ -261,7 +286,6 @@ module REXML
261
286
  @source.position = start_position
262
287
  raise REXML::ParseException.new(message, @source)
263
288
  end
264
- @nsstack.unshift(Set.new)
265
289
  name = parse_name(base_error_message)
266
290
  if @source.match(/\s*\[/um, true)
267
291
  id = [nil, nil, nil]
@@ -309,7 +333,11 @@ module REXML
309
333
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
310
334
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
311
335
  elsif @source.match("ENTITY", true)
312
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
336
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
337
+ unless match_data
338
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
339
+ end
340
+ match = [:entitydecl, *match_data.captures.compact]
313
341
  ref = false
314
342
  if match[1] == '%'
315
343
  ref = true
@@ -327,6 +355,8 @@ module REXML
327
355
  match[4] = match[4][1..-2] # HREF
328
356
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
329
357
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
358
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
359
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
330
360
  else
331
361
  match[2] = match[2][1..-2]
332
362
  match.pop if match.size == 4
@@ -341,7 +371,7 @@ module REXML
341
371
  contents = md[0]
342
372
 
343
373
  pairs = {}
344
- values = md[0].scan( ATTDEF_RE )
374
+ values = md[0].strip.scan( ATTDEF_RE )
345
375
  values.each do |attdef|
346
376
  unless attdef[3] == "#IMPLIED"
347
377
  attdef.compact!
@@ -349,7 +379,7 @@ module REXML
349
379
  val = attdef[4] if val == "#FIXED "
350
380
  pairs[attdef[0]] = val
351
381
  if attdef[0] =~ /^xmlns:(.*)/
352
- @nsstack[0] << $1
382
+ @namespaces[$1] = val
353
383
  end
354
384
  end
355
385
  end
@@ -402,7 +432,7 @@ module REXML
402
432
  # here explicitly.
403
433
  @source.ensure_buffer
404
434
  if @source.match("/", true)
405
- @nsstack.shift
435
+ @namespaces_restore_stack.pop
406
436
  last_tag = @tags.pop
407
437
  md = @source.match(Private::CLOSE_PATTERN, true)
408
438
  if md and !last_tag
@@ -435,7 +465,7 @@ module REXML
435
465
  raise REXML::ParseException.new( "Declarations can only occur "+
436
466
  "in the doctype declaration.", @source)
437
467
  elsif @source.match("?", true)
438
- return process_instruction(start_position)
468
+ return process_instruction
439
469
  else
440
470
  # Get the next tag
441
471
  md = @source.match(Private::TAG_PATTERN, true)
@@ -447,21 +477,25 @@ module REXML
447
477
  @document_status = :in_element
448
478
  @prefixes.clear
449
479
  @prefixes << md[2] if md[2]
450
- @nsstack.unshift(curr_ns=Set.new)
451
- attributes, closed = parse_attributes(@prefixes, curr_ns)
480
+ push_namespaces_restore
481
+ attributes, closed = parse_attributes(@prefixes)
452
482
  # Verify that all of the prefixes have been defined
453
483
  for prefix in @prefixes
454
- unless @nsstack.find{|k| k.member?(prefix)}
484
+ unless @namespaces.key?(prefix)
455
485
  raise UndefinedNamespaceException.new(prefix,@source,self)
456
486
  end
457
487
  end
458
488
 
459
489
  if closed
460
490
  @closed = tag
461
- @nsstack.shift
491
+ pop_namespaces_restore
462
492
  else
493
+ if @tags.empty? and @have_root
494
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
495
+ end
463
496
  @tags.push( tag )
464
497
  end
498
+ @have_root = true
465
499
  return [ :start_element, tag, attributes ]
466
500
  end
467
501
  else
@@ -469,6 +503,16 @@ module REXML
469
503
  if text.chomp!("<")
470
504
  @source.position -= "<".bytesize
471
505
  end
506
+ if @tags.empty?
507
+ unless /\A\s*\z/.match?(text)
508
+ if @have_root
509
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
510
+ else
511
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
512
+ end
513
+ end
514
+ return pull_event if @have_root
515
+ end
472
516
  return [ :text, text ]
473
517
  end
474
518
  rescue REXML::UndefinedNamespaceException
@@ -484,13 +528,13 @@ module REXML
484
528
  private :pull_event
485
529
 
486
530
  def entity( reference, entities )
487
- value = nil
488
- value = entities[ reference ] if entities
489
- if not value
490
- value = DEFAULT_ENTITIES[ reference ]
491
- value = value[2] if value
492
- end
493
- unnormalize( value, entities ) if value
531
+ return unless entities
532
+
533
+ value = entities[ reference ]
534
+ return if value.nil?
535
+
536
+ record_entity_expansion
537
+ unnormalize( value, entities )
494
538
  end
495
539
 
496
540
  # Escapes all possible entities
@@ -511,7 +555,11 @@ module REXML
511
555
 
512
556
  # Unescapes all possible entities
513
557
  def unnormalize( string, entities=nil, filter=nil )
514
- rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
558
+ if string.include?("\r")
559
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
560
+ else
561
+ rv = string.dup
562
+ end
515
563
  matches = rv.scan( REFERENCE_RE )
516
564
  return rv if matches.size == 0
517
565
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
@@ -520,17 +568,29 @@ module REXML
520
568
  [Integer(m)].pack('U*')
521
569
  }
522
570
  matches.collect!{|x|x[0]}.compact!
571
+ if filter
572
+ matches.reject! do |entity_reference|
573
+ filter.include?(entity_reference)
574
+ end
575
+ end
523
576
  if matches.size > 0
524
- matches.each do |entity_reference|
525
- unless filter and filter.include?(entity_reference)
526
- entity_value = entity( entity_reference, entities )
527
- if entity_value
528
- re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
529
- rv.gsub!( re, entity_value )
530
- else
531
- er = DEFAULT_ENTITIES[entity_reference]
532
- rv.gsub!( er[0], er[2] ) if er
577
+ matches.tally.each do |entity_reference, n|
578
+ entity_expansion_count_before = @entity_expansion_count
579
+ entity_value = entity( entity_reference, entities )
580
+ if entity_value
581
+ if n > 1
582
+ entity_expansion_count_delta =
583
+ @entity_expansion_count - entity_expansion_count_before
584
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
533
585
  end
586
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
587
+ rv.gsub!( re, entity_value )
588
+ if rv.bytesize > Security.entity_expansion_text_limit
589
+ raise "entity expansion has grown too large"
590
+ end
591
+ else
592
+ er = DEFAULT_ENTITIES[entity_reference]
593
+ rv.gsub!( er[0], er[2] ) if er
534
594
  end
535
595
  end
536
596
  rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
@@ -539,6 +599,39 @@ module REXML
539
599
  end
540
600
 
541
601
  private
602
+ def add_namespace(prefix, uri)
603
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
604
+ if uri.nil?
605
+ @namespaces.delete(prefix)
606
+ else
607
+ @namespaces[prefix] = uri
608
+ end
609
+ end
610
+
611
+ def push_namespaces_restore
612
+ namespaces_restore = {}
613
+ @namespaces_restore_stack.push(namespaces_restore)
614
+ namespaces_restore
615
+ end
616
+
617
+ def pop_namespaces_restore
618
+ namespaces_restore = @namespaces_restore_stack.pop
619
+ namespaces_restore.each do |prefix, uri|
620
+ if uri.nil?
621
+ @namespaces.delete(prefix)
622
+ else
623
+ @namespaces[prefix] = uri
624
+ end
625
+ end
626
+ end
627
+
628
+ def record_entity_expansion(delta=1)
629
+ @entity_expansion_count += delta
630
+ if @entity_expansion_count > Security.entity_expansion_limit
631
+ raise "number of entity expansions exceeded, processing aborted."
632
+ end
633
+ end
634
+
542
635
  def need_source_encoding_update?(xml_declaration_encoding)
543
636
  return false if xml_declaration_encoding.nil?
544
637
  return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -548,14 +641,14 @@ module REXML
548
641
  def parse_name(base_error_message)
549
642
  md = @source.match(Private::NAME_PATTERN, true)
550
643
  unless md
551
- if @source.match(/\s*\S/um)
644
+ if @source.match(/\S/um)
552
645
  message = "#{base_error_message}: invalid name"
553
646
  else
554
647
  message = "#{base_error_message}: name is missing"
555
648
  end
556
649
  raise REXML::ParseException.new(message, @source)
557
650
  end
558
- md[1]
651
+ md[0]
559
652
  end
560
653
 
561
654
  def parse_id(base_error_message,
@@ -624,15 +717,24 @@ module REXML
624
717
  end
625
718
  end
626
719
 
627
- def process_instruction(start_position)
628
- match_data = @source.match(Private::INSTRUCTION_END, true)
629
- unless match_data
630
- message = "Invalid processing instruction node"
631
- @source.position = start_position
632
- raise REXML::ParseException.new(message, @source)
720
+ def process_instruction
721
+ name = parse_name("Malformed XML: Invalid processing instruction node")
722
+ if @source.match(/\s+/um, true)
723
+ match_data = @source.match(/(.*?)\?>/um, true)
724
+ unless match_data
725
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
726
+ end
727
+ content = match_data[1]
728
+ else
729
+ content = nil
730
+ unless @source.match("?>", true)
731
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
732
+ end
633
733
  end
634
- if @document_status.nil? and match_data[1] == "xml"
635
- content = match_data[2]
734
+ if name == "xml"
735
+ if @document_status
736
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
737
+ end
636
738
  version = VERSION.match(content)
637
739
  version = version[1] unless version.nil?
638
740
  encoding = ENCODING.match(content)
@@ -647,11 +749,12 @@ module REXML
647
749
  standalone = standalone[1] unless standalone.nil?
648
750
  return [ :xmldecl, version, encoding, standalone ]
649
751
  end
650
- [:processing_instruction, match_data[1], match_data[2]]
752
+ [:processing_instruction, name, content]
651
753
  end
652
754
 
653
- def parse_attributes(prefixes, curr_ns)
755
+ def parse_attributes(prefixes)
654
756
  attributes = {}
757
+ expanded_names = {}
655
758
  closed = false
656
759
  while true
657
760
  if @source.match(">", true)
@@ -693,7 +796,7 @@ module REXML
693
796
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
694
797
  raise REXML::ParseException.new( msg, @source, self)
695
798
  end
696
- curr_ns << local_part
799
+ add_namespace(local_part, value)
697
800
  elsif prefix
698
801
  prefixes << prefix unless prefix == "xml"
699
802
  end
@@ -703,6 +806,20 @@ module REXML
703
806
  raise REXML::ParseException.new(msg, @source, self)
704
807
  end
705
808
 
809
+ unless prefix == "xmlns"
810
+ uri = @namespaces[prefix]
811
+ expanded_name = [uri, local_part]
812
+ existing_prefix = expanded_names[expanded_name]
813
+ if existing_prefix
814
+ message = "Namespace conflict in adding attribute " +
815
+ "\"#{local_part}\": " +
816
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
817
+ "prefix \"#{prefix}\" = \"#{uri}\""
818
+ raise REXML::ParseException.new(message, @source, self)
819
+ end
820
+ expanded_names[expanded_name] = prefix
821
+ end
822
+
706
823
  attributes[name] = value
707
824
  else
708
825
  message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
@@ -47,6 +47,10 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
50
54
  def each
51
55
  while has_next?
52
56
  yield self.pull
@@ -22,6 +22,10 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
25
29
  def add_listener( listener )
26
30
  @parser.add_listener( listener )
27
31
  end
@@ -157,25 +161,8 @@ module REXML
157
161
  end
158
162
  end
159
163
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
164
+ unnormalized = @parser.unnormalize( event[1], @entities )
165
+ handle( :characters, unnormalized )
179
166
  when :entitydecl
180
167
  handle_entitydecl( event )
181
168
  when :processing_instruction, :comment, :attlistdecl,
@@ -7,37 +7,34 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
17
21
  def parse
18
22
  # entity string
19
23
  while true
20
24
  event = @parser.pull
21
25
  case event[0]
22
26
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
27
  return
29
28
  when :start_element
30
- @tag_stack << event[1]
31
29
  attrs = event[2].each do |n, v|
32
30
  event[2][n] = @parser.unnormalize( v )
33
31
  end
34
32
  @listener.tag_start( event[1], attrs )
35
33
  when :end_element
36
34
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
35
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
36
+ unnormalized = @parser.unnormalize( event[1], @entities )
37
+ @listener.text( unnormalized )
41
38
  when :processing_instruction
42
39
  @listener.instruction( *event[1,2] )
43
40
  when :start_doctype
@@ -48,6 +45,7 @@ module REXML
48
45
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
46
  @listener.send( event[0].to_s, *event[1..-1] )
50
47
  when :entitydecl, :notationdecl
48
+ @entities[ event[1] ] = event[2] if event.size == 3
51
49
  @listener.send( event[0].to_s, event[1..-1] )
52
50
  when :externalentity
53
51
  entity_reference = event[1]
@@ -15,7 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
18
  entities = nil
20
19
  begin
21
20
  while true
@@ -23,19 +22,13 @@ module REXML
23
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
24
23
  case event[0]
25
24
  when :end_document
26
- unless tag_stack.empty?
27
- raise ParseException.new("No close tag for #{@build_context.xpath}",
28
- @parser.source, @parser)
29
- end
30
25
  return
31
26
  when :start_element
32
- tag_stack.push(event[1])
33
27
  el = @build_context = @build_context.add_element( event[1] )
34
28
  event[2].each do |key, value|
35
29
  el.attributes[key]=Attribute.new(key,value,self)
36
30
  end
37
31
  when :end_element
38
- tag_stack.pop
39
32
  @build_context = @build_context.parent
40
33
  when :text
41
34
  if @build_context[-1].instance_of? Text
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.1"
34
+ VERSION = "3.3.6"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -204,10 +204,20 @@ module REXML
204
204
  end
205
205
  end
206
206
 
207
- def read(term = nil)
207
+ def read(term = nil, min_bytes = 1)
208
208
  term = encode(term) if term
209
209
  begin
210
- @scanner << readline(term)
210
+ str = readline(term)
211
+ @scanner << str
212
+ read_bytes = str.bytesize
213
+ begin
214
+ while read_bytes < min_bytes
215
+ str = readline(term)
216
+ @scanner << str
217
+ read_bytes += str.bytesize
218
+ end
219
+ rescue IOError
220
+ end
211
221
  true
212
222
  rescue Exception, NameError
213
223
  @source = nil
@@ -237,10 +247,9 @@ module REXML
237
247
  read if @scanner.eos? && @source
238
248
  end
239
249
 
240
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
- # - ">"
242
- # - "XXX>" (X is any string excluding '>')
243
250
  def match( pattern, cons=false )
251
+ # To avoid performance issue, we need to increase bytes to read per scan
252
+ min_bytes = 1
244
253
  while true
245
254
  if cons
246
255
  md = @scanner.scan(pattern)
@@ -250,7 +259,8 @@ module REXML
250
259
  break if md
251
260
  return nil if pattern.is_a?(String)
252
261
  return nil if @source.nil?
253
- return nil unless read
262
+ return nil unless read(nil, min_bytes)
263
+ min_bytes *= 2
254
264
  end
255
265
 
256
266
  md.nil? ? nil : @scanner
data/lib/rexml/text.rb CHANGED
@@ -151,25 +151,45 @@ module REXML
151
151
  end
152
152
  end
153
153
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while (index = string.index(/<|&/, pos))
156
+ if string[index] == "<"
157
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158
+ end
159
+
160
+ unless (end_index = string.index(/[^\s];/, index + 1))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
+ end
163
+
164
+ value = string[(index + 1)..end_index]
165
+ if /\s/.match?(value)
166
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167
+ end
168
+
169
+ if value[0] == "#"
170
+ character_reference = value[1..-1]
171
+
172
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173
+ if character_reference[0] == "x" || character_reference[-1] == "x"
174
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
175
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
177
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
  end
179
+
180
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184
+ end
185
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
187
  end
188
+
189
+ pos = end_index + 1
172
190
  end
191
+
192
+ string
173
193
  end
174
194
 
175
195
  def node_type
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-06-25 00:00:00.000000000 Z
10
+ date: 2024-08-22 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md