rexml 3.3.1 → 3.3.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
4
- data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
3
+ metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
4
+ data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
5
5
  SHA512:
6
- metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
7
- data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
6
+ metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
7
+ data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
data/NEWS.md CHANGED
@@ -1,5 +1,53 @@
1
1
  # News
2
2
 
3
+ ## 3.3.2 - 2024-07-16 {#version-3-3-2}
4
+
5
+ ### Improvements
6
+
7
+ * Improved parse performance.
8
+ * GH-160
9
+ * Patch by NAITOH Jun.
10
+
11
+ * Improved parse performance.
12
+ * GH-169
13
+ * GH-170
14
+ * GH-171
15
+ * GH-172
16
+ * GH-173
17
+ * GH-174
18
+ * Patch by Watson.
19
+
20
+ * Added support for raising a parse exception when an XML document has extra
21
+ content after the root element.
22
+ * GH-161
23
+ * Patch by NAITOH Jun.
24
+
25
+ * Added support for raising a parse exception when an XML
26
+ declaration exists in the wrong position.
27
+ * GH-162
28
+ * Patch by NAITOH Jun.
29
+
30
+ * Removed a needless space after the XML declaration in pretty print mode.
31
+ * GH-164
32
+ * Patch by NAITOH Jun.
33
+
34
+ * Stopped emitting `:text` events after the root element.
35
+ * GH-167
36
+ * Patch by NAITOH Jun.
37
+
38
+ ### Fixes
39
+
40
+ * Fixed a bug where the SAX2 parser doesn't expand predefined entities for
41
+ `characters` callback.
42
+ * GH-168
43
+ * Patch by NAITOH Jun.
44
+
45
+ ### Thanks
46
+
47
+ * NAITOH Jun
48
+
49
+ * Watson
50
+
3
51
  ## 3.3.1 - 2024-06-25 {#version-3-3-1}
4
52
 
5
53
  ### Improvements
@@ -111,7 +111,7 @@ module REXML
111
111
  # itself, then we don't need a carriage return... which makes this
112
112
  # logic more complex.
113
113
  node.children.each { |child|
114
- next if child == node.children[-1] and child.instance_of?(Text)
114
+ next if child.instance_of?(Text)
115
115
  unless child == node.children[0] or child.instance_of?(Text) or
116
116
  (child == node.children[1] and !node.children[0].writethis)
117
117
  output << "\n"
@@ -124,6 +124,14 @@ module REXML
124
124
  }
125
125
 
126
126
  module Private
127
+ # Terminal requires two or more letters.
128
+ INSTRUCTION_TERM = "?>"
129
+ COMMENT_TERM = "-->"
130
+ CDATA_TERM = "]]>"
131
+ DOCTYPE_TERM = "]>"
132
+ # Read to the end of DOCTYPE because there is no proper ENTITY termination
133
+ ENTITY_TERM = DOCTYPE_TERM
134
+
127
135
  INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
128
136
  TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
129
137
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
@@ -157,6 +165,7 @@ module REXML
157
165
  def stream=( source )
158
166
  @source = SourceFactory.create_from( source )
159
167
  @closed = nil
168
+ @have_root = false
160
169
  @document_status = nil
161
170
  @tags = []
162
171
  @stack = []
@@ -242,7 +251,7 @@ module REXML
242
251
  return process_instruction(start_position)
243
252
  elsif @source.match("<!", true)
244
253
  if @source.match("--", true)
245
- md = @source.match(/(.*?)-->/um, true)
254
+ md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
246
255
  if md.nil?
247
256
  raise REXML::ParseException.new("Unclosed comment", @source)
248
257
  end
@@ -309,7 +318,7 @@ module REXML
309
318
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
310
319
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
311
320
  elsif @source.match("ENTITY", true)
312
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
321
+ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
313
322
  ref = false
314
323
  if match[1] == '%'
315
324
  ref = true
@@ -341,7 +350,7 @@ module REXML
341
350
  contents = md[0]
342
351
 
343
352
  pairs = {}
344
- values = md[0].scan( ATTDEF_RE )
353
+ values = md[0].strip.scan( ATTDEF_RE )
345
354
  values.each do |attdef|
346
355
  unless attdef[3] == "#IMPLIED"
347
356
  attdef.compact!
@@ -374,14 +383,14 @@ module REXML
374
383
  raise REXML::ParseException.new(message, @source)
375
384
  end
376
385
  return [:notationdecl, name, *id]
377
- elsif md = @source.match(/--(.*?)-->/um, true)
386
+ elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
378
387
  case md[1]
379
388
  when /--/, /-\z/
380
389
  raise REXML::ParseException.new("Malformed comment", @source)
381
390
  end
382
391
  return [ :comment, md[1] ] if md
383
392
  end
384
- elsif match = @source.match(/(%.*?;)\s*/um, true)
393
+ elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
385
394
  return [ :externalentity, match[1] ]
386
395
  elsif @source.match(/\]\s*>/um, true)
387
396
  @document_status = :after_doctype
@@ -421,7 +430,7 @@ module REXML
421
430
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
422
431
  raise REXML::ParseException.new("Malformed node", @source) unless md
423
432
  if md[0][0] == ?-
424
- md = @source.match(/--(.*?)-->/um, true)
433
+ md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
425
434
 
426
435
  if md.nil? || /--|-\z/.match?(md[1])
427
436
  raise REXML::ParseException.new("Malformed comment", @source)
@@ -429,7 +438,7 @@ module REXML
429
438
 
430
439
  return [ :comment, md[1] ]
431
440
  else
432
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
441
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
433
442
  return [ :cdata, md[1] ] if md
434
443
  end
435
444
  raise REXML::ParseException.new( "Declarations can only occur "+
@@ -460,8 +469,12 @@ module REXML
460
469
  @closed = tag
461
470
  @nsstack.shift
462
471
  else
472
+ if @tags.empty? and @have_root
473
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
474
+ end
463
475
  @tags.push( tag )
464
476
  end
477
+ @have_root = true
465
478
  return [ :start_element, tag, attributes ]
466
479
  end
467
480
  else
@@ -469,6 +482,12 @@ module REXML
469
482
  if text.chomp!("<")
470
483
  @source.position -= "<".bytesize
471
484
  end
485
+ if @tags.empty? and @have_root
486
+ unless /\A\s*\z/.match?(text)
487
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
488
+ end
489
+ return pull_event
490
+ end
472
491
  return [ :text, text ]
473
492
  end
474
493
  rescue REXML::UndefinedNamespaceException
@@ -511,7 +530,11 @@ module REXML
511
530
 
512
531
  # Unescapes all possible entities
513
532
  def unnormalize( string, entities=nil, filter=nil )
514
- rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
533
+ if string.include?("\r")
534
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
535
+ else
536
+ rv = string.dup
537
+ end
515
538
  matches = rv.scan( REFERENCE_RE )
516
539
  return rv if matches.size == 0
517
540
  rv.gsub!( Private::CHARACTER_REFERENCES ) {
@@ -625,13 +648,16 @@ module REXML
625
648
  end
626
649
 
627
650
  def process_instruction(start_position)
628
- match_data = @source.match(Private::INSTRUCTION_END, true)
651
+ match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
629
652
  unless match_data
630
653
  message = "Invalid processing instruction node"
631
654
  @source.position = start_position
632
655
  raise REXML::ParseException.new(message, @source)
633
656
  end
634
- if @document_status.nil? and match_data[1] == "xml"
657
+ if match_data[1] == "xml"
658
+ if @document_status
659
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
660
+ end
635
661
  content = match_data[2]
636
662
  version = VERSION.match(content)
637
663
  version = version[1] unless version.nil?
@@ -157,25 +157,8 @@ module REXML
157
157
  end
158
158
  end
159
159
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
160
+ unnormalized = @parser.unnormalize( event[1], @entities )
161
+ handle( :characters, unnormalized )
179
162
  when :entitydecl
180
163
  handle_entitydecl( event )
181
164
  when :processing_instruction, :comment, :attlistdecl,
@@ -36,8 +36,8 @@ module REXML
36
36
  @listener.tag_end( event[1] )
37
37
  @tag_stack.pop
38
38
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
39
+ unnormalized = @parser.unnormalize( event[1] )
40
+ @listener.text( unnormalized )
41
41
  when :processing_instruction
42
42
  @listener.instruction( *event[1,2] )
43
43
  when :start_doctype
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.1"
34
+ VERSION = "3.3.2"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -117,7 +117,7 @@ module REXML
117
117
  def ensure_buffer
118
118
  end
119
119
 
120
- def match(pattern, cons=false)
120
+ def match(pattern, cons=false, term: nil)
121
121
  if cons
122
122
  @scanner.scan(pattern).nil? ? nil : @scanner
123
123
  else
@@ -240,7 +240,7 @@ module REXML
240
240
  # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
241
241
  # - ">"
242
242
  # - "XXX>" (X is any string excluding '>')
243
- def match( pattern, cons=false )
243
+ def match( pattern, cons=false, term: nil )
244
244
  while true
245
245
  if cons
246
246
  md = @scanner.scan(pattern)
@@ -250,7 +250,7 @@ module REXML
250
250
  break if md
251
251
  return nil if pattern.is_a?(String)
252
252
  return nil if @source.nil?
253
- return nil unless read
253
+ return nil unless read(term)
254
254
  end
255
255
 
256
256
  md.nil? ? nil : @scanner
data/lib/rexml/text.rb CHANGED
@@ -151,25 +151,45 @@ module REXML
151
151
  end
152
152
  end
153
153
 
154
- # context sensitive
155
- string.scan(pattern) do
156
- if $1[-1] != ?;
157
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
158
- elsif $1[0] == ?&
159
- if $5 and $5[0] == ?#
160
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
161
- when *VALID_CHAR
154
+ pos = 0
155
+ while (index = string.index(/<|&/, pos))
156
+ if string[index] == "<"
157
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
158
+ end
159
+
160
+ unless (end_index = string.index(/[^\s];/, index + 1))
161
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
+ end
163
+
164
+ value = string[(index + 1)..end_index]
165
+ if /\s/.match?(value)
166
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
167
+ end
168
+
169
+ if value[0] == "#"
170
+ character_reference = value[1..-1]
171
+
172
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
173
+ if character_reference[0] == "x" || character_reference[-1] == "x"
174
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
162
175
  else
163
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
176
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
164
177
  end
165
- # FIXME: below can't work but this needs API change.
166
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
167
- # if !doctype or !doctype.entities.has_key?($3)
168
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
169
- # end
170
178
  end
179
+
180
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
181
+ when *VALID_CHAR
182
+ else
183
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
184
+ end
185
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
186
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
171
187
  end
188
+
189
+ pos = end_index + 1
172
190
  end
191
+
192
+ string
173
193
  end
174
194
 
175
195
  def node_type
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-06-25 00:00:00.000000000 Z
10
+ date: 2024-07-16 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: strscan
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
116
116
  licenses:
117
117
  - BSD-2-Clause
118
118
  metadata:
119
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
120
120
  rdoc_options:
121
121
  - "--main"
122
122
  - README.md