rexml 3.3.0 → 3.3.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25c91d880b1d9de11210a1fec235f630b6948e09104aa810b3cdde1a3d277139
4
- data.tar.gz: b06ffa92b8f883052db3b41fcc025b9b09c0f9117c301476de0264203c94632f
3
+ metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
4
+ data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
5
5
  SHA512:
6
- metadata.gz: 21fe3b359774dd752c405d74c4888f25dfd55c436ca0f02cc34f602aff88b99f80b97cdb186bf9ccc40e52783b9f90f985d8e87fd0687972cedaf759c808cc9d
7
- data.tar.gz: d613797dcd6f20b6c1d17deca5235a91456165fbc04dcd4fa0ec419e72bd8ba0aee7eadbb2a7e2a65263f74e0dcfacc99a9976ffe396717c2956b7f5155c2725
6
+ metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
7
+ data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
data/NEWS.md CHANGED
@@ -1,5 +1,52 @@
1
1
  # News
2
2
 
3
+ ## 3.3.1 - 2024-06-25 {#version-3-3-1}
4
+
5
+ ### Improvements
6
+
7
+ * Added support for detecting malformed top-level comments.
8
+ * GH-145
9
+ * Patch by Hiroya Fujinami.
10
+
11
+ * Improved `REXML::Element#attribute` performance.
12
+ * GH-146
13
+ * Patch by Hiroya Fujinami.
14
+
15
+ * Added support for detecting malformed `<!-->` comments.
16
+ * GH-147
17
+ * Patch by Hiroya Fujinami.
18
+
19
+ * Added support for detecting unclosed `DOCTYPE`.
20
+ * GH-152
21
+ * Patch by Hiroya Fujinami.
22
+
23
+ * Added `changelog_uri` metadata to gemspec.
24
+ * GH-156
25
+ * Patch by fynsta.
26
+
27
+ * Improved parse performance.
28
+ * GH-157
29
+ * GH-158
30
+ * Patch by NAITOH Jun.
31
+
32
+ ### Fixes
33
+
34
+ * Fixed a bug where large XML documents couldn't be parsed.
35
+ * GH-154
36
+ * Patch by NAITOH Jun.
37
+
38
+ * Fixed a bug where private constants were visible.
39
+ * GH-155
40
+ * Patch by NAITOH Jun.
41
+
42
+ ### Thanks
43
+
44
+ * Hiroya Fujinami
45
+
46
+ * NAITOH Jun
47
+
48
+ * fynsta
49
+
3
50
  ## 3.3.0 - 2024-06-11 {#version-3-3-0}
4
51
 
5
52
  ### Improvements
data/lib/rexml/element.rb CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
7
7
  require_relative "parseexception"
8
8
 
9
9
  module REXML
10
- # An implementation note about namespaces:
11
- # As we parse, when we find namespaces we put them in a hash and assign
12
- # them a unique ID. We then convert the namespace prefix for the node
13
- # to the unique ID. This makes namespace lookup much faster for the
14
- # cost of extra memory use. We save the namespace prefix for the
15
- # context node and convert it back when we write it.
16
- @@namespaces = {}
17
-
18
10
  # An \REXML::Element object represents an XML element.
19
11
  #
20
12
  # An element:
@@ -1284,16 +1276,11 @@ module REXML
1284
1276
  # document.root.attribute("x", "a") # => a:x='a:x'
1285
1277
  #
1286
1278
  def attribute( name, namespace=nil )
1287
- prefix = nil
1288
- if namespaces.respond_to? :key
1289
- prefix = namespaces.key(namespace) if namespace
1290
- else
1291
- prefix = namespaces.index(namespace) if namespace
1292
- end
1279
+ prefix = namespaces.key(namespace) if namespace
1293
1280
  prefix = nil if prefix == 'xmlns'
1294
1281
 
1295
1282
  ret_val =
1296
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
1283
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
1297
1284
 
1298
1285
  return ret_val unless ret_val.nil?
1299
1286
  return nil if prefix.nil?
@@ -132,13 +132,20 @@ module REXML
132
132
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
133
133
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
134
134
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
135
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
136
+ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
137
+ DEFAULT_ENTITIES_PATTERNS = {}
138
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
139
+ default_entities.each do |term|
140
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
141
+ end
135
142
  end
136
143
  private_constant :Private
137
- include Private
138
144
 
139
145
  def initialize( source )
140
146
  self.stream = source
141
147
  @listeners = []
148
+ @prefixes = Set.new
142
149
  end
143
150
 
144
151
  def add_listener( listener )
@@ -204,6 +211,8 @@ module REXML
204
211
 
205
212
  # Returns the next event. This is a +PullEvent+ object.
206
213
  def pull
214
+ @source.drop_parsed_content
215
+
207
216
  pull_event.tap do |event|
208
217
  @listeners.each do |listener|
209
218
  listener.receive event
@@ -216,7 +225,12 @@ module REXML
216
225
  x, @closed = @closed, nil
217
226
  return [ :end_element, x ]
218
227
  end
219
- return [ :end_document ] if empty?
228
+ if empty?
229
+ if @document_status == :in_doctype
230
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
231
+ end
232
+ return [ :end_document ]
233
+ end
220
234
  return @stack.shift if @stack.size > 0
221
235
  #STDERR.puts @source.encoding
222
236
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -228,7 +242,14 @@ module REXML
228
242
  return process_instruction(start_position)
229
243
  elsif @source.match("<!", true)
230
244
  if @source.match("--", true)
231
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
245
+ md = @source.match(/(.*?)-->/um, true)
246
+ if md.nil?
247
+ raise REXML::ParseException.new("Unclosed comment", @source)
248
+ end
249
+ if /--|-\z/.match?(md[1])
250
+ raise REXML::ParseException.new("Malformed comment", @source)
251
+ end
252
+ return [ :comment, md[1] ]
232
253
  elsif @source.match("DOCTYPE", true)
233
254
  base_error_message = "Malformed DOCTYPE"
234
255
  unless @source.match(/\s+/um, true)
@@ -240,7 +261,7 @@ module REXML
240
261
  @source.position = start_position
241
262
  raise REXML::ParseException.new(message, @source)
242
263
  end
243
- @nsstack.unshift(curr_ns=Set.new)
264
+ @nsstack.unshift(Set.new)
244
265
  name = parse_name(base_error_message)
245
266
  if @source.match(/\s*\[/um, true)
246
267
  id = [nil, nil, nil]
@@ -288,7 +309,7 @@ module REXML
288
309
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
289
310
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
290
311
  elsif @source.match("ENTITY", true)
291
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
312
+ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
292
313
  ref = false
293
314
  if match[1] == '%'
294
315
  ref = true
@@ -314,7 +335,7 @@ module REXML
314
335
  match << '%' if ref
315
336
  return match
316
337
  elsif @source.match("ATTLIST", true)
317
- md = @source.match(ATTLISTDECL_END, true)
338
+ md = @source.match(Private::ATTLISTDECL_END, true)
318
339
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
319
340
  element = md[1]
320
341
  contents = md[0]
@@ -366,6 +387,9 @@ module REXML
366
387
  @document_status = :after_doctype
367
388
  return [ :end_doctype ]
368
389
  end
390
+ if @document_status == :in_doctype
391
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
392
+ end
369
393
  end
370
394
  if @document_status == :after_doctype
371
395
  @source.match(/\s*/um, true)
@@ -380,7 +404,7 @@ module REXML
380
404
  if @source.match("/", true)
381
405
  @nsstack.shift
382
406
  last_tag = @tags.pop
383
- md = @source.match(CLOSE_PATTERN, true)
407
+ md = @source.match(Private::CLOSE_PATTERN, true)
384
408
  if md and !last_tag
385
409
  message = "Unexpected top-level end tag (got '#{md[1]}')"
386
410
  raise REXML::ParseException.new(message, @source)
@@ -399,12 +423,11 @@ module REXML
399
423
  if md[0][0] == ?-
400
424
  md = @source.match(/--(.*?)-->/um, true)
401
425
 
402
- case md[1]
403
- when /--/, /-\z/
426
+ if md.nil? || /--|-\z/.match?(md[1])
404
427
  raise REXML::ParseException.new("Malformed comment", @source)
405
428
  end
406
429
 
407
- return [ :comment, md[1] ] if md
430
+ return [ :comment, md[1] ]
408
431
  else
409
432
  md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
410
433
  return [ :cdata, md[1] ] if md
@@ -415,19 +438,19 @@ module REXML
415
438
  return process_instruction(start_position)
416
439
  else
417
440
  # Get the next tag
418
- md = @source.match(TAG_PATTERN, true)
441
+ md = @source.match(Private::TAG_PATTERN, true)
419
442
  unless md
420
443
  @source.position = start_position
421
444
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
422
445
  end
423
446
  tag = md[1]
424
447
  @document_status = :in_element
425
- prefixes = Set.new
426
- prefixes << md[2] if md[2]
448
+ @prefixes.clear
449
+ @prefixes << md[2] if md[2]
427
450
  @nsstack.unshift(curr_ns=Set.new)
428
- attributes, closed = parse_attributes(prefixes, curr_ns)
451
+ attributes, closed = parse_attributes(@prefixes, curr_ns)
429
452
  # Verify that all of the prefixes have been defined
430
- for prefix in prefixes
453
+ for prefix in @prefixes
431
454
  unless @nsstack.find{|k| k.member?(prefix)}
432
455
  raise UndefinedNamespaceException.new(prefix,@source,self)
433
456
  end
@@ -488,10 +511,10 @@ module REXML
488
511
 
489
512
  # Unescapes all possible entities
490
513
  def unnormalize( string, entities=nil, filter=nil )
491
- rv = string.gsub( /\r\n?/, "\n" )
514
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
492
515
  matches = rv.scan( REFERENCE_RE )
493
516
  return rv if matches.size == 0
494
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
517
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
495
518
  m=$1
496
519
  m = "0#{m}" if m[0] == ?x
497
520
  [Integer(m)].pack('U*')
@@ -502,7 +525,7 @@ module REXML
502
525
  unless filter and filter.include?(entity_reference)
503
526
  entity_value = entity( entity_reference, entities )
504
527
  if entity_value
505
- re = /&#{entity_reference};/
528
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
506
529
  rv.gsub!( re, entity_value )
507
530
  else
508
531
  er = DEFAULT_ENTITIES[entity_reference]
@@ -510,7 +533,7 @@ module REXML
510
533
  end
511
534
  end
512
535
  end
513
- rv.gsub!( /&amp;/, '&' )
536
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
514
537
  end
515
538
  rv
516
539
  end
@@ -523,7 +546,7 @@ module REXML
523
546
  end
524
547
 
525
548
  def parse_name(base_error_message)
526
- md = @source.match(NAME_PATTERN, true)
549
+ md = @source.match(Private::NAME_PATTERN, true)
527
550
  unless md
528
551
  if @source.match(/\s*\S/um)
529
552
  message = "#{base_error_message}: invalid name"
@@ -602,7 +625,7 @@ module REXML
602
625
  end
603
626
 
604
627
  def process_instruction(start_position)
605
- match_data = @source.match(INSTRUCTION_END, true)
628
+ match_data = @source.match(Private::INSTRUCTION_END, true)
606
629
  unless match_data
607
630
  message = "Invalid processing instruction node"
608
631
  @source.position = start_position
@@ -16,7 +16,6 @@ module REXML
16
16
 
17
17
  def parse
18
18
  tag_stack = []
19
- in_doctype = false
20
19
  entities = nil
21
20
  begin
22
21
  while true
@@ -39,17 +38,15 @@ module REXML
39
38
  tag_stack.pop
40
39
  @build_context = @build_context.parent
41
40
  when :text
42
- if not in_doctype
43
- if @build_context[-1].instance_of? Text
44
- @build_context[-1] << event[1]
45
- else
46
- @build_context.add(
47
- Text.new(event[1], @build_context.whitespace, nil, true)
48
- ) unless (
49
- @build_context.ignore_whitespace_nodes and
50
- event[1].strip.size==0
51
- )
52
- end
41
+ if @build_context[-1].instance_of? Text
42
+ @build_context[-1] << event[1]
43
+ else
44
+ @build_context.add(
45
+ Text.new(event[1], @build_context.whitespace, nil, true)
46
+ ) unless (
47
+ @build_context.ignore_whitespace_nodes and
48
+ event[1].strip.size==0
49
+ )
53
50
  end
54
51
  when :comment
55
52
  c = Comment.new( event[1] )
@@ -60,14 +57,12 @@ module REXML
60
57
  when :processing_instruction
61
58
  @build_context.add( Instruction.new( event[1], event[2] ) )
62
59
  when :end_doctype
63
- in_doctype = false
64
60
  entities.each { |k,v| entities[k] = @build_context.entities[k].value }
65
61
  @build_context = @build_context.parent
66
62
  when :start_doctype
67
63
  doctype = DocType.new( event[1..-1], @build_context )
68
64
  @build_context = doctype
69
65
  entities = {}
70
- in_doctype = true
71
66
  when :attlistdecl
72
67
  n = AttlistDecl.new( event[1..-1] )
73
68
  @build_context.add( n )
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.0"
34
+ VERSION = "3.3.1"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -55,6 +55,7 @@ module REXML
55
55
  attr_reader :encoding
56
56
 
57
57
  module Private
58
+ SCANNER_RESET_SIZE = 100000
58
59
  PRE_DEFINED_TERM_PATTERNS = {}
59
60
  pre_defined_terms = ["'", '"', "<"]
60
61
  pre_defined_terms.each do |term|
@@ -62,7 +63,6 @@ module REXML
62
63
  end
63
64
  end
64
65
  private_constant :Private
65
- include Private
66
66
 
67
67
  # Constructor
68
68
  # @param arg must be a String, and should be a valid XML document
@@ -84,6 +84,12 @@ module REXML
84
84
  @scanner.rest
85
85
  end
86
86
 
87
+ def drop_parsed_content
88
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
89
+ @scanner.string = @scanner.rest
90
+ end
91
+ end
92
+
87
93
  def buffer_encoding=(encoding)
88
94
  @scanner.string.force_encoding(encoding)
89
95
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-06-11 00:00:00.000000000 Z
10
+ date: 2024-06-25 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: strscan
@@ -116,8 +115,8 @@ files:
116
115
  homepage: https://github.com/ruby/rexml
117
116
  licenses:
118
117
  - BSD-2-Clause
119
- metadata: {}
120
- post_install_message:
118
+ metadata:
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
121
120
  rdoc_options:
122
121
  - "--main"
123
122
  - README.md
@@ -134,8 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
134
133
  - !ruby/object:Gem::Version
135
134
  version: '0'
136
135
  requirements: []
137
- rubygems_version: 3.4.20
138
- signing_key:
136
+ rubygems_version: 3.6.0.dev
139
137
  specification_version: 4
140
138
  summary: An XML toolkit for Ruby
141
139
  test_files: []