rexml 3.3.0 → 3.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25c91d880b1d9de11210a1fec235f630b6948e09104aa810b3cdde1a3d277139
4
- data.tar.gz: b06ffa92b8f883052db3b41fcc025b9b09c0f9117c301476de0264203c94632f
3
+ metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
4
+ data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
5
5
  SHA512:
6
- metadata.gz: 21fe3b359774dd752c405d74c4888f25dfd55c436ca0f02cc34f602aff88b99f80b97cdb186bf9ccc40e52783b9f90f985d8e87fd0687972cedaf759c808cc9d
7
- data.tar.gz: d613797dcd6f20b6c1d17deca5235a91456165fbc04dcd4fa0ec419e72bd8ba0aee7eadbb2a7e2a65263f74e0dcfacc99a9976ffe396717c2956b7f5155c2725
6
+ metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
7
+ data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
data/NEWS.md CHANGED
@@ -1,5 +1,52 @@
1
1
  # News
2
2
 
3
+ ## 3.3.1 - 2024-06-25 {#version-3-3-1}
4
+
5
+ ### Improvements
6
+
7
+ * Added support for detecting malformed top-level comments.
8
+ * GH-145
9
+ * Patch by Hiroya Fujinami.
10
+
11
+ * Improved `REXML::Element#attribute` performance.
12
+ * GH-146
13
+ * Patch by Hiroya Fujinami.
14
+
15
+ * Added support for detecting malformed `<!-->` comments.
16
+ * GH-147
17
+ * Patch by Hiroya Fujinami.
18
+
19
+ * Added support for detecting unclosed `DOCTYPE`.
20
+ * GH-152
21
+ * Patch by Hiroya Fujinami.
22
+
23
+ * Added `changelog_uri` metadata to gemspec.
24
+ * GH-156
25
+ * Patch by fynsta.
26
+
27
+ * Improved parse performance.
28
+ * GH-157
29
+ * GH-158
30
+ * Patch by NAITOH Jun.
31
+
32
+ ### Fixes
33
+
34
+ * Fixed a bug where large XML documents could not be parsed.
35
+ * GH-154
36
+ * Patch by NAITOH Jun.
37
+
38
+ * Fixed a bug where private constants were publicly visible.
39
+ * GH-155
40
+ * Patch by NAITOH Jun.
41
+
42
+ ### Thanks
43
+
44
+ * Hiroya Fujinami
45
+
46
+ * NAITOH Jun
47
+
48
+ * fynsta
49
+
3
50
  ## 3.3.0 - 2024-06-11 {#version-3-3-0}
4
51
 
5
52
  ### Improvements
data/lib/rexml/element.rb CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
7
7
  require_relative "parseexception"
8
8
 
9
9
  module REXML
10
- # An implementation note about namespaces:
11
- # As we parse, when we find namespaces we put them in a hash and assign
12
- # them a unique ID. We then convert the namespace prefix for the node
13
- # to the unique ID. This makes namespace lookup much faster for the
14
- # cost of extra memory use. We save the namespace prefix for the
15
- # context node and convert it back when we write it.
16
- @@namespaces = {}
17
-
18
10
  # An \REXML::Element object represents an XML element.
19
11
  #
20
12
  # An element:
@@ -1284,16 +1276,11 @@ module REXML
1284
1276
  # document.root.attribute("x", "a") # => a:x='a:x'
1285
1277
  #
1286
1278
  def attribute( name, namespace=nil )
1287
- prefix = nil
1288
- if namespaces.respond_to? :key
1289
- prefix = namespaces.key(namespace) if namespace
1290
- else
1291
- prefix = namespaces.index(namespace) if namespace
1292
- end
1279
+ prefix = namespaces.key(namespace) if namespace
1293
1280
  prefix = nil if prefix == 'xmlns'
1294
1281
 
1295
1282
  ret_val =
1296
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
1283
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
1297
1284
 
1298
1285
  return ret_val unless ret_val.nil?
1299
1286
  return nil if prefix.nil?
@@ -132,13 +132,20 @@ module REXML
132
132
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
133
133
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
134
134
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
135
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
136
+ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
137
+ DEFAULT_ENTITIES_PATTERNS = {}
138
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
139
+ default_entities.each do |term|
140
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
141
+ end
135
142
  end
136
143
  private_constant :Private
137
- include Private
138
144
 
139
145
  def initialize( source )
140
146
  self.stream = source
141
147
  @listeners = []
148
+ @prefixes = Set.new
142
149
  end
143
150
 
144
151
  def add_listener( listener )
@@ -204,6 +211,8 @@ module REXML
204
211
 
205
212
  # Returns the next event. This is a +PullEvent+ object.
206
213
  def pull
214
+ @source.drop_parsed_content
215
+
207
216
  pull_event.tap do |event|
208
217
  @listeners.each do |listener|
209
218
  listener.receive event
@@ -216,7 +225,12 @@ module REXML
216
225
  x, @closed = @closed, nil
217
226
  return [ :end_element, x ]
218
227
  end
219
- return [ :end_document ] if empty?
228
+ if empty?
229
+ if @document_status == :in_doctype
230
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
231
+ end
232
+ return [ :end_document ]
233
+ end
220
234
  return @stack.shift if @stack.size > 0
221
235
  #STDERR.puts @source.encoding
222
236
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -228,7 +242,14 @@ module REXML
228
242
  return process_instruction(start_position)
229
243
  elsif @source.match("<!", true)
230
244
  if @source.match("--", true)
231
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
245
+ md = @source.match(/(.*?)-->/um, true)
246
+ if md.nil?
247
+ raise REXML::ParseException.new("Unclosed comment", @source)
248
+ end
249
+ if /--|-\z/.match?(md[1])
250
+ raise REXML::ParseException.new("Malformed comment", @source)
251
+ end
252
+ return [ :comment, md[1] ]
232
253
  elsif @source.match("DOCTYPE", true)
233
254
  base_error_message = "Malformed DOCTYPE"
234
255
  unless @source.match(/\s+/um, true)
@@ -240,7 +261,7 @@ module REXML
240
261
  @source.position = start_position
241
262
  raise REXML::ParseException.new(message, @source)
242
263
  end
243
- @nsstack.unshift(curr_ns=Set.new)
264
+ @nsstack.unshift(Set.new)
244
265
  name = parse_name(base_error_message)
245
266
  if @source.match(/\s*\[/um, true)
246
267
  id = [nil, nil, nil]
@@ -288,7 +309,7 @@ module REXML
288
309
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
289
310
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
290
311
  elsif @source.match("ENTITY", true)
291
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
312
+ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
292
313
  ref = false
293
314
  if match[1] == '%'
294
315
  ref = true
@@ -314,7 +335,7 @@ module REXML
314
335
  match << '%' if ref
315
336
  return match
316
337
  elsif @source.match("ATTLIST", true)
317
- md = @source.match(ATTLISTDECL_END, true)
338
+ md = @source.match(Private::ATTLISTDECL_END, true)
318
339
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
319
340
  element = md[1]
320
341
  contents = md[0]
@@ -366,6 +387,9 @@ module REXML
366
387
  @document_status = :after_doctype
367
388
  return [ :end_doctype ]
368
389
  end
390
+ if @document_status == :in_doctype
391
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
392
+ end
369
393
  end
370
394
  if @document_status == :after_doctype
371
395
  @source.match(/\s*/um, true)
@@ -380,7 +404,7 @@ module REXML
380
404
  if @source.match("/", true)
381
405
  @nsstack.shift
382
406
  last_tag = @tags.pop
383
- md = @source.match(CLOSE_PATTERN, true)
407
+ md = @source.match(Private::CLOSE_PATTERN, true)
384
408
  if md and !last_tag
385
409
  message = "Unexpected top-level end tag (got '#{md[1]}')"
386
410
  raise REXML::ParseException.new(message, @source)
@@ -399,12 +423,11 @@ module REXML
399
423
  if md[0][0] == ?-
400
424
  md = @source.match(/--(.*?)-->/um, true)
401
425
 
402
- case md[1]
403
- when /--/, /-\z/
426
+ if md.nil? || /--|-\z/.match?(md[1])
404
427
  raise REXML::ParseException.new("Malformed comment", @source)
405
428
  end
406
429
 
407
- return [ :comment, md[1] ] if md
430
+ return [ :comment, md[1] ]
408
431
  else
409
432
  md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
410
433
  return [ :cdata, md[1] ] if md
@@ -415,19 +438,19 @@ module REXML
415
438
  return process_instruction(start_position)
416
439
  else
417
440
  # Get the next tag
418
- md = @source.match(TAG_PATTERN, true)
441
+ md = @source.match(Private::TAG_PATTERN, true)
419
442
  unless md
420
443
  @source.position = start_position
421
444
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
422
445
  end
423
446
  tag = md[1]
424
447
  @document_status = :in_element
425
- prefixes = Set.new
426
- prefixes << md[2] if md[2]
448
+ @prefixes.clear
449
+ @prefixes << md[2] if md[2]
427
450
  @nsstack.unshift(curr_ns=Set.new)
428
- attributes, closed = parse_attributes(prefixes, curr_ns)
451
+ attributes, closed = parse_attributes(@prefixes, curr_ns)
429
452
  # Verify that all of the prefixes have been defined
430
- for prefix in prefixes
453
+ for prefix in @prefixes
431
454
  unless @nsstack.find{|k| k.member?(prefix)}
432
455
  raise UndefinedNamespaceException.new(prefix,@source,self)
433
456
  end
@@ -488,10 +511,10 @@ module REXML
488
511
 
489
512
  # Unescapes all possible entities
490
513
  def unnormalize( string, entities=nil, filter=nil )
491
- rv = string.gsub( /\r\n?/, "\n" )
514
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
492
515
  matches = rv.scan( REFERENCE_RE )
493
516
  return rv if matches.size == 0
494
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
517
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
495
518
  m=$1
496
519
  m = "0#{m}" if m[0] == ?x
497
520
  [Integer(m)].pack('U*')
@@ -502,7 +525,7 @@ module REXML
502
525
  unless filter and filter.include?(entity_reference)
503
526
  entity_value = entity( entity_reference, entities )
504
527
  if entity_value
505
- re = /&#{entity_reference};/
528
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
506
529
  rv.gsub!( re, entity_value )
507
530
  else
508
531
  er = DEFAULT_ENTITIES[entity_reference]
@@ -510,7 +533,7 @@ module REXML
510
533
  end
511
534
  end
512
535
  end
513
- rv.gsub!( /&amp;/, '&' )
536
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
514
537
  end
515
538
  rv
516
539
  end
@@ -523,7 +546,7 @@ module REXML
523
546
  end
524
547
 
525
548
  def parse_name(base_error_message)
526
- md = @source.match(NAME_PATTERN, true)
549
+ md = @source.match(Private::NAME_PATTERN, true)
527
550
  unless md
528
551
  if @source.match(/\s*\S/um)
529
552
  message = "#{base_error_message}: invalid name"
@@ -602,7 +625,7 @@ module REXML
602
625
  end
603
626
 
604
627
  def process_instruction(start_position)
605
- match_data = @source.match(INSTRUCTION_END, true)
628
+ match_data = @source.match(Private::INSTRUCTION_END, true)
606
629
  unless match_data
607
630
  message = "Invalid processing instruction node"
608
631
  @source.position = start_position
@@ -16,7 +16,6 @@ module REXML
16
16
 
17
17
  def parse
18
18
  tag_stack = []
19
- in_doctype = false
20
19
  entities = nil
21
20
  begin
22
21
  while true
@@ -39,17 +38,15 @@ module REXML
39
38
  tag_stack.pop
40
39
  @build_context = @build_context.parent
41
40
  when :text
42
- if not in_doctype
43
- if @build_context[-1].instance_of? Text
44
- @build_context[-1] << event[1]
45
- else
46
- @build_context.add(
47
- Text.new(event[1], @build_context.whitespace, nil, true)
48
- ) unless (
49
- @build_context.ignore_whitespace_nodes and
50
- event[1].strip.size==0
51
- )
52
- end
41
+ if @build_context[-1].instance_of? Text
42
+ @build_context[-1] << event[1]
43
+ else
44
+ @build_context.add(
45
+ Text.new(event[1], @build_context.whitespace, nil, true)
46
+ ) unless (
47
+ @build_context.ignore_whitespace_nodes and
48
+ event[1].strip.size==0
49
+ )
53
50
  end
54
51
  when :comment
55
52
  c = Comment.new( event[1] )
@@ -60,14 +57,12 @@ module REXML
60
57
  when :processing_instruction
61
58
  @build_context.add( Instruction.new( event[1], event[2] ) )
62
59
  when :end_doctype
63
- in_doctype = false
64
60
  entities.each { |k,v| entities[k] = @build_context.entities[k].value }
65
61
  @build_context = @build_context.parent
66
62
  when :start_doctype
67
63
  doctype = DocType.new( event[1..-1], @build_context )
68
64
  @build_context = doctype
69
65
  entities = {}
70
- in_doctype = true
71
66
  when :attlistdecl
72
67
  n = AttlistDecl.new( event[1..-1] )
73
68
  @build_context.add( n )
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.0"
34
+ VERSION = "3.3.1"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -55,6 +55,7 @@ module REXML
55
55
  attr_reader :encoding
56
56
 
57
57
  module Private
58
+ SCANNER_RESET_SIZE = 100000
58
59
  PRE_DEFINED_TERM_PATTERNS = {}
59
60
  pre_defined_terms = ["'", '"', "<"]
60
61
  pre_defined_terms.each do |term|
@@ -62,7 +63,6 @@ module REXML
62
63
  end
63
64
  end
64
65
  private_constant :Private
65
- include Private
66
66
 
67
67
  # Constructor
68
68
  # @param arg must be a String, and should be a valid XML document
@@ -84,6 +84,12 @@ module REXML
84
84
  @scanner.rest
85
85
  end
86
86
 
87
+ def drop_parsed_content
88
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
89
+ @scanner.string = @scanner.rest
90
+ end
91
+ end
92
+
87
93
  def buffer_encoding=(encoding)
88
94
  @scanner.string.force_encoding(encoding)
89
95
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-06-11 00:00:00.000000000 Z
10
+ date: 2024-06-25 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: strscan
@@ -116,8 +115,8 @@ files:
116
115
  homepage: https://github.com/ruby/rexml
117
116
  licenses:
118
117
  - BSD-2-Clause
119
- metadata: {}
120
- post_install_message:
118
+ metadata:
119
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
121
120
  rdoc_options:
122
121
  - "--main"
123
122
  - README.md
@@ -134,8 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
134
133
  - !ruby/object:Gem::Version
135
134
  version: '0'
136
135
  requirements: []
137
- rubygems_version: 3.4.20
138
- signing_key:
136
+ rubygems_version: 3.6.0.dev
139
137
  specification_version: 4
140
138
  summary: An XML toolkit for Ruby
141
139
  test_files: []