rexml 3.3.9 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d8de4465de1e9548d66ad026772932f724b9747dc8b1c62960d8efeaeaa8412
4
- data.tar.gz: 1cb29aaa36dcef98ba8bd4e9fa249959405f67fdb6bed54d12b466fdf43f57af
3
+ metadata.gz: 582bb5339257c81f2ce9c076155c01d7adfe8fb169c09bc7f5f489f6a76bca80
4
+ data.tar.gz: 160de8899d8d1f995bafca23631e9e4ab928ebbffa21684e3b61dad805a6187b
5
5
  SHA512:
6
- metadata.gz: 78c881a10f12e46e1b6710d6ec75e42e4311c233376a7587756bc098063d21f52a4d82bcac8201001bf7e39079b3db4015482dae5b4ba46e561ef75fa15b15a0
7
- data.tar.gz: 8d7a4b94937ce7b0bdf6ed83152fe207098dfe45333498a64e50d5fe9a686dffa2f66913c1edf265470b2b6a04cfae20857f1cffa404c278249784eeb533d594
6
+ metadata.gz: e2b095792523f54301e8a6af2f1682a9ad24d92cdd5d94c9e6088b27520e3c03b68fe06061b6ff2fd96b001b9cb947c57e4095244d83206a83fc2a1829dd4243
7
+ data.tar.gz: 4f335d2b1e58c1da233c3f0a0588def502c8cb2660633e0e06b4d0930bbcedcaae36b52dc550923704b4525d94a1011f4b5f4e87a81e5d689cce24ee89210a23
data/NEWS.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # News
2
2
 
3
+ ## 3.4.0 - 2024-12-15 {#version-3-4-0}
4
+
5
+ ### Improvements
6
+
7
+ * Improved performance.
8
+ * GH-216
9
+ * Patch by NAITOH Jun
10
+
11
+ * JRuby: Improved parse performance.
12
+ * GH-219
13
+ * Patch by João Duarte
14
+
15
+ * Added support for reusing pull parser.
16
+ * GH-214
17
+ * GH-220
18
+ * Patch by Dmitry Pogrebnoy
19
+
20
+ * Improved error handling when source is `IO`.
21
+ * GH-221
22
+ * Patch by NAITOH Jun
23
+
24
+ ### Thanks
25
+
26
+ * NAITOH Jun
27
+
28
+ * João Duarte
29
+
30
+ * Dmitry Pogrebnoy
31
+
3
32
  ## 3.3.9 - 2024-10-24 {#version-3-3-9}
4
33
 
5
34
  ### Improvements
data/lib/rexml/parsers/baseparser.rb CHANGED
@@ -181,6 +181,10 @@ module REXML
181
181
 
182
182
  def stream=( source )
183
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
184
188
  @closed = nil
185
189
  @have_root = false
186
190
  @document_status = nil
@@ -269,10 +273,10 @@ module REXML
269
273
  @source.ensure_buffer
270
274
  if @document_status == nil
271
275
  start_position = @source.position
272
- if @source.match("<?", true)
276
+ if @source.match?("<?", true)
273
277
  return process_instruction
274
- elsif @source.match("<!", true)
275
- if @source.match("--", true)
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
276
280
  md = @source.match(/(.*?)-->/um, true)
277
281
  if md.nil?
278
282
  raise REXML::ParseException.new("Unclosed comment", @source)
@@ -281,10 +285,10 @@ module REXML
281
285
  raise REXML::ParseException.new("Malformed comment", @source)
282
286
  end
283
287
  return [ :comment, md[1] ]
284
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
285
289
  base_error_message = "Malformed DOCTYPE"
286
- unless @source.match(/\s+/um, true)
287
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
288
292
  message = "#{base_error_message}: name is missing"
289
293
  else
290
294
  message = "#{base_error_message}: invalid name"
@@ -293,10 +297,10 @@ module REXML
293
297
  raise REXML::ParseException.new(message, @source)
294
298
  end
295
299
  name = parse_name(base_error_message)
296
- if @source.match(/\s*\[/um, true)
300
+ if @source.match?(/\s*\[/um, true)
297
301
  id = [nil, nil, nil]
298
302
  @document_status = :in_doctype
299
- elsif @source.match(/\s*>/um, true)
303
+ elsif @source.match?(/\s*>/um, true)
300
304
  id = [nil, nil, nil]
301
305
  @document_status = :after_doctype
302
306
  @source.ensure_buffer
@@ -308,9 +312,9 @@ module REXML
308
312
  # For backward compatibility
309
313
  id[1], id[2] = id[2], nil
310
314
  end
311
- if @source.match(/\s*\[/um, true)
315
+ if @source.match?(/\s*\[/um, true)
312
316
  @document_status = :in_doctype
313
- elsif @source.match(/\s*>/um, true)
317
+ elsif @source.match?(/\s*>/um, true)
314
318
  @document_status = :after_doctype
315
319
  @source.ensure_buffer
316
320
  else
@@ -320,7 +324,7 @@ module REXML
320
324
  end
321
325
  args = [:start_doctype, name, *id]
322
326
  if @document_status == :after_doctype
323
- @source.match(/\s*/um, true)
327
+ @source.match?(/\s*/um, true)
324
328
  @stack << [ :end_doctype ]
325
329
  end
326
330
  return args
@@ -331,14 +335,14 @@ module REXML
331
335
  end
332
336
  end
333
337
  if @document_status == :in_doctype
334
- @source.match(/\s*/um, true) # skip spaces
338
+ @source.match?(/\s*/um, true) # skip spaces
335
339
  start_position = @source.position
336
- if @source.match("<!", true)
337
- if @source.match("ELEMENT", true)
340
+ if @source.match?("<!", true)
341
+ if @source.match?("ELEMENT", true)
338
342
  md = @source.match(/(.*?)>/um, true)
339
343
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
340
344
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
341
- elsif @source.match("ENTITY", true)
345
+ elsif @source.match?("ENTITY", true)
342
346
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
343
347
  unless match_data
344
348
  raise REXML::ParseException.new("Malformed entity declaration", @source)
@@ -370,7 +374,7 @@ module REXML
370
374
  end
371
375
  match << '%' if ref
372
376
  return match
373
- elsif @source.match("ATTLIST", true)
377
+ elsif @source.match?("ATTLIST", true)
374
378
  md = @source.match(Private::ATTLISTDECL_END, true)
375
379
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
376
380
  element = md[1]
@@ -390,10 +394,10 @@ module REXML
390
394
  end
391
395
  end
392
396
  return [ :attlistdecl, element, pairs, contents ]
393
- elsif @source.match("NOTATION", true)
397
+ elsif @source.match?("NOTATION", true)
394
398
  base_error_message = "Malformed notation declaration"
395
- unless @source.match(/\s+/um, true)
396
- if @source.match(">")
399
+ unless @source.match?(/\s+/um, true)
400
+ if @source.match?(">")
397
401
  message = "#{base_error_message}: name is missing"
398
402
  else
399
403
  message = "#{base_error_message}: invalid name"
@@ -405,7 +409,7 @@ module REXML
405
409
  id = parse_id(base_error_message,
406
410
  accept_external_id: true,
407
411
  accept_public_id: true)
408
- unless @source.match(/\s*>/um, true)
412
+ unless @source.match?(/\s*>/um, true)
409
413
  message = "#{base_error_message}: garbage before end >"
410
414
  raise REXML::ParseException.new(message, @source)
411
415
  end
@@ -419,7 +423,7 @@ module REXML
419
423
  end
420
424
  elsif match = @source.match(/(%.*?;)\s*/um, true)
421
425
  return [ :externalentity, match[1] ]
422
- elsif @source.match(/\]\s*>/um, true)
426
+ elsif @source.match?(/\]\s*>/um, true)
423
427
  @document_status = :after_doctype
424
428
  return [ :end_doctype ]
425
429
  end
@@ -428,16 +432,16 @@ module REXML
428
432
  end
429
433
  end
430
434
  if @document_status == :after_doctype
431
- @source.match(/\s*/um, true)
435
+ @source.match?(/\s*/um, true)
432
436
  end
433
437
  begin
434
438
  start_position = @source.position
435
- if @source.match("<", true)
439
+ if @source.match?("<", true)
436
440
  # :text's read_until may remain only "<" in buffer. In the
437
441
  # case, buffer is empty here. So we need to fill buffer
438
442
  # here explicitly.
439
443
  @source.ensure_buffer
440
- if @source.match("/", true)
444
+ if @source.match?("/", true)
441
445
  @namespaces_restore_stack.pop
442
446
  last_tag = @tags.pop
443
447
  md = @source.match(Private::CLOSE_PATTERN, true)
@@ -452,7 +456,7 @@ module REXML
452
456
  raise REXML::ParseException.new(message, @source)
453
457
  end
454
458
  return [ :end_element, last_tag ]
455
- elsif @source.match("!", true)
459
+ elsif @source.match?("!", true)
456
460
  md = @source.match(/([^>]*>)/um)
457
461
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
458
462
  raise REXML::ParseException.new("Malformed node", @source) unless md
@@ -470,7 +474,7 @@ module REXML
470
474
  end
471
475
  raise REXML::ParseException.new( "Declarations can only occur "+
472
476
  "in the doctype declaration.", @source)
473
- elsif @source.match("?", true)
477
+ elsif @source.match?("?", true)
474
478
  return process_instruction
475
479
  else
476
480
  # Get the next tag
@@ -651,7 +655,7 @@ module REXML
651
655
  def parse_name(base_error_message)
652
656
  md = @source.match(Private::NAME_PATTERN, true)
653
657
  unless md
654
- if @source.match(/\S/um)
658
+ if @source.match?(/\S/um)
655
659
  message = "#{base_error_message}: invalid name"
656
660
  else
657
661
  message = "#{base_error_message}: name is missing"
@@ -693,34 +697,34 @@ module REXML
693
697
  accept_public_id:)
694
698
  public = /\A\s*PUBLIC/um
695
699
  system = /\A\s*SYSTEM/um
696
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
697
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
700
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
701
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
698
702
  return "public ID literal is missing"
699
703
  end
700
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
704
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
701
705
  return "invalid public ID literal"
702
706
  end
703
707
  if accept_public_id
704
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
708
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
705
709
  return "system ID literal is missing"
706
710
  end
707
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
711
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
708
712
  return "invalid system literal"
709
713
  end
710
714
  "garbage after system literal"
711
715
  else
712
716
  "garbage after public ID literal"
713
717
  end
714
- elsif accept_external_id and @source.match(/#{system}/um)
715
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
718
+ elsif accept_external_id and @source.match?(/#{system}/um)
719
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
716
720
  return "system literal is missing"
717
721
  end
718
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
722
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
719
723
  return "invalid system literal"
720
724
  end
721
725
  "garbage after system literal"
722
726
  else
723
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
727
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
724
728
  return "invalid ID type"
725
729
  end
726
730
  "ID type is missing"
@@ -729,7 +733,7 @@ module REXML
729
733
 
730
734
  def process_instruction
731
735
  name = parse_name("Malformed XML: Invalid processing instruction node")
732
- if @source.match(/\s+/um, true)
736
+ if @source.match?(/\s+/um, true)
733
737
  match_data = @source.match(/(.*?)\?>/um, true)
734
738
  unless match_data
735
739
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
@@ -737,7 +741,7 @@ module REXML
737
741
  content = match_data[1]
738
742
  else
739
743
  content = nil
740
- unless @source.match("?>", true)
744
+ unless @source.match?("?>", true)
741
745
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
742
746
  end
743
747
  end
@@ -767,9 +771,9 @@ module REXML
767
771
  expanded_names = {}
768
772
  closed = false
769
773
  while true
770
- if @source.match(">", true)
774
+ if @source.match?(">", true)
771
775
  return attributes, closed
772
- elsif @source.match("/>", true)
776
+ elsif @source.match?("/>", true)
773
777
  closed = true
774
778
  return attributes, closed
775
779
  elsif match = @source.match(QNAME, true)
@@ -777,7 +781,7 @@ module REXML
777
781
  prefix = match[2]
778
782
  local_part = match[3]
779
783
 
780
- unless @source.match(/\s*=\s*/um, true)
784
+ unless @source.match?(/\s*=\s*/um, true)
781
785
  message = "Missing attribute equal: <#{name}>"
782
786
  raise REXML::ParseException.new(message, @source)
783
787
  end
@@ -793,7 +797,7 @@ module REXML
793
797
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
794
798
  raise REXML::ParseException.new(message, @source)
795
799
  end
796
- @source.match(/\s*/um, true)
800
+ @source.match?(/\s*/um, true)
797
801
  if prefix == "xmlns"
798
802
  if local_part == "xml"
799
803
  if value != Private::XML_PREFIXED_NAMESPACE
data/lib/rexml/parsers/pullparser.rb CHANGED
@@ -93,6 +93,10 @@ module REXML
93
93
  def unshift token
94
94
  @my_stack.unshift token
95
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
96
100
  end
97
101
 
98
102
  # A parsing event. The contents of the event are accessed as an +Array+,
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.9"
34
+ VERSION = "3.4.0"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -77,7 +87,7 @@ module REXML
77
87
  detect_encoding
78
88
  end
79
89
  @line = 0
80
- @term_encord = {}
90
+ @encoded_terms = {}
81
91
  end
82
92
 
83
93
  # The current buffer (what we're going to read next)
@@ -126,6 +136,14 @@ module REXML
126
136
  end
127
137
  end
128
138
 
139
+ def match?(pattern, cons=false)
140
+ if cons
141
+ !@scanner.skip(pattern).nil?
142
+ else
143
+ !@scanner.match?(pattern).nil?
144
+ end
145
+ end
146
+
129
147
  def position
130
148
  @scanner.pos
131
149
  end
@@ -228,7 +246,7 @@ module REXML
228
246
 
229
247
  def read_until(term)
230
248
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
231
- term = @term_encord[term] ||= encode(term)
249
+ term = @encoded_terms[term] ||= encode(term)
232
250
  until str = @scanner.scan_until(pattern)
233
251
  break if @source.nil?
234
252
  break if @source.eof?
@@ -267,6 +285,23 @@ module REXML
267
285
  md.nil? ? nil : @scanner
268
286
  end
269
287
 
288
+ def match?( pattern, cons=false )
289
+ # To avoid performance issue, we need to increase bytes to read per scan
290
+ min_bytes = 1
291
+ while true
292
+ if cons
293
+ n_matched_bytes = @scanner.skip(pattern)
294
+ else
295
+ n_matched_bytes = @scanner.match?(pattern)
296
+ end
297
+ return true if n_matched_bytes
298
+ return false if pattern.is_a?(String)
299
+ return false if @source.nil?
300
+ return false unless read(nil, min_bytes)
301
+ min_bytes *= 2
302
+ end
303
+ end
304
+
270
305
  def empty?
271
306
  super and ( @source.nil? || @source.eof? )
272
307
  end
@@ -286,7 +321,7 @@ module REXML
286
321
  rescue
287
322
  end
288
323
  @er_source.seek(pos)
289
- rescue IOError
324
+ rescue IOError, SystemCallError
290
325
  pos = -1
291
326
  line = -1
292
327
  end
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.9
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
+ autorequire:
8
9
  bindir: bin
9
10
  cert_chain: []
10
- date: 2024-10-24 00:00:00.000000000 Z
11
+ date: 2024-12-15 00:00:00.000000000 Z
11
12
  dependencies: []
12
13
  description: An XML toolkit for Ruby
13
14
  email:
@@ -102,7 +103,8 @@ homepage: https://github.com/ruby/rexml
102
103
  licenses:
103
104
  - BSD-2-Clause
104
105
  metadata:
105
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
106
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
107
+ post_install_message:
106
108
  rdoc_options:
107
109
  - "--main"
108
110
  - README.md
@@ -119,7 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
121
  - !ruby/object:Gem::Version
120
122
  version: '0'
121
123
  requirements: []
122
- rubygems_version: 3.6.0.dev
124
+ rubygems_version: 3.5.22
125
+ signing_key:
123
126
  specification_version: 4
124
127
  summary: An XML toolkit for Ruby
125
128
  test_files: []