rexml 3.3.9 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d8de4465de1e9548d66ad026772932f724b9747dc8b1c62960d8efeaeaa8412
4
- data.tar.gz: 1cb29aaa36dcef98ba8bd4e9fa249959405f67fdb6bed54d12b466fdf43f57af
3
+ metadata.gz: 582bb5339257c81f2ce9c076155c01d7adfe8fb169c09bc7f5f489f6a76bca80
4
+ data.tar.gz: 160de8899d8d1f995bafca23631e9e4ab928ebbffa21684e3b61dad805a6187b
5
5
  SHA512:
6
- metadata.gz: 78c881a10f12e46e1b6710d6ec75e42e4311c233376a7587756bc098063d21f52a4d82bcac8201001bf7e39079b3db4015482dae5b4ba46e561ef75fa15b15a0
7
- data.tar.gz: 8d7a4b94937ce7b0bdf6ed83152fe207098dfe45333498a64e50d5fe9a686dffa2f66913c1edf265470b2b6a04cfae20857f1cffa404c278249784eeb533d594
6
+ metadata.gz: e2b095792523f54301e8a6af2f1682a9ad24d92cdd5d94c9e6088b27520e3c03b68fe06061b6ff2fd96b001b9cb947c57e4095244d83206a83fc2a1829dd4243
7
+ data.tar.gz: 4f335d2b1e58c1da233c3f0a0588def502c8cb2660633e0e06b4d0930bbcedcaae36b52dc550923704b4525d94a1011f4b5f4e87a81e5d689cce24ee89210a23
data/NEWS.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # News
2
2
 
3
+ ## 3.4.0 - 2024-12-15 {#version-3-4-0}
4
+
5
+ ### Improvement
6
+
7
+ * Improved performance.
8
+ * GH-216
9
+ * Patch by NAITOH Jun
10
+
11
+ * JRuby: Improved parse performance.
12
+ * GH-219
13
+ * Patch by João Duarte
14
+
15
+ * Added support for reusing pull parser.
16
+ * GH-214
17
+ * GH-220
18
+ * Patch by Dmitry Pogrebnoy
19
+
20
+ * Improved error handling when source is `IO`.
21
+ * GH-221
22
+ * Patch by NAITOH Jun
23
+
24
+ ### Thanks
25
+
26
+ * NAITOH Jun
27
+
28
+ * João Duarte
29
+
30
+ * Dmitry Pogrebnoy
31
+
3
32
  ## 3.3.9 - 2024-10-24 {#version-3-3-9}
4
33
 
5
34
  ### Improvements
@@ -181,6 +181,10 @@ module REXML
181
181
 
182
182
  def stream=( source )
183
183
  @source = SourceFactory.create_from( source )
184
+ reset
185
+ end
186
+
187
+ def reset
184
188
  @closed = nil
185
189
  @have_root = false
186
190
  @document_status = nil
@@ -269,10 +273,10 @@ module REXML
269
273
  @source.ensure_buffer
270
274
  if @document_status == nil
271
275
  start_position = @source.position
272
- if @source.match("<?", true)
276
+ if @source.match?("<?", true)
273
277
  return process_instruction
274
- elsif @source.match("<!", true)
275
- if @source.match("--", true)
278
+ elsif @source.match?("<!", true)
279
+ if @source.match?("--", true)
276
280
  md = @source.match(/(.*?)-->/um, true)
277
281
  if md.nil?
278
282
  raise REXML::ParseException.new("Unclosed comment", @source)
@@ -281,10 +285,10 @@ module REXML
281
285
  raise REXML::ParseException.new("Malformed comment", @source)
282
286
  end
283
287
  return [ :comment, md[1] ]
284
- elsif @source.match("DOCTYPE", true)
288
+ elsif @source.match?("DOCTYPE", true)
285
289
  base_error_message = "Malformed DOCTYPE"
286
- unless @source.match(/\s+/um, true)
287
- if @source.match(">")
290
+ unless @source.match?(/\s+/um, true)
291
+ if @source.match?(">")
288
292
  message = "#{base_error_message}: name is missing"
289
293
  else
290
294
  message = "#{base_error_message}: invalid name"
@@ -293,10 +297,10 @@ module REXML
293
297
  raise REXML::ParseException.new(message, @source)
294
298
  end
295
299
  name = parse_name(base_error_message)
296
- if @source.match(/\s*\[/um, true)
300
+ if @source.match?(/\s*\[/um, true)
297
301
  id = [nil, nil, nil]
298
302
  @document_status = :in_doctype
299
- elsif @source.match(/\s*>/um, true)
303
+ elsif @source.match?(/\s*>/um, true)
300
304
  id = [nil, nil, nil]
301
305
  @document_status = :after_doctype
302
306
  @source.ensure_buffer
@@ -308,9 +312,9 @@ module REXML
308
312
  # For backward compatibility
309
313
  id[1], id[2] = id[2], nil
310
314
  end
311
- if @source.match(/\s*\[/um, true)
315
+ if @source.match?(/\s*\[/um, true)
312
316
  @document_status = :in_doctype
313
- elsif @source.match(/\s*>/um, true)
317
+ elsif @source.match?(/\s*>/um, true)
314
318
  @document_status = :after_doctype
315
319
  @source.ensure_buffer
316
320
  else
@@ -320,7 +324,7 @@ module REXML
320
324
  end
321
325
  args = [:start_doctype, name, *id]
322
326
  if @document_status == :after_doctype
323
- @source.match(/\s*/um, true)
327
+ @source.match?(/\s*/um, true)
324
328
  @stack << [ :end_doctype ]
325
329
  end
326
330
  return args
@@ -331,14 +335,14 @@ module REXML
331
335
  end
332
336
  end
333
337
  if @document_status == :in_doctype
334
- @source.match(/\s*/um, true) # skip spaces
338
+ @source.match?(/\s*/um, true) # skip spaces
335
339
  start_position = @source.position
336
- if @source.match("<!", true)
337
- if @source.match("ELEMENT", true)
340
+ if @source.match?("<!", true)
341
+ if @source.match?("ELEMENT", true)
338
342
  md = @source.match(/(.*?)>/um, true)
339
343
  raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
340
344
  return [ :elementdecl, "<!ELEMENT" + md[1] ]
341
- elsif @source.match("ENTITY", true)
345
+ elsif @source.match?("ENTITY", true)
342
346
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
343
347
  unless match_data
344
348
  raise REXML::ParseException.new("Malformed entity declaration", @source)
@@ -370,7 +374,7 @@ module REXML
370
374
  end
371
375
  match << '%' if ref
372
376
  return match
373
- elsif @source.match("ATTLIST", true)
377
+ elsif @source.match?("ATTLIST", true)
374
378
  md = @source.match(Private::ATTLISTDECL_END, true)
375
379
  raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
376
380
  element = md[1]
@@ -390,10 +394,10 @@ module REXML
390
394
  end
391
395
  end
392
396
  return [ :attlistdecl, element, pairs, contents ]
393
- elsif @source.match("NOTATION", true)
397
+ elsif @source.match?("NOTATION", true)
394
398
  base_error_message = "Malformed notation declaration"
395
- unless @source.match(/\s+/um, true)
396
- if @source.match(">")
399
+ unless @source.match?(/\s+/um, true)
400
+ if @source.match?(">")
397
401
  message = "#{base_error_message}: name is missing"
398
402
  else
399
403
  message = "#{base_error_message}: invalid name"
@@ -405,7 +409,7 @@ module REXML
405
409
  id = parse_id(base_error_message,
406
410
  accept_external_id: true,
407
411
  accept_public_id: true)
408
- unless @source.match(/\s*>/um, true)
412
+ unless @source.match?(/\s*>/um, true)
409
413
  message = "#{base_error_message}: garbage before end >"
410
414
  raise REXML::ParseException.new(message, @source)
411
415
  end
@@ -419,7 +423,7 @@ module REXML
419
423
  end
420
424
  elsif match = @source.match(/(%.*?;)\s*/um, true)
421
425
  return [ :externalentity, match[1] ]
422
- elsif @source.match(/\]\s*>/um, true)
426
+ elsif @source.match?(/\]\s*>/um, true)
423
427
  @document_status = :after_doctype
424
428
  return [ :end_doctype ]
425
429
  end
@@ -428,16 +432,16 @@ module REXML
428
432
  end
429
433
  end
430
434
  if @document_status == :after_doctype
431
- @source.match(/\s*/um, true)
435
+ @source.match?(/\s*/um, true)
432
436
  end
433
437
  begin
434
438
  start_position = @source.position
435
- if @source.match("<", true)
439
+ if @source.match?("<", true)
436
440
  # :text's read_until may remain only "<" in buffer. In the
437
441
  # case, buffer is empty here. So we need to fill buffer
438
442
  # here explicitly.
439
443
  @source.ensure_buffer
440
- if @source.match("/", true)
444
+ if @source.match?("/", true)
441
445
  @namespaces_restore_stack.pop
442
446
  last_tag = @tags.pop
443
447
  md = @source.match(Private::CLOSE_PATTERN, true)
@@ -452,7 +456,7 @@ module REXML
452
456
  raise REXML::ParseException.new(message, @source)
453
457
  end
454
458
  return [ :end_element, last_tag ]
455
- elsif @source.match("!", true)
459
+ elsif @source.match?("!", true)
456
460
  md = @source.match(/([^>]*>)/um)
457
461
  #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
458
462
  raise REXML::ParseException.new("Malformed node", @source) unless md
@@ -470,7 +474,7 @@ module REXML
470
474
  end
471
475
  raise REXML::ParseException.new( "Declarations can only occur "+
472
476
  "in the doctype declaration.", @source)
473
- elsif @source.match("?", true)
477
+ elsif @source.match?("?", true)
474
478
  return process_instruction
475
479
  else
476
480
  # Get the next tag
@@ -651,7 +655,7 @@ module REXML
651
655
  def parse_name(base_error_message)
652
656
  md = @source.match(Private::NAME_PATTERN, true)
653
657
  unless md
654
- if @source.match(/\S/um)
658
+ if @source.match?(/\S/um)
655
659
  message = "#{base_error_message}: invalid name"
656
660
  else
657
661
  message = "#{base_error_message}: name is missing"
@@ -693,34 +697,34 @@ module REXML
693
697
  accept_public_id:)
694
698
  public = /\A\s*PUBLIC/um
695
699
  system = /\A\s*SYSTEM/um
696
- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
697
- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
700
+ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
701
+ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
698
702
  return "public ID literal is missing"
699
703
  end
700
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
704
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
701
705
  return "invalid public ID literal"
702
706
  end
703
707
  if accept_public_id
704
- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
708
+ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
705
709
  return "system ID literal is missing"
706
710
  end
707
- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
711
+ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
708
712
  return "invalid system literal"
709
713
  end
710
714
  "garbage after system literal"
711
715
  else
712
716
  "garbage after public ID literal"
713
717
  end
714
- elsif accept_external_id and @source.match(/#{system}/um)
715
- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
718
+ elsif accept_external_id and @source.match?(/#{system}/um)
719
+ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
716
720
  return "system literal is missing"
717
721
  end
718
- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
722
+ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
719
723
  return "invalid system literal"
720
724
  end
721
725
  "garbage after system literal"
722
726
  else
723
- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
727
+ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
724
728
  return "invalid ID type"
725
729
  end
726
730
  "ID type is missing"
@@ -729,7 +733,7 @@ module REXML
729
733
 
730
734
  def process_instruction
731
735
  name = parse_name("Malformed XML: Invalid processing instruction node")
732
- if @source.match(/\s+/um, true)
736
+ if @source.match?(/\s+/um, true)
733
737
  match_data = @source.match(/(.*?)\?>/um, true)
734
738
  unless match_data
735
739
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
@@ -737,7 +741,7 @@ module REXML
737
741
  content = match_data[1]
738
742
  else
739
743
  content = nil
740
- unless @source.match("?>", true)
744
+ unless @source.match?("?>", true)
741
745
  raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
742
746
  end
743
747
  end
@@ -767,9 +771,9 @@ module REXML
767
771
  expanded_names = {}
768
772
  closed = false
769
773
  while true
770
- if @source.match(">", true)
774
+ if @source.match?(">", true)
771
775
  return attributes, closed
772
- elsif @source.match("/>", true)
776
+ elsif @source.match?("/>", true)
773
777
  closed = true
774
778
  return attributes, closed
775
779
  elsif match = @source.match(QNAME, true)
@@ -777,7 +781,7 @@ module REXML
777
781
  prefix = match[2]
778
782
  local_part = match[3]
779
783
 
780
- unless @source.match(/\s*=\s*/um, true)
784
+ unless @source.match?(/\s*=\s*/um, true)
781
785
  message = "Missing attribute equal: <#{name}>"
782
786
  raise REXML::ParseException.new(message, @source)
783
787
  end
@@ -793,7 +797,7 @@ module REXML
793
797
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
794
798
  raise REXML::ParseException.new(message, @source)
795
799
  end
796
- @source.match(/\s*/um, true)
800
+ @source.match?(/\s*/um, true)
797
801
  if prefix == "xmlns"
798
802
  if local_part == "xml"
799
803
  if value != Private::XML_PREFIXED_NAMESPACE
@@ -93,6 +93,10 @@ module REXML
93
93
  def unshift token
94
94
  @my_stack.unshift token
95
95
  end
96
+
97
+ def reset
98
+ @parser.reset
99
+ end
96
100
  end
97
101
 
98
102
  # A parsing event. The contents of the event are accessed as an +Array+,
data/lib/rexml/rexml.rb CHANGED
@@ -31,7 +31,7 @@
31
31
  module REXML
32
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
33
33
  DATE = "2008/019"
34
- VERSION = "3.3.9"
34
+ VERSION = "3.4.0"
35
35
  REVISION = ""
36
36
 
37
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
3
 
4
+ require "stringio"
4
5
  require "strscan"
5
6
 
6
7
  require_relative 'encoding'
@@ -18,6 +19,16 @@ module REXML
18
19
  pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
20
  super(pattern)
20
21
  end
22
+
23
+ def match?(pattern)
24
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
25
+ super(pattern)
26
+ end
27
+
28
+ def skip(pattern)
29
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
30
+ super(pattern)
31
+ end
21
32
  end
22
33
  end
23
34
  using StringScannerCheckScanString
@@ -35,7 +46,6 @@ module REXML
35
46
  arg.respond_to? :eof?
36
47
  IOSource.new(arg)
37
48
  elsif arg.respond_to? :to_str
38
- require 'stringio'
39
49
  IOSource.new(StringIO.new(arg))
40
50
  elsif arg.kind_of? Source
41
51
  arg
@@ -77,7 +87,7 @@ module REXML
77
87
  detect_encoding
78
88
  end
79
89
  @line = 0
80
- @term_encord = {}
90
+ @encoded_terms = {}
81
91
  end
82
92
 
83
93
  # The current buffer (what we're going to read next)
@@ -126,6 +136,14 @@ module REXML
126
136
  end
127
137
  end
128
138
 
139
+ def match?(pattern, cons=false)
140
+ if cons
141
+ !@scanner.skip(pattern).nil?
142
+ else
143
+ !@scanner.match?(pattern).nil?
144
+ end
145
+ end
146
+
129
147
  def position
130
148
  @scanner.pos
131
149
  end
@@ -228,7 +246,7 @@ module REXML
228
246
 
229
247
  def read_until(term)
230
248
  pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
231
- term = @term_encord[term] ||= encode(term)
249
+ term = @encoded_terms[term] ||= encode(term)
232
250
  until str = @scanner.scan_until(pattern)
233
251
  break if @source.nil?
234
252
  break if @source.eof?
@@ -267,6 +285,23 @@ module REXML
267
285
  md.nil? ? nil : @scanner
268
286
  end
269
287
 
288
+ def match?( pattern, cons=false )
289
+ # To avoid performance issue, we need to increase bytes to read per scan
290
+ min_bytes = 1
291
+ while true
292
+ if cons
293
+ n_matched_bytes = @scanner.skip(pattern)
294
+ else
295
+ n_matched_bytes = @scanner.match?(pattern)
296
+ end
297
+ return true if n_matched_bytes
298
+ return false if pattern.is_a?(String)
299
+ return false if @source.nil?
300
+ return false unless read(nil, min_bytes)
301
+ min_bytes *= 2
302
+ end
303
+ end
304
+
270
305
  def empty?
271
306
  super and ( @source.nil? || @source.eof? )
272
307
  end
@@ -286,7 +321,7 @@ module REXML
286
321
  rescue
287
322
  end
288
323
  @er_source.seek(pos)
289
- rescue IOError
324
+ rescue IOError, SystemCallError
290
325
  pos = -1
291
326
  line = -1
292
327
  end
data/lib/rexml/text.rb CHANGED
@@ -29,31 +29,16 @@ module REXML
29
29
  (0x10000..0x10FFFF)
30
30
  ]
31
31
 
32
- if String.method_defined? :encode
33
- VALID_XML_CHARS = Regexp.new('^['+
34
- VALID_CHAR.map { |item|
35
- case item
36
- when Integer
37
- [item].pack('U').force_encoding('utf-8')
38
- when Range
39
- [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
40
- end
41
- }.join +
42
- ']*$')
43
- else
44
- VALID_XML_CHARS = /^(
45
- [\x09\x0A\x0D\x20-\x7E] # ASCII
46
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
47
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
48
- | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
49
- | \xEF[\x80-\xBE]{2} #
50
- | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
51
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
52
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
53
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
54
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
55
- )*$/nx;
56
- end
32
+ VALID_XML_CHARS = Regexp.new('^['+
33
+ VALID_CHAR.map { |item|
34
+ case item
35
+ when Integer
36
+ [item].pack('U').force_encoding('utf-8')
37
+ when Range
38
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
39
+ end
40
+ }.join +
41
+ ']*$')
57
42
 
58
43
  # Constructor
59
44
  # +arg+ if a String, the content is set to the String. If a Text,
@@ -132,21 +117,11 @@ module REXML
132
117
 
133
118
  # illegal anywhere
134
119
  if !string.match?(VALID_XML_CHARS)
135
- if String.method_defined? :encode
136
- string.chars.each do |c|
137
- case c.ord
138
- when *VALID_CHAR
139
- else
140
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
141
- end
142
- end
143
- else
144
- string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
145
- case c.unpack('U')
146
- when *VALID_CHAR
147
- else
148
- raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
149
- end
120
+ string.chars.each do |c|
121
+ case c.ord
122
+ when *VALID_CHAR
123
+ else
124
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
150
125
  end
151
126
  end
152
127
  end
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.9
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
+ autorequire:
8
9
  bindir: bin
9
10
  cert_chain: []
10
- date: 2024-10-24 00:00:00.000000000 Z
11
+ date: 2024-12-15 00:00:00.000000000 Z
11
12
  dependencies: []
12
13
  description: An XML toolkit for Ruby
13
14
  email:
@@ -102,7 +103,8 @@ homepage: https://github.com/ruby/rexml
102
103
  licenses:
103
104
  - BSD-2-Clause
104
105
  metadata:
105
- changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
106
+ changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
107
+ post_install_message:
106
108
  rdoc_options:
107
109
  - "--main"
108
110
  - README.md
@@ -119,7 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
121
  - !ruby/object:Gem::Version
120
122
  version: '0'
121
123
  requirements: []
122
- rubygems_version: 3.6.0.dev
124
+ rubygems_version: 3.5.22
125
+ signing_key:
123
126
  specification_version: 4
124
127
  summary: An XML toolkit for Ruby
125
128
  test_files: []