rexml 3.3.9 → 3.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +53 -0
- data/lib/rexml/parsers/baseparser.rb +70 -45
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +55 -6
- data/lib/rexml/text.rb +15 -40
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
|
4
|
+
data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
|
7
|
+
data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
|
data/NEWS.md
CHANGED
@@ -1,5 +1,58 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.1 - 2025-02-16 {#version-3-4-1}
|
4
|
+
|
5
|
+
### Improvement
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-226
|
9
|
+
* GH-227
|
10
|
+
* GH-237
|
11
|
+
* Patch by NAITOH Jun
|
12
|
+
|
13
|
+
### Fixes
|
14
|
+
|
15
|
+
* Fix serialization of ATTLIST is incorrect
|
16
|
+
* GH-233
|
17
|
+
* GH-234
|
18
|
+
* Patch by OlofKalufs
|
19
|
+
* Reported by OlofKalufs
|
20
|
+
|
21
|
+
### Thanks
|
22
|
+
|
23
|
+
* NAITOH Jun
|
24
|
+
|
25
|
+
* OlofKalufs
|
26
|
+
|
27
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
28
|
+
|
29
|
+
### Improvement
|
30
|
+
|
31
|
+
* Improved performance.
|
32
|
+
* GH-216
|
33
|
+
* Patch by NAITOH Jun
|
34
|
+
|
35
|
+
* JRuby: Improved parse performance.
|
36
|
+
* GH-219
|
37
|
+
* Patch by João Duarte
|
38
|
+
|
39
|
+
* Added support for reusing pull parser.
|
40
|
+
* GH-214
|
41
|
+
* GH-220
|
42
|
+
* Patch by Dmitry Pogrebnoy
|
43
|
+
|
44
|
+
* Improved error handling when source is `IO`.
|
45
|
+
* GH-221
|
46
|
+
* Patch by NAITOH Jun
|
47
|
+
|
48
|
+
### Thanks
|
49
|
+
|
50
|
+
* NAITOH Jun
|
51
|
+
|
52
|
+
* João Duarte
|
53
|
+
|
54
|
+
* Dmitry Pogrebnoy
|
55
|
+
|
3
56
|
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
4
57
|
|
5
58
|
### Improvements
|
@@ -181,6 +181,10 @@ module REXML
|
|
181
181
|
|
182
182
|
def stream=( source )
|
183
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
184
188
|
@closed = nil
|
185
189
|
@have_root = false
|
186
190
|
@document_status = nil
|
@@ -269,10 +273,10 @@ module REXML
|
|
269
273
|
@source.ensure_buffer
|
270
274
|
if @document_status == nil
|
271
275
|
start_position = @source.position
|
272
|
-
if @source.match("<?", true)
|
276
|
+
if @source.match?("<?", true)
|
273
277
|
return process_instruction
|
274
|
-
elsif @source.match("<!", true)
|
275
|
-
if @source.match("--", true)
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
276
280
|
md = @source.match(/(.*?)-->/um, true)
|
277
281
|
if md.nil?
|
278
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
@@ -281,10 +285,10 @@ module REXML
|
|
281
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
282
286
|
end
|
283
287
|
return [ :comment, md[1] ]
|
284
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
285
289
|
base_error_message = "Malformed DOCTYPE"
|
286
|
-
unless @source.match(/\s+/um, true)
|
287
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
288
292
|
message = "#{base_error_message}: name is missing"
|
289
293
|
else
|
290
294
|
message = "#{base_error_message}: invalid name"
|
@@ -293,10 +297,11 @@ module REXML
|
|
293
297
|
raise REXML::ParseException.new(message, @source)
|
294
298
|
end
|
295
299
|
name = parse_name(base_error_message)
|
296
|
-
|
300
|
+
@source.match?(/\s*/um, true) # skip spaces
|
301
|
+
if @source.match?("[", true)
|
297
302
|
id = [nil, nil, nil]
|
298
303
|
@document_status = :in_doctype
|
299
|
-
elsif @source.match(
|
304
|
+
elsif @source.match?(">", true)
|
300
305
|
id = [nil, nil, nil]
|
301
306
|
@document_status = :after_doctype
|
302
307
|
@source.ensure_buffer
|
@@ -308,9 +313,10 @@ module REXML
|
|
308
313
|
# For backward compatibility
|
309
314
|
id[1], id[2] = id[2], nil
|
310
315
|
end
|
311
|
-
|
316
|
+
@source.match?(/\s*/um, true) # skip spaces
|
317
|
+
if @source.match?("[", true)
|
312
318
|
@document_status = :in_doctype
|
313
|
-
elsif @source.match(
|
319
|
+
elsif @source.match?(">", true)
|
314
320
|
@document_status = :after_doctype
|
315
321
|
@source.ensure_buffer
|
316
322
|
else
|
@@ -320,7 +326,7 @@ module REXML
|
|
320
326
|
end
|
321
327
|
args = [:start_doctype, name, *id]
|
322
328
|
if @document_status == :after_doctype
|
323
|
-
@source.match(/\s*/um, true)
|
329
|
+
@source.match?(/\s*/um, true)
|
324
330
|
@stack << [ :end_doctype ]
|
325
331
|
end
|
326
332
|
return args
|
@@ -331,14 +337,14 @@ module REXML
|
|
331
337
|
end
|
332
338
|
end
|
333
339
|
if @document_status == :in_doctype
|
334
|
-
@source.match(/\s*/um, true) # skip spaces
|
340
|
+
@source.match?(/\s*/um, true) # skip spaces
|
335
341
|
start_position = @source.position
|
336
|
-
if @source.match("<!", true)
|
337
|
-
if @source.match("ELEMENT", true)
|
342
|
+
if @source.match?("<!", true)
|
343
|
+
if @source.match?("ELEMENT", true)
|
338
344
|
md = @source.match(/(.*?)>/um, true)
|
339
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
340
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
341
|
-
elsif @source.match("ENTITY", true)
|
347
|
+
elsif @source.match?("ENTITY", true)
|
342
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
343
349
|
unless match_data
|
344
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
@@ -370,11 +376,11 @@ module REXML
|
|
370
376
|
end
|
371
377
|
match << '%' if ref
|
372
378
|
return match
|
373
|
-
elsif @source.match("ATTLIST", true)
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
374
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
375
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
376
382
|
element = md[1]
|
377
|
-
contents = md[0]
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
378
384
|
|
379
385
|
pairs = {}
|
380
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
@@ -390,10 +396,10 @@ module REXML
|
|
390
396
|
end
|
391
397
|
end
|
392
398
|
return [ :attlistdecl, element, pairs, contents ]
|
393
|
-
elsif @source.match("NOTATION", true)
|
399
|
+
elsif @source.match?("NOTATION", true)
|
394
400
|
base_error_message = "Malformed notation declaration"
|
395
|
-
unless @source.match(/\s+/um, true)
|
396
|
-
if @source.match(">")
|
401
|
+
unless @source.match?(/\s+/um, true)
|
402
|
+
if @source.match?(">")
|
397
403
|
message = "#{base_error_message}: name is missing"
|
398
404
|
else
|
399
405
|
message = "#{base_error_message}: invalid name"
|
@@ -405,7 +411,8 @@ module REXML
|
|
405
411
|
id = parse_id(base_error_message,
|
406
412
|
accept_external_id: true,
|
407
413
|
accept_public_id: true)
|
408
|
-
|
414
|
+
@source.match?(/\s*/um, true) # skip spaces
|
415
|
+
unless @source.match?(">", true)
|
409
416
|
message = "#{base_error_message}: garbage before end >"
|
410
417
|
raise REXML::ParseException.new(message, @source)
|
411
418
|
end
|
@@ -419,7 +426,7 @@ module REXML
|
|
419
426
|
end
|
420
427
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
421
428
|
return [ :externalentity, match[1] ]
|
422
|
-
elsif @source.match(/\]\s*>/um, true)
|
429
|
+
elsif @source.match?(/\]\s*>/um, true)
|
423
430
|
@document_status = :after_doctype
|
424
431
|
return [ :end_doctype ]
|
425
432
|
end
|
@@ -428,16 +435,16 @@ module REXML
|
|
428
435
|
end
|
429
436
|
end
|
430
437
|
if @document_status == :after_doctype
|
431
|
-
@source.match(/\s*/um, true)
|
438
|
+
@source.match?(/\s*/um, true)
|
432
439
|
end
|
433
440
|
begin
|
434
441
|
start_position = @source.position
|
435
|
-
if @source.match("<", true)
|
442
|
+
if @source.match?("<", true)
|
436
443
|
# :text's read_until may remain only "<" in buffer. In the
|
437
444
|
# case, buffer is empty here. So we need to fill buffer
|
438
445
|
# here explicitly.
|
439
446
|
@source.ensure_buffer
|
440
|
-
if @source.match("/", true)
|
447
|
+
if @source.match?("/", true)
|
441
448
|
@namespaces_restore_stack.pop
|
442
449
|
last_tag = @tags.pop
|
443
450
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
@@ -452,7 +459,7 @@ module REXML
|
|
452
459
|
raise REXML::ParseException.new(message, @source)
|
453
460
|
end
|
454
461
|
return [ :end_element, last_tag ]
|
455
|
-
elsif @source.match("!", true)
|
462
|
+
elsif @source.match?("!", true)
|
456
463
|
md = @source.match(/([^>]*>)/um)
|
457
464
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
458
465
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
@@ -470,7 +477,7 @@ module REXML
|
|
470
477
|
end
|
471
478
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
472
479
|
"in the doctype declaration.", @source)
|
473
|
-
elsif @source.match("?", true)
|
480
|
+
elsif @source.match?("?", true)
|
474
481
|
return process_instruction
|
475
482
|
else
|
476
483
|
# Get the next tag
|
@@ -651,7 +658,7 @@ module REXML
|
|
651
658
|
def parse_name(base_error_message)
|
652
659
|
md = @source.match(Private::NAME_PATTERN, true)
|
653
660
|
unless md
|
654
|
-
if @source.match(/\S/um)
|
661
|
+
if @source.match?(/\S/um)
|
655
662
|
message = "#{base_error_message}: invalid name"
|
656
663
|
else
|
657
664
|
message = "#{base_error_message}: name is missing"
|
@@ -693,34 +700,34 @@ module REXML
|
|
693
700
|
accept_public_id:)
|
694
701
|
public = /\A\s*PUBLIC/um
|
695
702
|
system = /\A\s*SYSTEM/um
|
696
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
697
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
703
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
704
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
698
705
|
return "public ID literal is missing"
|
699
706
|
end
|
700
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
707
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
701
708
|
return "invalid public ID literal"
|
702
709
|
end
|
703
710
|
if accept_public_id
|
704
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
711
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
705
712
|
return "system ID literal is missing"
|
706
713
|
end
|
707
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
714
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
708
715
|
return "invalid system literal"
|
709
716
|
end
|
710
717
|
"garbage after system literal"
|
711
718
|
else
|
712
719
|
"garbage after public ID literal"
|
713
720
|
end
|
714
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
715
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
721
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
722
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
716
723
|
return "system literal is missing"
|
717
724
|
end
|
718
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
725
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
719
726
|
return "invalid system literal"
|
720
727
|
end
|
721
728
|
"garbage after system literal"
|
722
729
|
else
|
723
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
730
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
724
731
|
return "invalid ID type"
|
725
732
|
end
|
726
733
|
"ID type is missing"
|
@@ -729,7 +736,7 @@ module REXML
|
|
729
736
|
|
730
737
|
def process_instruction
|
731
738
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
732
|
-
if @source.match(/\s+/um, true)
|
739
|
+
if @source.match?(/\s+/um, true)
|
733
740
|
match_data = @source.match(/(.*?)\?>/um, true)
|
734
741
|
unless match_data
|
735
742
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
@@ -737,7 +744,7 @@ module REXML
|
|
737
744
|
content = match_data[1]
|
738
745
|
else
|
739
746
|
content = nil
|
740
|
-
unless @source.match("?>", true)
|
747
|
+
unless @source.match?("?>", true)
|
741
748
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
742
749
|
end
|
743
750
|
end
|
@@ -762,14 +769,33 @@ module REXML
|
|
762
769
|
[:processing_instruction, name, content]
|
763
770
|
end
|
764
771
|
|
772
|
+
if StringScanner::Version < "3.1.1"
|
773
|
+
def scan_quote
|
774
|
+
@source.match(/(['"])/, true)&.[](1)
|
775
|
+
end
|
776
|
+
else
|
777
|
+
def scan_quote
|
778
|
+
case @source.peek_byte
|
779
|
+
when 34 # '"'.ord
|
780
|
+
@source.scan_byte
|
781
|
+
'"'
|
782
|
+
when 39 # "'".ord
|
783
|
+
@source.scan_byte
|
784
|
+
"'"
|
785
|
+
else
|
786
|
+
nil
|
787
|
+
end
|
788
|
+
end
|
789
|
+
end
|
790
|
+
|
765
791
|
def parse_attributes(prefixes)
|
766
792
|
attributes = {}
|
767
793
|
expanded_names = {}
|
768
794
|
closed = false
|
769
795
|
while true
|
770
|
-
if @source.match(">", true)
|
796
|
+
if @source.match?(">", true)
|
771
797
|
return attributes, closed
|
772
|
-
elsif @source.match("/>", true)
|
798
|
+
elsif @source.match?("/>", true)
|
773
799
|
closed = true
|
774
800
|
return attributes, closed
|
775
801
|
elsif match = @source.match(QNAME, true)
|
@@ -777,15 +803,14 @@ module REXML
|
|
777
803
|
prefix = match[2]
|
778
804
|
local_part = match[3]
|
779
805
|
|
780
|
-
unless @source.match(/\s*=\s*/um, true)
|
806
|
+
unless @source.match?(/\s*=\s*/um, true)
|
781
807
|
message = "Missing attribute equal: <#{name}>"
|
782
808
|
raise REXML::ParseException.new(message, @source)
|
783
809
|
end
|
784
|
-
unless
|
810
|
+
unless quote = scan_quote
|
785
811
|
message = "Missing attribute value start quote: <#{name}>"
|
786
812
|
raise REXML::ParseException.new(message, @source)
|
787
813
|
end
|
788
|
-
quote = match[1]
|
789
814
|
start_position = @source.position
|
790
815
|
value = @source.read_until(quote)
|
791
816
|
unless value.chomp!(quote)
|
@@ -793,7 +818,7 @@ module REXML
|
|
793
818
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
794
819
|
raise REXML::ParseException.new(message, @source)
|
795
820
|
end
|
796
|
-
@source.match(/\s*/um, true)
|
821
|
+
@source.match?(/\s*/um, true)
|
797
822
|
if prefix == "xmlns"
|
798
823
|
if local_part == "xml"
|
799
824
|
if value != Private::XML_PREFIXED_NAMESPACE
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -58,8 +68,14 @@ module REXML
|
|
58
68
|
SCANNER_RESET_SIZE = 100000
|
59
69
|
PRE_DEFINED_TERM_PATTERNS = {}
|
60
70
|
pre_defined_terms = ["'", '"', "<"]
|
61
|
-
|
62
|
-
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
63
79
|
end
|
64
80
|
end
|
65
81
|
private_constant :Private
|
@@ -77,7 +93,7 @@ module REXML
|
|
77
93
|
detect_encoding
|
78
94
|
end
|
79
95
|
@line = 0
|
80
|
-
@
|
96
|
+
@encoded_terms = {}
|
81
97
|
end
|
82
98
|
|
83
99
|
# The current buffer (what we're going to read next)
|
@@ -126,6 +142,14 @@ module REXML
|
|
126
142
|
end
|
127
143
|
end
|
128
144
|
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
129
153
|
def position
|
130
154
|
@scanner.pos
|
131
155
|
end
|
@@ -134,6 +158,14 @@ module REXML
|
|
134
158
|
@scanner.pos = pos
|
135
159
|
end
|
136
160
|
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
137
169
|
# @return true if the Source is exhausted
|
138
170
|
def empty?
|
139
171
|
@scanner.eos?
|
@@ -228,7 +260,7 @@ module REXML
|
|
228
260
|
|
229
261
|
def read_until(term)
|
230
262
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
231
|
-
term = @
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
232
264
|
until str = @scanner.scan_until(pattern)
|
233
265
|
break if @source.nil?
|
234
266
|
break if @source.eof?
|
@@ -267,6 +299,23 @@ module REXML
|
|
267
299
|
md.nil? ? nil : @scanner
|
268
300
|
end
|
269
301
|
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
270
319
|
def empty?
|
271
320
|
super and ( @source.nil? || @source.eof? )
|
272
321
|
end
|
@@ -286,7 +335,7 @@ module REXML
|
|
286
335
|
rescue
|
287
336
|
end
|
288
337
|
@er_source.seek(pos)
|
289
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
290
339
|
pos = -1
|
291
340
|
line = -1
|
292
341
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 2025-02-16 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: An XML toolkit for Ruby
|
13
13
|
email:
|
@@ -102,7 +102,7 @@ homepage: https://github.com/ruby/rexml
|
|
102
102
|
licenses:
|
103
103
|
- BSD-2-Clause
|
104
104
|
metadata:
|
105
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
|
106
106
|
rdoc_options:
|
107
107
|
- "--main"
|
108
108
|
- README.md
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.6.
|
122
|
+
rubygems_version: 3.6.2
|
123
123
|
specification_version: 4
|
124
124
|
summary: An XML toolkit for Ruby
|
125
125
|
test_files: []
|