rexml 3.3.9 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +53 -0
- data/lib/rexml/parsers/baseparser.rb +70 -45
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +55 -6
- data/lib/rexml/text.rb +15 -40
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
|
4
|
+
data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
|
7
|
+
data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
|
data/NEWS.md
CHANGED
@@ -1,5 +1,58 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.1 - 2025-02-16 {#version-3-4-1}
|
4
|
+
|
5
|
+
### Improvement
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-226
|
9
|
+
* GH-227
|
10
|
+
* GH-237
|
11
|
+
* Patch by NAITOH Jun
|
12
|
+
|
13
|
+
### Fixes
|
14
|
+
|
15
|
+
* Fix serialization of ATTLIST is incorrect
|
16
|
+
* GH-233
|
17
|
+
* GH-234
|
18
|
+
* Patch by OlofKalufs
|
19
|
+
* Reported by OlofKalufs
|
20
|
+
|
21
|
+
### Thanks
|
22
|
+
|
23
|
+
* NAITOH Jun
|
24
|
+
|
25
|
+
* OlofKalufs
|
26
|
+
|
27
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
28
|
+
|
29
|
+
### Improvement
|
30
|
+
|
31
|
+
* Improved performance.
|
32
|
+
* GH-216
|
33
|
+
* Patch by NAITOH Jun
|
34
|
+
|
35
|
+
* JRuby: Improved parse performance.
|
36
|
+
* GH-219
|
37
|
+
* Patch by João Duarte
|
38
|
+
|
39
|
+
* Added support for reusing pull parser.
|
40
|
+
* GH-214
|
41
|
+
* GH-220
|
42
|
+
* Patch by Dmitry Pogrebnoy
|
43
|
+
|
44
|
+
* Improved error handling when source is `IO`.
|
45
|
+
* GH-221
|
46
|
+
* Patch by NAITOH Jun
|
47
|
+
|
48
|
+
### Thanks
|
49
|
+
|
50
|
+
* NAITOH Jun
|
51
|
+
|
52
|
+
* João Duarte
|
53
|
+
|
54
|
+
* Dmitry Pogrebnoy
|
55
|
+
|
3
56
|
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
4
57
|
|
5
58
|
### Improvements
|
data/lib/rexml/parsers/baseparser.rb
CHANGED
@@ -181,6 +181,10 @@ module REXML
|
|
181
181
|
|
182
182
|
def stream=( source )
|
183
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
184
188
|
@closed = nil
|
185
189
|
@have_root = false
|
186
190
|
@document_status = nil
|
@@ -269,10 +273,10 @@ module REXML
|
|
269
273
|
@source.ensure_buffer
|
270
274
|
if @document_status == nil
|
271
275
|
start_position = @source.position
|
272
|
-
if @source.match("<?", true)
|
276
|
+
if @source.match?("<?", true)
|
273
277
|
return process_instruction
|
274
|
-
elsif @source.match("<!", true)
|
275
|
-
if @source.match("--", true)
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
276
280
|
md = @source.match(/(.*?)-->/um, true)
|
277
281
|
if md.nil?
|
278
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
@@ -281,10 +285,10 @@ module REXML
|
|
281
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
282
286
|
end
|
283
287
|
return [ :comment, md[1] ]
|
284
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
285
289
|
base_error_message = "Malformed DOCTYPE"
|
286
|
-
unless @source.match(/\s+/um, true)
|
287
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
288
292
|
message = "#{base_error_message}: name is missing"
|
289
293
|
else
|
290
294
|
message = "#{base_error_message}: invalid name"
|
@@ -293,10 +297,11 @@ module REXML
|
|
293
297
|
raise REXML::ParseException.new(message, @source)
|
294
298
|
end
|
295
299
|
name = parse_name(base_error_message)
|
296
|
-
|
300
|
+
@source.match?(/\s*/um, true) # skip spaces
|
301
|
+
if @source.match?("[", true)
|
297
302
|
id = [nil, nil, nil]
|
298
303
|
@document_status = :in_doctype
|
299
|
-
elsif @source.match(/\s*>/um, true)
|
304
|
+
elsif @source.match?(">", true)
|
300
305
|
id = [nil, nil, nil]
|
301
306
|
@document_status = :after_doctype
|
302
307
|
@source.ensure_buffer
|
@@ -308,9 +313,10 @@ module REXML
|
|
308
313
|
# For backward compatibility
|
309
314
|
id[1], id[2] = id[2], nil
|
310
315
|
end
|
311
|
-
|
316
|
+
@source.match?(/\s*/um, true) # skip spaces
|
317
|
+
if @source.match?("[", true)
|
312
318
|
@document_status = :in_doctype
|
313
|
-
elsif @source.match(/\s*>/um, true)
|
319
|
+
elsif @source.match?(">", true)
|
314
320
|
@document_status = :after_doctype
|
315
321
|
@source.ensure_buffer
|
316
322
|
else
|
@@ -320,7 +326,7 @@ module REXML
|
|
320
326
|
end
|
321
327
|
args = [:start_doctype, name, *id]
|
322
328
|
if @document_status == :after_doctype
|
323
|
-
@source.match(/\s*/um, true)
|
329
|
+
@source.match?(/\s*/um, true)
|
324
330
|
@stack << [ :end_doctype ]
|
325
331
|
end
|
326
332
|
return args
|
@@ -331,14 +337,14 @@ module REXML
|
|
331
337
|
end
|
332
338
|
end
|
333
339
|
if @document_status == :in_doctype
|
334
|
-
@source.match(/\s*/um, true) # skip spaces
|
340
|
+
@source.match?(/\s*/um, true) # skip spaces
|
335
341
|
start_position = @source.position
|
336
|
-
if @source.match("<!", true)
|
337
|
-
if @source.match("ELEMENT", true)
|
342
|
+
if @source.match?("<!", true)
|
343
|
+
if @source.match?("ELEMENT", true)
|
338
344
|
md = @source.match(/(.*?)>/um, true)
|
339
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
340
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
341
|
-
elsif @source.match("ENTITY", true)
|
347
|
+
elsif @source.match?("ENTITY", true)
|
342
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
343
349
|
unless match_data
|
344
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
@@ -370,11 +376,11 @@ module REXML
|
|
370
376
|
end
|
371
377
|
match << '%' if ref
|
372
378
|
return match
|
373
|
-
elsif @source.match("ATTLIST", true)
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
374
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
375
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
376
382
|
element = md[1]
|
377
|
-
contents = md[0]
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
378
384
|
|
379
385
|
pairs = {}
|
380
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
@@ -390,10 +396,10 @@ module REXML
|
|
390
396
|
end
|
391
397
|
end
|
392
398
|
return [ :attlistdecl, element, pairs, contents ]
|
393
|
-
elsif @source.match("NOTATION", true)
|
399
|
+
elsif @source.match?("NOTATION", true)
|
394
400
|
base_error_message = "Malformed notation declaration"
|
395
|
-
unless @source.match(/\s+/um, true)
|
396
|
-
if @source.match(">")
|
401
|
+
unless @source.match?(/\s+/um, true)
|
402
|
+
if @source.match?(">")
|
397
403
|
message = "#{base_error_message}: name is missing"
|
398
404
|
else
|
399
405
|
message = "#{base_error_message}: invalid name"
|
@@ -405,7 +411,8 @@ module REXML
|
|
405
411
|
id = parse_id(base_error_message,
|
406
412
|
accept_external_id: true,
|
407
413
|
accept_public_id: true)
|
408
|
-
|
414
|
+
@source.match?(/\s*/um, true) # skip spaces
|
415
|
+
unless @source.match?(">", true)
|
409
416
|
message = "#{base_error_message}: garbage before end >"
|
410
417
|
raise REXML::ParseException.new(message, @source)
|
411
418
|
end
|
@@ -419,7 +426,7 @@ module REXML
|
|
419
426
|
end
|
420
427
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
421
428
|
return [ :externalentity, match[1] ]
|
422
|
-
elsif @source.match(/\]\s*>/um, true)
|
429
|
+
elsif @source.match?(/\]\s*>/um, true)
|
423
430
|
@document_status = :after_doctype
|
424
431
|
return [ :end_doctype ]
|
425
432
|
end
|
@@ -428,16 +435,16 @@ module REXML
|
|
428
435
|
end
|
429
436
|
end
|
430
437
|
if @document_status == :after_doctype
|
431
|
-
@source.match(/\s*/um, true)
|
438
|
+
@source.match?(/\s*/um, true)
|
432
439
|
end
|
433
440
|
begin
|
434
441
|
start_position = @source.position
|
435
|
-
if @source.match("<", true)
|
442
|
+
if @source.match?("<", true)
|
436
443
|
# :text's read_until may remain only "<" in buffer. In the
|
437
444
|
# case, buffer is empty here. So we need to fill buffer
|
438
445
|
# here explicitly.
|
439
446
|
@source.ensure_buffer
|
440
|
-
if @source.match("/", true)
|
447
|
+
if @source.match?("/", true)
|
441
448
|
@namespaces_restore_stack.pop
|
442
449
|
last_tag = @tags.pop
|
443
450
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
@@ -452,7 +459,7 @@ module REXML
|
|
452
459
|
raise REXML::ParseException.new(message, @source)
|
453
460
|
end
|
454
461
|
return [ :end_element, last_tag ]
|
455
|
-
elsif @source.match("!", true)
|
462
|
+
elsif @source.match?("!", true)
|
456
463
|
md = @source.match(/([^>]*>)/um)
|
457
464
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
458
465
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
@@ -470,7 +477,7 @@ module REXML
|
|
470
477
|
end
|
471
478
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
472
479
|
"in the doctype declaration.", @source)
|
473
|
-
elsif @source.match("?", true)
|
480
|
+
elsif @source.match?("?", true)
|
474
481
|
return process_instruction
|
475
482
|
else
|
476
483
|
# Get the next tag
|
@@ -651,7 +658,7 @@ module REXML
|
|
651
658
|
def parse_name(base_error_message)
|
652
659
|
md = @source.match(Private::NAME_PATTERN, true)
|
653
660
|
unless md
|
654
|
-
if @source.match(/\S/um)
|
661
|
+
if @source.match?(/\S/um)
|
655
662
|
message = "#{base_error_message}: invalid name"
|
656
663
|
else
|
657
664
|
message = "#{base_error_message}: name is missing"
|
@@ -693,34 +700,34 @@ module REXML
|
|
693
700
|
accept_public_id:)
|
694
701
|
public = /\A\s*PUBLIC/um
|
695
702
|
system = /\A\s*SYSTEM/um
|
696
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
697
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
703
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
704
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
698
705
|
return "public ID literal is missing"
|
699
706
|
end
|
700
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
707
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
701
708
|
return "invalid public ID literal"
|
702
709
|
end
|
703
710
|
if accept_public_id
|
704
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
711
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
705
712
|
return "system ID literal is missing"
|
706
713
|
end
|
707
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
714
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
708
715
|
return "invalid system literal"
|
709
716
|
end
|
710
717
|
"garbage after system literal"
|
711
718
|
else
|
712
719
|
"garbage after public ID literal"
|
713
720
|
end
|
714
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
715
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
721
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
722
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
716
723
|
return "system literal is missing"
|
717
724
|
end
|
718
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
725
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
719
726
|
return "invalid system literal"
|
720
727
|
end
|
721
728
|
"garbage after system literal"
|
722
729
|
else
|
723
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
730
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
724
731
|
return "invalid ID type"
|
725
732
|
end
|
726
733
|
"ID type is missing"
|
@@ -729,7 +736,7 @@ module REXML
|
|
729
736
|
|
730
737
|
def process_instruction
|
731
738
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
732
|
-
if @source.match(/\s+/um, true)
|
739
|
+
if @source.match?(/\s+/um, true)
|
733
740
|
match_data = @source.match(/(.*?)\?>/um, true)
|
734
741
|
unless match_data
|
735
742
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
@@ -737,7 +744,7 @@ module REXML
|
|
737
744
|
content = match_data[1]
|
738
745
|
else
|
739
746
|
content = nil
|
740
|
-
unless @source.match("?>", true)
|
747
|
+
unless @source.match?("?>", true)
|
741
748
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
742
749
|
end
|
743
750
|
end
|
@@ -762,14 +769,33 @@ module REXML
|
|
762
769
|
[:processing_instruction, name, content]
|
763
770
|
end
|
764
771
|
|
772
|
+
if StringScanner::Version < "3.1.1"
|
773
|
+
def scan_quote
|
774
|
+
@source.match(/(['"])/, true)&.[](1)
|
775
|
+
end
|
776
|
+
else
|
777
|
+
def scan_quote
|
778
|
+
case @source.peek_byte
|
779
|
+
when 34 # '"'.ord
|
780
|
+
@source.scan_byte
|
781
|
+
'"'
|
782
|
+
when 39 # "'".ord
|
783
|
+
@source.scan_byte
|
784
|
+
"'"
|
785
|
+
else
|
786
|
+
nil
|
787
|
+
end
|
788
|
+
end
|
789
|
+
end
|
790
|
+
|
765
791
|
def parse_attributes(prefixes)
|
766
792
|
attributes = {}
|
767
793
|
expanded_names = {}
|
768
794
|
closed = false
|
769
795
|
while true
|
770
|
-
if @source.match(">", true)
|
796
|
+
if @source.match?(">", true)
|
771
797
|
return attributes, closed
|
772
|
-
elsif @source.match("/>", true)
|
798
|
+
elsif @source.match?("/>", true)
|
773
799
|
closed = true
|
774
800
|
return attributes, closed
|
775
801
|
elsif match = @source.match(QNAME, true)
|
@@ -777,15 +803,14 @@ module REXML
|
|
777
803
|
prefix = match[2]
|
778
804
|
local_part = match[3]
|
779
805
|
|
780
|
-
unless @source.match(/\s*=\s*/um, true)
|
806
|
+
unless @source.match?(/\s*=\s*/um, true)
|
781
807
|
message = "Missing attribute equal: <#{name}>"
|
782
808
|
raise REXML::ParseException.new(message, @source)
|
783
809
|
end
|
784
|
-
unless match = @source.match(/(['"])/, true)
|
810
|
+
unless quote = scan_quote
|
785
811
|
message = "Missing attribute value start quote: <#{name}>"
|
786
812
|
raise REXML::ParseException.new(message, @source)
|
787
813
|
end
|
788
|
-
quote = match[1]
|
789
814
|
start_position = @source.position
|
790
815
|
value = @source.read_until(quote)
|
791
816
|
unless value.chomp!(quote)
|
@@ -793,7 +818,7 @@ module REXML
|
|
793
818
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
794
819
|
raise REXML::ParseException.new(message, @source)
|
795
820
|
end
|
796
|
-
@source.match(/\s*/um, true)
|
821
|
+
@source.match?(/\s*/um, true)
|
797
822
|
if prefix == "xmlns"
|
798
823
|
if local_part == "xml"
|
799
824
|
if value != Private::XML_PREFIXED_NAMESPACE
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -58,8 +68,14 @@ module REXML
|
|
58
68
|
SCANNER_RESET_SIZE = 100000
|
59
69
|
PRE_DEFINED_TERM_PATTERNS = {}
|
60
70
|
pre_defined_terms = ["'", '"', "<"]
|
61
|
-
|
62
|
-
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
63
79
|
end
|
64
80
|
end
|
65
81
|
private_constant :Private
|
@@ -77,7 +93,7 @@ module REXML
|
|
77
93
|
detect_encoding
|
78
94
|
end
|
79
95
|
@line = 0
|
80
|
-
@
|
96
|
+
@encoded_terms = {}
|
81
97
|
end
|
82
98
|
|
83
99
|
# The current buffer (what we're going to read next)
|
@@ -126,6 +142,14 @@ module REXML
|
|
126
142
|
end
|
127
143
|
end
|
128
144
|
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
129
153
|
def position
|
130
154
|
@scanner.pos
|
131
155
|
end
|
@@ -134,6 +158,14 @@ module REXML
|
|
134
158
|
@scanner.pos = pos
|
135
159
|
end
|
136
160
|
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
137
169
|
# @return true if the Source is exhausted
|
138
170
|
def empty?
|
139
171
|
@scanner.eos?
|
@@ -228,7 +260,7 @@ module REXML
|
|
228
260
|
|
229
261
|
def read_until(term)
|
230
262
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
231
|
-
term = @
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
232
264
|
until str = @scanner.scan_until(pattern)
|
233
265
|
break if @source.nil?
|
234
266
|
break if @source.eof?
|
@@ -267,6 +299,23 @@ module REXML
|
|
267
299
|
md.nil? ? nil : @scanner
|
268
300
|
end
|
269
301
|
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
270
319
|
def empty?
|
271
320
|
super and ( @source.nil? || @source.eof? )
|
272
321
|
end
|
@@ -286,7 +335,7 @@ module REXML
|
|
286
335
|
rescue
|
287
336
|
end
|
288
337
|
@er_source.seek(pos)
|
289
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
290
339
|
pos = -1
|
291
340
|
line = -1
|
292
341
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.9
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-10-24 00:00:00.000000000 Z
|
10
|
+
date: 2025-02-16 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: An XML toolkit for Ruby
|
13
13
|
email:
|
@@ -102,7 +102,7 @@ homepage: https://github.com/ruby/rexml
|
|
102
102
|
licenses:
|
103
103
|
- BSD-2-Clause
|
104
104
|
metadata:
|
105
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
|
106
106
|
rdoc_options:
|
107
107
|
- "--main"
|
108
108
|
- README.md
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.6.
|
122
|
+
rubygems_version: 3.6.2
|
123
123
|
specification_version: 4
|
124
124
|
summary: An XML toolkit for Ruby
|
125
125
|
test_files: []
|