rexml 3.3.8 → 3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +50 -0
- data/lib/rexml/parsers/baseparser.rb +54 -45
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +45 -4
- data/lib/rexml/text.rb +15 -40
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 582bb5339257c81f2ce9c076155c01d7adfe8fb169c09bc7f5f489f6a76bca80
|
4
|
+
data.tar.gz: 160de8899d8d1f995bafca23631e9e4ab928ebbffa21684e3b61dad805a6187b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2b095792523f54301e8a6af2f1682a9ad24d92cdd5d94c9e6088b27520e3c03b68fe06061b6ff2fd96b001b9cb947c57e4095244d83206a83fc2a1829dd4243
|
7
|
+
data.tar.gz: 4f335d2b1e58c1da233c3f0a0588def502c8cb2660633e0e06b4d0930bbcedcaae36b52dc550923704b4525d94a1011f4b5f4e87a81e5d689cce24ee89210a23
|
data/NEWS.md
CHANGED
@@ -1,5 +1,55 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
4
|
+
|
5
|
+
### Improvement
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-216
|
9
|
+
* Patch by NAITOH Jun
|
10
|
+
|
11
|
+
* JRuby: Improved parse performance.
|
12
|
+
* GH-219
|
13
|
+
* Patch by João Duarte
|
14
|
+
|
15
|
+
* Added support for reusing pull parser.
|
16
|
+
* GH-214
|
17
|
+
* GH-220
|
18
|
+
* Patch by Dmitry Pogrebnoy
|
19
|
+
|
20
|
+
* Improved error handling when source is `IO`.
|
21
|
+
* GH-221
|
22
|
+
* Patch by NAITOH Jun
|
23
|
+
|
24
|
+
### Thanks
|
25
|
+
|
26
|
+
* NAITOH Jun
|
27
|
+
|
28
|
+
* João Duarte
|
29
|
+
|
30
|
+
* Dmitry Pogrebnoy
|
31
|
+
|
32
|
+
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
33
|
+
|
34
|
+
### Improvements
|
35
|
+
|
36
|
+
* Improved performance.
|
37
|
+
* GH-210
|
38
|
+
* Patch by NAITOH Jun.
|
39
|
+
|
40
|
+
### Fixes
|
41
|
+
|
42
|
+
* Fixed a parse bug for text only invalid XML.
|
43
|
+
* GH-215
|
44
|
+
* Patch by NAITOH Jun.
|
45
|
+
|
46
|
+
* Fixed a parse bug that `&#0x...;` is accepted as a character
|
47
|
+
reference.
|
48
|
+
|
49
|
+
### Thanks
|
50
|
+
|
51
|
+
* NAITOH Jun
|
52
|
+
|
3
53
|
## 3.3.8 - 2024-09-29 {#version-3-3-8}
|
4
54
|
|
5
55
|
### Improvements
|
@@ -150,7 +150,7 @@ module REXML
|
|
150
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
151
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
152
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
-
CHARACTER_REFERENCES = /&#
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
154
|
DEFAULT_ENTITIES_PATTERNS = {}
|
155
155
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
156
|
default_entities.each do |term|
|
@@ -167,6 +167,7 @@ module REXML
|
|
167
167
|
@entity_expansion_count = 0
|
168
168
|
@entity_expansion_limit = Security.entity_expansion_limit
|
169
169
|
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
170
171
|
end
|
171
172
|
|
172
173
|
def add_listener( listener )
|
@@ -180,6 +181,10 @@ module REXML
|
|
180
181
|
|
181
182
|
def stream=( source )
|
182
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
183
188
|
@closed = nil
|
184
189
|
@have_root = false
|
185
190
|
@document_status = nil
|
@@ -268,10 +273,10 @@ module REXML
|
|
268
273
|
@source.ensure_buffer
|
269
274
|
if @document_status == nil
|
270
275
|
start_position = @source.position
|
271
|
-
if @source.match("<?", true)
|
276
|
+
if @source.match?("<?", true)
|
272
277
|
return process_instruction
|
273
|
-
elsif @source.match("<!", true)
|
274
|
-
if @source.match("--", true)
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
275
280
|
md = @source.match(/(.*?)-->/um, true)
|
276
281
|
if md.nil?
|
277
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
@@ -280,10 +285,10 @@ module REXML
|
|
280
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
281
286
|
end
|
282
287
|
return [ :comment, md[1] ]
|
283
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
284
289
|
base_error_message = "Malformed DOCTYPE"
|
285
|
-
unless @source.match(/\s+/um, true)
|
286
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
287
292
|
message = "#{base_error_message}: name is missing"
|
288
293
|
else
|
289
294
|
message = "#{base_error_message}: invalid name"
|
@@ -292,10 +297,10 @@ module REXML
|
|
292
297
|
raise REXML::ParseException.new(message, @source)
|
293
298
|
end
|
294
299
|
name = parse_name(base_error_message)
|
295
|
-
if @source.match(/\s*\[/um, true)
|
300
|
+
if @source.match?(/\s*\[/um, true)
|
296
301
|
id = [nil, nil, nil]
|
297
302
|
@document_status = :in_doctype
|
298
|
-
elsif @source.match(/\s*>/um, true)
|
303
|
+
elsif @source.match?(/\s*>/um, true)
|
299
304
|
id = [nil, nil, nil]
|
300
305
|
@document_status = :after_doctype
|
301
306
|
@source.ensure_buffer
|
@@ -307,9 +312,9 @@ module REXML
|
|
307
312
|
# For backward compatibility
|
308
313
|
id[1], id[2] = id[2], nil
|
309
314
|
end
|
310
|
-
if @source.match(/\s*\[/um, true)
|
315
|
+
if @source.match?(/\s*\[/um, true)
|
311
316
|
@document_status = :in_doctype
|
312
|
-
elsif @source.match(/\s*>/um, true)
|
317
|
+
elsif @source.match?(/\s*>/um, true)
|
313
318
|
@document_status = :after_doctype
|
314
319
|
@source.ensure_buffer
|
315
320
|
else
|
@@ -319,7 +324,7 @@ module REXML
|
|
319
324
|
end
|
320
325
|
args = [:start_doctype, name, *id]
|
321
326
|
if @document_status == :after_doctype
|
322
|
-
@source.match(/\s*/um, true)
|
327
|
+
@source.match?(/\s*/um, true)
|
323
328
|
@stack << [ :end_doctype ]
|
324
329
|
end
|
325
330
|
return args
|
@@ -330,14 +335,14 @@ module REXML
|
|
330
335
|
end
|
331
336
|
end
|
332
337
|
if @document_status == :in_doctype
|
333
|
-
@source.match(/\s*/um, true) # skip spaces
|
338
|
+
@source.match?(/\s*/um, true) # skip spaces
|
334
339
|
start_position = @source.position
|
335
|
-
if @source.match("<!", true)
|
336
|
-
if @source.match("ELEMENT", true)
|
340
|
+
if @source.match?("<!", true)
|
341
|
+
if @source.match?("ELEMENT", true)
|
337
342
|
md = @source.match(/(.*?)>/um, true)
|
338
343
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
339
344
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
340
|
-
elsif @source.match("ENTITY", true)
|
345
|
+
elsif @source.match?("ENTITY", true)
|
341
346
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
342
347
|
unless match_data
|
343
348
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
@@ -369,7 +374,7 @@ module REXML
|
|
369
374
|
end
|
370
375
|
match << '%' if ref
|
371
376
|
return match
|
372
|
-
elsif @source.match("ATTLIST", true)
|
377
|
+
elsif @source.match?("ATTLIST", true)
|
373
378
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
374
379
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
375
380
|
element = md[1]
|
@@ -389,10 +394,10 @@ module REXML
|
|
389
394
|
end
|
390
395
|
end
|
391
396
|
return [ :attlistdecl, element, pairs, contents ]
|
392
|
-
elsif @source.match("NOTATION", true)
|
397
|
+
elsif @source.match?("NOTATION", true)
|
393
398
|
base_error_message = "Malformed notation declaration"
|
394
|
-
unless @source.match(/\s+/um, true)
|
395
|
-
if @source.match(">")
|
399
|
+
unless @source.match?(/\s+/um, true)
|
400
|
+
if @source.match?(">")
|
396
401
|
message = "#{base_error_message}: name is missing"
|
397
402
|
else
|
398
403
|
message = "#{base_error_message}: invalid name"
|
@@ -404,7 +409,7 @@ module REXML
|
|
404
409
|
id = parse_id(base_error_message,
|
405
410
|
accept_external_id: true,
|
406
411
|
accept_public_id: true)
|
407
|
-
unless @source.match(/\s*>/um, true)
|
412
|
+
unless @source.match?(/\s*>/um, true)
|
408
413
|
message = "#{base_error_message}: garbage before end >"
|
409
414
|
raise REXML::ParseException.new(message, @source)
|
410
415
|
end
|
@@ -418,7 +423,7 @@ module REXML
|
|
418
423
|
end
|
419
424
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
420
425
|
return [ :externalentity, match[1] ]
|
421
|
-
elsif @source.match(/\]\s*>/um, true)
|
426
|
+
elsif @source.match?(/\]\s*>/um, true)
|
422
427
|
@document_status = :after_doctype
|
423
428
|
return [ :end_doctype ]
|
424
429
|
end
|
@@ -427,16 +432,16 @@ module REXML
|
|
427
432
|
end
|
428
433
|
end
|
429
434
|
if @document_status == :after_doctype
|
430
|
-
@source.match(/\s*/um, true)
|
435
|
+
@source.match?(/\s*/um, true)
|
431
436
|
end
|
432
437
|
begin
|
433
438
|
start_position = @source.position
|
434
|
-
if @source.match("<", true)
|
439
|
+
if @source.match?("<", true)
|
435
440
|
# :text's read_until may remain only "<" in buffer. In the
|
436
441
|
# case, buffer is empty here. So we need to fill buffer
|
437
442
|
# here explicitly.
|
438
443
|
@source.ensure_buffer
|
439
|
-
if @source.match("/", true)
|
444
|
+
if @source.match?("/", true)
|
440
445
|
@namespaces_restore_stack.pop
|
441
446
|
last_tag = @tags.pop
|
442
447
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
@@ -451,7 +456,7 @@ module REXML
|
|
451
456
|
raise REXML::ParseException.new(message, @source)
|
452
457
|
end
|
453
458
|
return [ :end_element, last_tag ]
|
454
|
-
elsif @source.match("!", true)
|
459
|
+
elsif @source.match?("!", true)
|
455
460
|
md = @source.match(/([^>]*>)/um)
|
456
461
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
457
462
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
@@ -469,7 +474,7 @@ module REXML
|
|
469
474
|
end
|
470
475
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
471
476
|
"in the doctype declaration.", @source)
|
472
|
-
elsif @source.match("?", true)
|
477
|
+
elsif @source.match?("?", true)
|
473
478
|
return process_instruction
|
474
479
|
else
|
475
480
|
# Get the next tag
|
@@ -569,8 +574,12 @@ module REXML
|
|
569
574
|
return rv if matches.size == 0
|
570
575
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
571
576
|
m=$1
|
572
|
-
|
573
|
-
|
577
|
+
if m.start_with?("x")
|
578
|
+
code_point = Integer(m[1..-1], 16)
|
579
|
+
else
|
580
|
+
code_point = Integer(m, 10)
|
581
|
+
end
|
582
|
+
[code_point].pack('U*')
|
574
583
|
}
|
575
584
|
matches.collect!{|x|x[0]}.compact!
|
576
585
|
if filter
|
@@ -646,7 +655,7 @@ module REXML
|
|
646
655
|
def parse_name(base_error_message)
|
647
656
|
md = @source.match(Private::NAME_PATTERN, true)
|
648
657
|
unless md
|
649
|
-
if @source.match(/\S/um)
|
658
|
+
if @source.match?(/\S/um)
|
650
659
|
message = "#{base_error_message}: invalid name"
|
651
660
|
else
|
652
661
|
message = "#{base_error_message}: name is missing"
|
@@ -688,34 +697,34 @@ module REXML
|
|
688
697
|
accept_public_id:)
|
689
698
|
public = /\A\s*PUBLIC/um
|
690
699
|
system = /\A\s*SYSTEM/um
|
691
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
692
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
700
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
701
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
693
702
|
return "public ID literal is missing"
|
694
703
|
end
|
695
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
704
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
696
705
|
return "invalid public ID literal"
|
697
706
|
end
|
698
707
|
if accept_public_id
|
699
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
708
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
700
709
|
return "system ID literal is missing"
|
701
710
|
end
|
702
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
711
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
703
712
|
return "invalid system literal"
|
704
713
|
end
|
705
714
|
"garbage after system literal"
|
706
715
|
else
|
707
716
|
"garbage after public ID literal"
|
708
717
|
end
|
709
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
710
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
718
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
719
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
711
720
|
return "system literal is missing"
|
712
721
|
end
|
713
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
722
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
714
723
|
return "invalid system literal"
|
715
724
|
end
|
716
725
|
"garbage after system literal"
|
717
726
|
else
|
718
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
727
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
719
728
|
return "invalid ID type"
|
720
729
|
end
|
721
730
|
"ID type is missing"
|
@@ -724,7 +733,7 @@ module REXML
|
|
724
733
|
|
725
734
|
def process_instruction
|
726
735
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
727
|
-
if @source.match(/\s+/um, true)
|
736
|
+
if @source.match?(/\s+/um, true)
|
728
737
|
match_data = @source.match(/(.*?)\?>/um, true)
|
729
738
|
unless match_data
|
730
739
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
@@ -732,7 +741,7 @@ module REXML
|
|
732
741
|
content = match_data[1]
|
733
742
|
else
|
734
743
|
content = nil
|
735
|
-
unless @source.match("?>", true)
|
744
|
+
unless @source.match?("?>", true)
|
736
745
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
737
746
|
end
|
738
747
|
end
|
@@ -762,9 +771,9 @@ module REXML
|
|
762
771
|
expanded_names = {}
|
763
772
|
closed = false
|
764
773
|
while true
|
765
|
-
if @source.match(">", true)
|
774
|
+
if @source.match?(">", true)
|
766
775
|
return attributes, closed
|
767
|
-
elsif @source.match("/>", true)
|
776
|
+
elsif @source.match?("/>", true)
|
768
777
|
closed = true
|
769
778
|
return attributes, closed
|
770
779
|
elsif match = @source.match(QNAME, true)
|
@@ -772,7 +781,7 @@ module REXML
|
|
772
781
|
prefix = match[2]
|
773
782
|
local_part = match[3]
|
774
783
|
|
775
|
-
unless @source.match(/\s*=\s*/um, true)
|
784
|
+
unless @source.match?(/\s*=\s*/um, true)
|
776
785
|
message = "Missing attribute equal: <#{name}>"
|
777
786
|
raise REXML::ParseException.new(message, @source)
|
778
787
|
end
|
@@ -788,7 +797,7 @@ module REXML
|
|
788
797
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
789
798
|
raise REXML::ParseException.new(message, @source)
|
790
799
|
end
|
791
|
-
@source.match(/\s*/um, true)
|
800
|
+
@source.match?(/\s*/um, true)
|
792
801
|
if prefix == "xmlns"
|
793
802
|
if local_part == "xml"
|
794
803
|
if value != Private::XML_PREFIXED_NAMESPACE
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -77,6 +87,7 @@ module REXML
|
|
77
87
|
detect_encoding
|
78
88
|
end
|
79
89
|
@line = 0
|
90
|
+
@encoded_terms = {}
|
80
91
|
end
|
81
92
|
|
82
93
|
# The current buffer (what we're going to read next)
|
@@ -125,6 +136,14 @@ module REXML
|
|
125
136
|
end
|
126
137
|
end
|
127
138
|
|
139
|
+
def match?(pattern, cons=false)
|
140
|
+
if cons
|
141
|
+
!@scanner.skip(pattern).nil?
|
142
|
+
else
|
143
|
+
!@scanner.match?(pattern).nil?
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
128
147
|
def position
|
129
148
|
@scanner.pos
|
130
149
|
end
|
@@ -227,7 +246,7 @@ module REXML
|
|
227
246
|
|
228
247
|
def read_until(term)
|
229
248
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
230
|
-
term = encode(term)
|
249
|
+
term = @encoded_terms[term] ||= encode(term)
|
231
250
|
until str = @scanner.scan_until(pattern)
|
232
251
|
break if @source.nil?
|
233
252
|
break if @source.eof?
|
@@ -266,6 +285,23 @@ module REXML
|
|
266
285
|
md.nil? ? nil : @scanner
|
267
286
|
end
|
268
287
|
|
288
|
+
def match?( pattern, cons=false )
|
289
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
290
|
+
min_bytes = 1
|
291
|
+
while true
|
292
|
+
if cons
|
293
|
+
n_matched_bytes = @scanner.skip(pattern)
|
294
|
+
else
|
295
|
+
n_matched_bytes = @scanner.match?(pattern)
|
296
|
+
end
|
297
|
+
return true if n_matched_bytes
|
298
|
+
return false if pattern.is_a?(String)
|
299
|
+
return false if @source.nil?
|
300
|
+
return false unless read(nil, min_bytes)
|
301
|
+
min_bytes *= 2
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
269
305
|
def empty?
|
270
306
|
super and ( @source.nil? || @source.eof? )
|
271
307
|
end
|
@@ -285,7 +321,7 @@ module REXML
|
|
285
321
|
rescue
|
286
322
|
end
|
287
323
|
@er_source.seek(pos)
|
288
|
-
rescue IOError
|
324
|
+
rescue IOError, SystemCallError
|
289
325
|
pos = -1
|
290
326
|
line = -1
|
291
327
|
end
|
@@ -294,14 +330,19 @@ module REXML
|
|
294
330
|
|
295
331
|
private
|
296
332
|
def readline(term = nil)
|
297
|
-
str = @source.readline(term || @line_break)
|
298
333
|
if @pending_buffer
|
334
|
+
begin
|
335
|
+
str = @source.readline(term || @line_break)
|
336
|
+
rescue IOError
|
337
|
+
end
|
299
338
|
if str.nil?
|
300
339
|
str = @pending_buffer
|
301
340
|
else
|
302
341
|
str = @pending_buffer + str
|
303
342
|
end
|
304
343
|
@pending_buffer = nil
|
344
|
+
else
|
345
|
+
str = @source.readline(term || @line_break)
|
305
346
|
end
|
306
347
|
return nil if str.nil?
|
307
348
|
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
metadata
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.8
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
|
+
autorequire:
|
8
9
|
bindir: bin
|
9
10
|
cert_chain: []
|
10
|
-
date: 2024-09-29 00:00:00.000000000 Z
|
11
|
+
date: 2024-12-15 00:00:00.000000000 Z
|
11
12
|
dependencies: []
|
12
13
|
description: An XML toolkit for Ruby
|
13
14
|
email:
|
@@ -102,7 +103,8 @@ homepage: https://github.com/ruby/rexml
|
|
102
103
|
licenses:
|
103
104
|
- BSD-2-Clause
|
104
105
|
metadata:
|
105
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.8
|
106
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
|
107
|
+
post_install_message:
|
106
108
|
rdoc_options:
|
107
109
|
- "--main"
|
108
110
|
- README.md
|
@@ -119,7 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
121
|
- !ruby/object:Gem::Version
|
120
122
|
version: '0'
|
121
123
|
requirements: []
|
122
|
-
rubygems_version: 3.
|
124
|
+
rubygems_version: 3.5.22
|
125
|
+
signing_key:
|
123
126
|
specification_version: 4
|
124
127
|
summary: An XML toolkit for Ruby
|
125
128
|
test_files: []
|