rexml 3.3.7 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +95 -0
- data/lib/rexml/parsers/baseparser.rb +81 -50
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +2 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +61 -6
- data/lib/rexml/text.rb +15 -40
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9dc6a26dcc5ba93c112d65fa910e49ca970108c726cdce28324d7771a0831a3
|
4
|
+
data.tar.gz: b03ad34d3180aeeaa1ecc7ab21bf5ffe5f2845107a2c35ca3198653f80b932fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0d493943fab795f3c8fc8490a40750382e3c4cf38c73532b1f850612384795c2bb916afc70ebff0bd26e9e2f304ea6a22299a0481523bd0322d5655df05edbd
|
7
|
+
data.tar.gz: bfb02a2bfadb24cbdeed951e06e113e17b123015271cabfffacc3ecc4bbb1bd7c7f56e358d42173feb8b333309f725d57b76f155fea814d70c6decae3b791165
|
data/NEWS.md
CHANGED
@@ -1,5 +1,100 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.4.1 - 2025-02-16 {#version-3-4-1}
|
4
|
+
|
5
|
+
### Improvement
|
6
|
+
|
7
|
+
* Improved performance.
|
8
|
+
* GH-226
|
9
|
+
* GH-227
|
10
|
+
* GH-237
|
11
|
+
* Patch by NAITOH Jun
|
12
|
+
|
13
|
+
### Fixes
|
14
|
+
|
15
|
+
* Fix serialization of ATTLIST is incorrect
|
16
|
+
* GH-233
|
17
|
+
* GH-234
|
18
|
+
* Patch by OlofKalufs
|
19
|
+
* Reported by OlofKalufs
|
20
|
+
|
21
|
+
### Thanks
|
22
|
+
|
23
|
+
* NAITOH Jun
|
24
|
+
|
25
|
+
* OlofKalufs
|
26
|
+
|
27
|
+
## 3.4.0 - 2024-12-15 {#version-3-4-0}
|
28
|
+
|
29
|
+
### Improvement
|
30
|
+
|
31
|
+
* Improved performance.
|
32
|
+
* GH-216
|
33
|
+
* Patch by NAITOH Jun
|
34
|
+
|
35
|
+
* JRuby: Improved parse performance.
|
36
|
+
* GH-219
|
37
|
+
* Patch by João Duarte
|
38
|
+
|
39
|
+
* Added support for reusing pull parser.
|
40
|
+
* GH-214
|
41
|
+
* GH-220
|
42
|
+
* Patch by Dmitry Pogrebnoy
|
43
|
+
|
44
|
+
* Improved error handling when source is `IO`.
|
45
|
+
* GH-221
|
46
|
+
* Patch by NAITOH Jun
|
47
|
+
|
48
|
+
### Thanks
|
49
|
+
|
50
|
+
* NAITOH Jun
|
51
|
+
|
52
|
+
* João Duarte
|
53
|
+
|
54
|
+
* Dmitry Pogrebnoy
|
55
|
+
|
56
|
+
## 3.3.9 - 2024-10-24 {#version-3-3-9}
|
57
|
+
|
58
|
+
### Improvements
|
59
|
+
|
60
|
+
* Improved performance.
|
61
|
+
* GH-210
|
62
|
+
* Patch by NAITOH Jun.
|
63
|
+
|
64
|
+
### Fixes
|
65
|
+
|
66
|
+
* Fixed a parse bug for text only invalid XML.
|
67
|
+
* GH-215
|
68
|
+
* Patch by NAITOH Jun.
|
69
|
+
|
70
|
+
* Fixed a parse bug that `&#0x...;` is accepted as a character
|
71
|
+
reference.
|
72
|
+
|
73
|
+
### Thanks
|
74
|
+
|
75
|
+
* NAITOH Jun
|
76
|
+
|
77
|
+
## 3.3.8 - 2024-09-29 {#version-3-3-8}
|
78
|
+
|
79
|
+
### Improvements
|
80
|
+
|
81
|
+
* SAX2: Improve parse performance.
|
82
|
+
* GH-207
|
83
|
+
* Patch by NAITOH Jun.
|
84
|
+
|
85
|
+
### Fixes
|
86
|
+
|
87
|
+
* Fixed a bug that unexpected attribute namespace conflict error for
|
88
|
+
the predefined "xml" namespace is reported.
|
89
|
+
* GH-208
|
90
|
+
* Patch by KITAITI Makoto
|
91
|
+
|
92
|
+
### Thanks
|
93
|
+
|
94
|
+
* NAITOH Jun
|
95
|
+
|
96
|
+
* KITAITI Makoto
|
97
|
+
|
3
98
|
## 3.3.7 - 2024-09-04 {#version-3-3-7}
|
4
99
|
|
5
100
|
### Improvements
|
@@ -150,12 +150,13 @@ module REXML
|
|
150
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
151
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
152
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
-
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
154
|
DEFAULT_ENTITIES_PATTERNS = {}
|
155
155
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
156
|
default_entities.each do |term|
|
157
157
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
158
|
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
159
160
|
end
|
160
161
|
private_constant :Private
|
161
162
|
|
@@ -166,6 +167,7 @@ module REXML
|
|
166
167
|
@entity_expansion_count = 0
|
167
168
|
@entity_expansion_limit = Security.entity_expansion_limit
|
168
169
|
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
169
171
|
end
|
170
172
|
|
171
173
|
def add_listener( listener )
|
@@ -179,13 +181,17 @@ module REXML
|
|
179
181
|
|
180
182
|
def stream=( source )
|
181
183
|
@source = SourceFactory.create_from( source )
|
184
|
+
reset
|
185
|
+
end
|
186
|
+
|
187
|
+
def reset
|
182
188
|
@closed = nil
|
183
189
|
@have_root = false
|
184
190
|
@document_status = nil
|
185
191
|
@tags = []
|
186
192
|
@stack = []
|
187
193
|
@entities = []
|
188
|
-
@namespaces = {}
|
194
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
189
195
|
@namespaces_restore_stack = []
|
190
196
|
end
|
191
197
|
|
@@ -267,10 +273,10 @@ module REXML
|
|
267
273
|
@source.ensure_buffer
|
268
274
|
if @document_status == nil
|
269
275
|
start_position = @source.position
|
270
|
-
if @source.match("<?", true)
|
276
|
+
if @source.match?("<?", true)
|
271
277
|
return process_instruction
|
272
|
-
elsif @source.match("<!", true)
|
273
|
-
if @source.match("--", true)
|
278
|
+
elsif @source.match?("<!", true)
|
279
|
+
if @source.match?("--", true)
|
274
280
|
md = @source.match(/(.*?)-->/um, true)
|
275
281
|
if md.nil?
|
276
282
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
@@ -279,10 +285,10 @@ module REXML
|
|
279
285
|
raise REXML::ParseException.new("Malformed comment", @source)
|
280
286
|
end
|
281
287
|
return [ :comment, md[1] ]
|
282
|
-
elsif @source.match("DOCTYPE", true)
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
283
289
|
base_error_message = "Malformed DOCTYPE"
|
284
|
-
unless @source.match(/\s+/um, true)
|
285
|
-
if @source.match(">")
|
290
|
+
unless @source.match?(/\s+/um, true)
|
291
|
+
if @source.match?(">")
|
286
292
|
message = "#{base_error_message}: name is missing"
|
287
293
|
else
|
288
294
|
message = "#{base_error_message}: invalid name"
|
@@ -291,10 +297,11 @@ module REXML
|
|
291
297
|
raise REXML::ParseException.new(message, @source)
|
292
298
|
end
|
293
299
|
name = parse_name(base_error_message)
|
294
|
-
|
300
|
+
@source.match?(/\s*/um, true) # skip spaces
|
301
|
+
if @source.match?("[", true)
|
295
302
|
id = [nil, nil, nil]
|
296
303
|
@document_status = :in_doctype
|
297
|
-
elsif @source.match(
|
304
|
+
elsif @source.match?(">", true)
|
298
305
|
id = [nil, nil, nil]
|
299
306
|
@document_status = :after_doctype
|
300
307
|
@source.ensure_buffer
|
@@ -306,9 +313,10 @@ module REXML
|
|
306
313
|
# For backward compatibility
|
307
314
|
id[1], id[2] = id[2], nil
|
308
315
|
end
|
309
|
-
|
316
|
+
@source.match?(/\s*/um, true) # skip spaces
|
317
|
+
if @source.match?("[", true)
|
310
318
|
@document_status = :in_doctype
|
311
|
-
elsif @source.match(
|
319
|
+
elsif @source.match?(">", true)
|
312
320
|
@document_status = :after_doctype
|
313
321
|
@source.ensure_buffer
|
314
322
|
else
|
@@ -318,7 +326,7 @@ module REXML
|
|
318
326
|
end
|
319
327
|
args = [:start_doctype, name, *id]
|
320
328
|
if @document_status == :after_doctype
|
321
|
-
@source.match(/\s*/um, true)
|
329
|
+
@source.match?(/\s*/um, true)
|
322
330
|
@stack << [ :end_doctype ]
|
323
331
|
end
|
324
332
|
return args
|
@@ -329,14 +337,14 @@ module REXML
|
|
329
337
|
end
|
330
338
|
end
|
331
339
|
if @document_status == :in_doctype
|
332
|
-
@source.match(/\s*/um, true) # skip spaces
|
340
|
+
@source.match?(/\s*/um, true) # skip spaces
|
333
341
|
start_position = @source.position
|
334
|
-
if @source.match("<!", true)
|
335
|
-
if @source.match("ELEMENT", true)
|
342
|
+
if @source.match?("<!", true)
|
343
|
+
if @source.match?("ELEMENT", true)
|
336
344
|
md = @source.match(/(.*?)>/um, true)
|
337
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
338
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
339
|
-
elsif @source.match("ENTITY", true)
|
347
|
+
elsif @source.match?("ENTITY", true)
|
340
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
341
349
|
unless match_data
|
342
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
@@ -368,11 +376,11 @@ module REXML
|
|
368
376
|
end
|
369
377
|
match << '%' if ref
|
370
378
|
return match
|
371
|
-
elsif @source.match("ATTLIST", true)
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
372
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
373
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
374
382
|
element = md[1]
|
375
|
-
contents = md[0]
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
376
384
|
|
377
385
|
pairs = {}
|
378
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
@@ -388,10 +396,10 @@ module REXML
|
|
388
396
|
end
|
389
397
|
end
|
390
398
|
return [ :attlistdecl, element, pairs, contents ]
|
391
|
-
elsif @source.match("NOTATION", true)
|
399
|
+
elsif @source.match?("NOTATION", true)
|
392
400
|
base_error_message = "Malformed notation declaration"
|
393
|
-
unless @source.match(/\s+/um, true)
|
394
|
-
if @source.match(">")
|
401
|
+
unless @source.match?(/\s+/um, true)
|
402
|
+
if @source.match?(">")
|
395
403
|
message = "#{base_error_message}: name is missing"
|
396
404
|
else
|
397
405
|
message = "#{base_error_message}: invalid name"
|
@@ -403,7 +411,8 @@ module REXML
|
|
403
411
|
id = parse_id(base_error_message,
|
404
412
|
accept_external_id: true,
|
405
413
|
accept_public_id: true)
|
406
|
-
|
414
|
+
@source.match?(/\s*/um, true) # skip spaces
|
415
|
+
unless @source.match?(">", true)
|
407
416
|
message = "#{base_error_message}: garbage before end >"
|
408
417
|
raise REXML::ParseException.new(message, @source)
|
409
418
|
end
|
@@ -417,7 +426,7 @@ module REXML
|
|
417
426
|
end
|
418
427
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
419
428
|
return [ :externalentity, match[1] ]
|
420
|
-
elsif @source.match(/\]\s*>/um, true)
|
429
|
+
elsif @source.match?(/\]\s*>/um, true)
|
421
430
|
@document_status = :after_doctype
|
422
431
|
return [ :end_doctype ]
|
423
432
|
end
|
@@ -426,16 +435,16 @@ module REXML
|
|
426
435
|
end
|
427
436
|
end
|
428
437
|
if @document_status == :after_doctype
|
429
|
-
@source.match(/\s*/um, true)
|
438
|
+
@source.match?(/\s*/um, true)
|
430
439
|
end
|
431
440
|
begin
|
432
441
|
start_position = @source.position
|
433
|
-
if @source.match("<", true)
|
442
|
+
if @source.match?("<", true)
|
434
443
|
# :text's read_until may remain only "<" in buffer. In the
|
435
444
|
# case, buffer is empty here. So we need to fill buffer
|
436
445
|
# here explicitly.
|
437
446
|
@source.ensure_buffer
|
438
|
-
if @source.match("/", true)
|
447
|
+
if @source.match?("/", true)
|
439
448
|
@namespaces_restore_stack.pop
|
440
449
|
last_tag = @tags.pop
|
441
450
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
@@ -450,7 +459,7 @@ module REXML
|
|
450
459
|
raise REXML::ParseException.new(message, @source)
|
451
460
|
end
|
452
461
|
return [ :end_element, last_tag ]
|
453
|
-
elsif @source.match("!", true)
|
462
|
+
elsif @source.match?("!", true)
|
454
463
|
md = @source.match(/([^>]*>)/um)
|
455
464
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
456
465
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
@@ -468,7 +477,7 @@ module REXML
|
|
468
477
|
end
|
469
478
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
470
479
|
"in the doctype declaration.", @source)
|
471
|
-
elsif @source.match("?", true)
|
480
|
+
elsif @source.match?("?", true)
|
472
481
|
return process_instruction
|
473
482
|
else
|
474
483
|
# Get the next tag
|
@@ -568,8 +577,12 @@ module REXML
|
|
568
577
|
return rv if matches.size == 0
|
569
578
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
570
579
|
m=$1
|
571
|
-
|
572
|
-
|
580
|
+
if m.start_with?("x")
|
581
|
+
code_point = Integer(m[1..-1], 16)
|
582
|
+
else
|
583
|
+
code_point = Integer(m, 10)
|
584
|
+
end
|
585
|
+
[code_point].pack('U*')
|
573
586
|
}
|
574
587
|
matches.collect!{|x|x[0]}.compact!
|
575
588
|
if filter
|
@@ -645,7 +658,7 @@ module REXML
|
|
645
658
|
def parse_name(base_error_message)
|
646
659
|
md = @source.match(Private::NAME_PATTERN, true)
|
647
660
|
unless md
|
648
|
-
if @source.match(/\S/um)
|
661
|
+
if @source.match?(/\S/um)
|
649
662
|
message = "#{base_error_message}: invalid name"
|
650
663
|
else
|
651
664
|
message = "#{base_error_message}: name is missing"
|
@@ -687,34 +700,34 @@ module REXML
|
|
687
700
|
accept_public_id:)
|
688
701
|
public = /\A\s*PUBLIC/um
|
689
702
|
system = /\A\s*SYSTEM/um
|
690
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
691
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
703
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
704
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
692
705
|
return "public ID literal is missing"
|
693
706
|
end
|
694
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
707
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
695
708
|
return "invalid public ID literal"
|
696
709
|
end
|
697
710
|
if accept_public_id
|
698
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
711
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
699
712
|
return "system ID literal is missing"
|
700
713
|
end
|
701
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
714
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
702
715
|
return "invalid system literal"
|
703
716
|
end
|
704
717
|
"garbage after system literal"
|
705
718
|
else
|
706
719
|
"garbage after public ID literal"
|
707
720
|
end
|
708
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
709
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
721
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
722
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
710
723
|
return "system literal is missing"
|
711
724
|
end
|
712
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
725
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
713
726
|
return "invalid system literal"
|
714
727
|
end
|
715
728
|
"garbage after system literal"
|
716
729
|
else
|
717
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
730
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
718
731
|
return "invalid ID type"
|
719
732
|
end
|
720
733
|
"ID type is missing"
|
@@ -723,7 +736,7 @@ module REXML
|
|
723
736
|
|
724
737
|
def process_instruction
|
725
738
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
726
|
-
if @source.match(/\s+/um, true)
|
739
|
+
if @source.match?(/\s+/um, true)
|
727
740
|
match_data = @source.match(/(.*?)\?>/um, true)
|
728
741
|
unless match_data
|
729
742
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
@@ -731,7 +744,7 @@ module REXML
|
|
731
744
|
content = match_data[1]
|
732
745
|
else
|
733
746
|
content = nil
|
734
|
-
unless @source.match("?>", true)
|
747
|
+
unless @source.match?("?>", true)
|
735
748
|
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
736
749
|
end
|
737
750
|
end
|
@@ -756,14 +769,33 @@ module REXML
|
|
756
769
|
[:processing_instruction, name, content]
|
757
770
|
end
|
758
771
|
|
772
|
+
if StringScanner::Version < "3.1.1"
|
773
|
+
def scan_quote
|
774
|
+
@source.match(/(['"])/, true)&.[](1)
|
775
|
+
end
|
776
|
+
else
|
777
|
+
def scan_quote
|
778
|
+
case @source.peek_byte
|
779
|
+
when 34 # '"'.ord
|
780
|
+
@source.scan_byte
|
781
|
+
'"'
|
782
|
+
when 39 # "'".ord
|
783
|
+
@source.scan_byte
|
784
|
+
"'"
|
785
|
+
else
|
786
|
+
nil
|
787
|
+
end
|
788
|
+
end
|
789
|
+
end
|
790
|
+
|
759
791
|
def parse_attributes(prefixes)
|
760
792
|
attributes = {}
|
761
793
|
expanded_names = {}
|
762
794
|
closed = false
|
763
795
|
while true
|
764
|
-
if @source.match(">", true)
|
796
|
+
if @source.match?(">", true)
|
765
797
|
return attributes, closed
|
766
|
-
elsif @source.match("/>", true)
|
798
|
+
elsif @source.match?("/>", true)
|
767
799
|
closed = true
|
768
800
|
return attributes, closed
|
769
801
|
elsif match = @source.match(QNAME, true)
|
@@ -771,15 +803,14 @@ module REXML
|
|
771
803
|
prefix = match[2]
|
772
804
|
local_part = match[3]
|
773
805
|
|
774
|
-
unless @source.match(/\s*=\s*/um, true)
|
806
|
+
unless @source.match?(/\s*=\s*/um, true)
|
775
807
|
message = "Missing attribute equal: <#{name}>"
|
776
808
|
raise REXML::ParseException.new(message, @source)
|
777
809
|
end
|
778
|
-
unless
|
810
|
+
unless quote = scan_quote
|
779
811
|
message = "Missing attribute value start quote: <#{name}>"
|
780
812
|
raise REXML::ParseException.new(message, @source)
|
781
813
|
end
|
782
|
-
quote = match[1]
|
783
814
|
start_position = @source.position
|
784
815
|
value = @source.read_until(quote)
|
785
816
|
unless value.chomp!(quote)
|
@@ -787,10 +818,10 @@ module REXML
|
|
787
818
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
788
819
|
raise REXML::ParseException.new(message, @source)
|
789
820
|
end
|
790
|
-
@source.match(/\s*/um, true)
|
821
|
+
@source.match?(/\s*/um, true)
|
791
822
|
if prefix == "xmlns"
|
792
823
|
if local_part == "xml"
|
793
|
-
if value !=
|
824
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
794
825
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
795
826
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
796
827
|
raise REXML::ParseException.new( msg, @source, self )
|
@@ -259,6 +259,8 @@ module REXML
|
|
259
259
|
end
|
260
260
|
|
261
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
262
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
263
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
264
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -58,8 +68,14 @@ module REXML
|
|
58
68
|
SCANNER_RESET_SIZE = 100000
|
59
69
|
PRE_DEFINED_TERM_PATTERNS = {}
|
60
70
|
pre_defined_terms = ["'", '"', "<"]
|
61
|
-
|
62
|
-
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
63
79
|
end
|
64
80
|
end
|
65
81
|
private_constant :Private
|
@@ -77,6 +93,7 @@ module REXML
|
|
77
93
|
detect_encoding
|
78
94
|
end
|
79
95
|
@line = 0
|
96
|
+
@encoded_terms = {}
|
80
97
|
end
|
81
98
|
|
82
99
|
# The current buffer (what we're going to read next)
|
@@ -125,6 +142,14 @@ module REXML
|
|
125
142
|
end
|
126
143
|
end
|
127
144
|
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
128
153
|
def position
|
129
154
|
@scanner.pos
|
130
155
|
end
|
@@ -133,6 +158,14 @@ module REXML
|
|
133
158
|
@scanner.pos = pos
|
134
159
|
end
|
135
160
|
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
136
169
|
# @return true if the Source is exhausted
|
137
170
|
def empty?
|
138
171
|
@scanner.eos?
|
@@ -227,7 +260,7 @@ module REXML
|
|
227
260
|
|
228
261
|
def read_until(term)
|
229
262
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
230
|
-
term = encode(term)
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
231
264
|
until str = @scanner.scan_until(pattern)
|
232
265
|
break if @source.nil?
|
233
266
|
break if @source.eof?
|
@@ -266,6 +299,23 @@ module REXML
|
|
266
299
|
md.nil? ? nil : @scanner
|
267
300
|
end
|
268
301
|
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
269
319
|
def empty?
|
270
320
|
super and ( @source.nil? || @source.eof? )
|
271
321
|
end
|
@@ -285,7 +335,7 @@ module REXML
|
|
285
335
|
rescue
|
286
336
|
end
|
287
337
|
@er_source.seek(pos)
|
288
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
289
339
|
pos = -1
|
290
340
|
line = -1
|
291
341
|
end
|
@@ -294,14 +344,19 @@ module REXML
|
|
294
344
|
|
295
345
|
private
|
296
346
|
def readline(term = nil)
|
297
|
-
str = @source.readline(term || @line_break)
|
298
347
|
if @pending_buffer
|
348
|
+
begin
|
349
|
+
str = @source.readline(term || @line_break)
|
350
|
+
rescue IOError
|
351
|
+
end
|
299
352
|
if str.nil?
|
300
353
|
str = @pending_buffer
|
301
354
|
else
|
302
355
|
str = @pending_buffer + str
|
303
356
|
end
|
304
357
|
@pending_buffer = nil
|
358
|
+
else
|
359
|
+
str = @source.readline(term || @line_break)
|
305
360
|
end
|
306
361
|
return nil if str.nil?
|
307
362
|
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.7
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 2025-02-16 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: An XML toolkit for Ruby
|
13
13
|
email:
|
@@ -102,7 +102,7 @@ homepage: https://github.com/ruby/rexml
|
|
102
102
|
licenses:
|
103
103
|
- BSD-2-Clause
|
104
104
|
metadata:
|
105
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.7
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.1
|
106
106
|
rdoc_options:
|
107
107
|
- "--main"
|
108
108
|
- README.md
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.6.
|
122
|
+
rubygems_version: 3.6.2
|
123
123
|
specification_version: 4
|
124
124
|
summary: An XML toolkit for Ruby
|
125
125
|
test_files: []
|