rexml 3.3.8 → 3.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +219 -1
- data/lib/rexml/attribute.rb +7 -8
- data/lib/rexml/cdata.rb +1 -1
- data/lib/rexml/child.rb +2 -3
- data/lib/rexml/comment.rb +1 -1
- data/lib/rexml/doctype.rb +3 -8
- data/lib/rexml/document.rb +21 -5
- data/lib/rexml/element.rb +53 -59
- data/lib/rexml/encoding.rb +3 -6
- data/lib/rexml/functions.rb +3 -3
- data/lib/rexml/instruction.rb +1 -1
- data/lib/rexml/namespace.rb +4 -4
- data/lib/rexml/node.rb +2 -2
- data/lib/rexml/parsers/baseparser.rb +208 -116
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/xpathparser.rb +4 -4
- data/lib/rexml/quickpath.rb +19 -18
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/security.rb +2 -2
- data/lib/rexml/source.rb +68 -8
- data/lib/rexml/text.rb +29 -57
- data/lib/rexml/validation/relaxng.rb +27 -26
- data/lib/rexml/validation/validation.rb +8 -8
- data/lib/rexml/xpath.rb +2 -13
- data/lib/rexml/xpath_parser.rb +44 -42
- metadata +4 -4
data/lib/rexml/encoding.rb
CHANGED
|
@@ -5,7 +5,7 @@ module REXML
|
|
|
5
5
|
# ID ---> Encoding name
|
|
6
6
|
attr_reader :encoding
|
|
7
7
|
def encoding=(encoding)
|
|
8
|
-
encoding = encoding.name if encoding.is_a?(Encoding)
|
|
8
|
+
encoding = encoding.name if encoding.is_a?(::Encoding)
|
|
9
9
|
if encoding.is_a?(String)
|
|
10
10
|
original_encoding = encoding
|
|
11
11
|
encoding = find_encoding(encoding)
|
|
@@ -13,12 +13,9 @@ module REXML
|
|
|
13
13
|
raise ArgumentError, "Bad encoding name #{original_encoding}"
|
|
14
14
|
end
|
|
15
15
|
end
|
|
16
|
+
encoding = encoding.upcase if encoding
|
|
16
17
|
return false if defined?(@encoding) and encoding == @encoding
|
|
17
|
-
|
|
18
|
-
@encoding = encoding.upcase
|
|
19
|
-
else
|
|
20
|
-
@encoding = 'UTF-8'
|
|
21
|
-
end
|
|
18
|
+
@encoding = encoding || "UTF-8"
|
|
22
19
|
true
|
|
23
20
|
end
|
|
24
21
|
|
data/lib/rexml/functions.rb
CHANGED
|
@@ -39,11 +39,11 @@ module REXML
|
|
|
39
39
|
|
|
40
40
|
def Functions::text( )
|
|
41
41
|
if @@context[:node].node_type == :element
|
|
42
|
-
|
|
42
|
+
@@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
|
|
43
43
|
elsif @@context[:node].node_type == :text
|
|
44
|
-
|
|
44
|
+
@@context[:node].value
|
|
45
45
|
else
|
|
46
|
-
|
|
46
|
+
false
|
|
47
47
|
end
|
|
48
48
|
end
|
|
49
49
|
|
data/lib/rexml/instruction.rb
CHANGED
|
@@ -49,7 +49,7 @@ module REXML
|
|
|
49
49
|
# See the rexml/formatters package
|
|
50
50
|
#
|
|
51
51
|
def write writer, indent=-1, transitive=false, ie_hack=false
|
|
52
|
-
Kernel.warn( "#{self.class.name}
|
|
52
|
+
Kernel.warn( "#{self.class.name}#write is deprecated", uplevel: 1)
|
|
53
53
|
indent(writer, indent)
|
|
54
54
|
writer << START
|
|
55
55
|
writer << @target
|
data/lib/rexml/namespace.rb
CHANGED
|
@@ -42,11 +42,11 @@ module REXML
|
|
|
42
42
|
# Compares names optionally WITH namespaces
|
|
43
43
|
def has_name?( other, ns=nil )
|
|
44
44
|
if ns
|
|
45
|
-
|
|
45
|
+
namespace() == ns and name() == other
|
|
46
46
|
elsif other.include? ":"
|
|
47
|
-
|
|
47
|
+
fully_expanded_name == other
|
|
48
48
|
else
|
|
49
|
-
|
|
49
|
+
name == other
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -57,7 +57,7 @@ module REXML
|
|
|
57
57
|
def fully_expanded_name
|
|
58
58
|
ns = prefix
|
|
59
59
|
return "#{ns}:#@name" if ns.size > 0
|
|
60
|
-
|
|
60
|
+
@name
|
|
61
61
|
end
|
|
62
62
|
end
|
|
63
63
|
end
|
data/lib/rexml/node.rb
CHANGED
|
@@ -26,7 +26,7 @@ module REXML
|
|
|
26
26
|
# REXML::Formatters package for changing the output style.
|
|
27
27
|
def to_s indent=nil
|
|
28
28
|
unless indent.nil?
|
|
29
|
-
Kernel.warn( "#{self.class.name}
|
|
29
|
+
Kernel.warn( "#{self.class.name}#to_s(indent) parameter is deprecated", uplevel: 1)
|
|
30
30
|
f = REXML::Formatters::Pretty.new( indent )
|
|
31
31
|
f.write( self, rv = "" )
|
|
32
32
|
else
|
|
@@ -68,7 +68,7 @@ module REXML
|
|
|
68
68
|
each_recursive {|node|
|
|
69
69
|
return node if block.call(node)
|
|
70
70
|
}
|
|
71
|
-
|
|
71
|
+
nil
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
# Returns the position that +self+ holds in its parent's array, indexed
|
|
@@ -144,13 +144,14 @@ module REXML
|
|
|
144
144
|
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
|
145
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
146
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
147
|
+
EQUAL_PATTERN = /\s*=\s*/um
|
|
147
148
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
148
149
|
NAME_PATTERN = /#{NAME}/um
|
|
149
150
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
150
151
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
151
152
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
152
153
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
|
153
|
-
CHARACTER_REFERENCES = /&#
|
|
154
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
154
155
|
DEFAULT_ENTITIES_PATTERNS = {}
|
|
155
156
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
|
156
157
|
default_entities.each do |term|
|
|
@@ -167,6 +168,8 @@ module REXML
|
|
|
167
168
|
@entity_expansion_count = 0
|
|
168
169
|
@entity_expansion_limit = Security.entity_expansion_limit
|
|
169
170
|
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
|
171
|
+
@source.ensure_buffer
|
|
172
|
+
@version = nil
|
|
170
173
|
end
|
|
171
174
|
|
|
172
175
|
def add_listener( listener )
|
|
@@ -180,6 +183,10 @@ module REXML
|
|
|
180
183
|
|
|
181
184
|
def stream=( source )
|
|
182
185
|
@source = SourceFactory.create_from( source )
|
|
186
|
+
reset
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def reset
|
|
183
190
|
@closed = nil
|
|
184
191
|
@have_root = false
|
|
185
192
|
@document_status = nil
|
|
@@ -201,12 +208,12 @@ module REXML
|
|
|
201
208
|
|
|
202
209
|
# Returns true if there are no more events
|
|
203
210
|
def empty?
|
|
204
|
-
|
|
211
|
+
(@source.empty? and @stack.empty?)
|
|
205
212
|
end
|
|
206
213
|
|
|
207
214
|
# Returns true if there are more events. Synonymous with !empty?
|
|
208
215
|
def has_next?
|
|
209
|
-
|
|
216
|
+
!(@source.empty? and @stack.empty?)
|
|
210
217
|
end
|
|
211
218
|
|
|
212
219
|
# Push an event back on the head of the stream. This method
|
|
@@ -259,6 +266,11 @@ module REXML
|
|
|
259
266
|
path = "/" + @tags.join("/")
|
|
260
267
|
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
|
261
268
|
end
|
|
269
|
+
|
|
270
|
+
unless @document_status == :in_element
|
|
271
|
+
raise ParseException.new("Malformed XML: No root element", @source)
|
|
272
|
+
end
|
|
273
|
+
|
|
262
274
|
return [ :end_document ]
|
|
263
275
|
end
|
|
264
276
|
return @stack.shift if @stack.size > 0
|
|
@@ -268,22 +280,15 @@ module REXML
|
|
|
268
280
|
@source.ensure_buffer
|
|
269
281
|
if @document_status == nil
|
|
270
282
|
start_position = @source.position
|
|
271
|
-
if @source.match("<?", true)
|
|
283
|
+
if @source.match?("<?", true)
|
|
272
284
|
return process_instruction
|
|
273
|
-
elsif @source.match("<!", true)
|
|
274
|
-
if @source.match("--", true)
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
raise REXML::ParseException.new("Unclosed comment", @source)
|
|
278
|
-
end
|
|
279
|
-
if /--|-\z/.match?(md[1])
|
|
280
|
-
raise REXML::ParseException.new("Malformed comment", @source)
|
|
281
|
-
end
|
|
282
|
-
return [ :comment, md[1] ]
|
|
283
|
-
elsif @source.match("DOCTYPE", true)
|
|
285
|
+
elsif @source.match?("<!", true)
|
|
286
|
+
if @source.match?("--", true)
|
|
287
|
+
return [ :comment, process_comment ]
|
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
|
284
289
|
base_error_message = "Malformed DOCTYPE"
|
|
285
|
-
unless @source.
|
|
286
|
-
if @source.match(">")
|
|
290
|
+
unless @source.skip_spaces
|
|
291
|
+
if @source.match?(">")
|
|
287
292
|
message = "#{base_error_message}: name is missing"
|
|
288
293
|
else
|
|
289
294
|
message = "#{base_error_message}: invalid name"
|
|
@@ -292,10 +297,11 @@ module REXML
|
|
|
292
297
|
raise REXML::ParseException.new(message, @source)
|
|
293
298
|
end
|
|
294
299
|
name = parse_name(base_error_message)
|
|
295
|
-
|
|
300
|
+
@source.skip_spaces
|
|
301
|
+
if @source.match?("[", true)
|
|
296
302
|
id = [nil, nil, nil]
|
|
297
303
|
@document_status = :in_doctype
|
|
298
|
-
elsif @source.match(
|
|
304
|
+
elsif @source.match?(">", true)
|
|
299
305
|
id = [nil, nil, nil]
|
|
300
306
|
@document_status = :after_doctype
|
|
301
307
|
@source.ensure_buffer
|
|
@@ -307,9 +313,10 @@ module REXML
|
|
|
307
313
|
# For backward compatibility
|
|
308
314
|
id[1], id[2] = id[2], nil
|
|
309
315
|
end
|
|
310
|
-
|
|
316
|
+
@source.skip_spaces
|
|
317
|
+
if @source.match?("[", true)
|
|
311
318
|
@document_status = :in_doctype
|
|
312
|
-
elsif @source.match(
|
|
319
|
+
elsif @source.match?(">", true)
|
|
313
320
|
@document_status = :after_doctype
|
|
314
321
|
@source.ensure_buffer
|
|
315
322
|
else
|
|
@@ -319,7 +326,7 @@ module REXML
|
|
|
319
326
|
end
|
|
320
327
|
args = [:start_doctype, name, *id]
|
|
321
328
|
if @document_status == :after_doctype
|
|
322
|
-
@source.
|
|
329
|
+
@source.skip_spaces
|
|
323
330
|
@stack << [ :end_doctype ]
|
|
324
331
|
end
|
|
325
332
|
return args
|
|
@@ -330,14 +337,14 @@ module REXML
|
|
|
330
337
|
end
|
|
331
338
|
end
|
|
332
339
|
if @document_status == :in_doctype
|
|
333
|
-
@source.
|
|
340
|
+
@source.skip_spaces
|
|
334
341
|
start_position = @source.position
|
|
335
|
-
if @source.match("<!", true)
|
|
336
|
-
if @source.match("ELEMENT", true)
|
|
342
|
+
if @source.match?("<!", true)
|
|
343
|
+
if @source.match?("ELEMENT", true)
|
|
337
344
|
md = @source.match(/(.*?)>/um, true)
|
|
338
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
339
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
340
|
-
elsif @source.match("ENTITY", true)
|
|
347
|
+
elsif @source.match?("ENTITY", true)
|
|
341
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
342
349
|
unless match_data
|
|
343
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
|
@@ -369,11 +376,11 @@ module REXML
|
|
|
369
376
|
end
|
|
370
377
|
match << '%' if ref
|
|
371
378
|
return match
|
|
372
|
-
elsif @source.match("ATTLIST", true)
|
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
|
373
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
|
374
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
375
382
|
element = md[1]
|
|
376
|
-
contents = md[0]
|
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
|
377
384
|
|
|
378
385
|
pairs = {}
|
|
379
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
|
@@ -389,10 +396,10 @@ module REXML
|
|
|
389
396
|
end
|
|
390
397
|
end
|
|
391
398
|
return [ :attlistdecl, element, pairs, contents ]
|
|
392
|
-
elsif @source.match("NOTATION", true)
|
|
399
|
+
elsif @source.match?("NOTATION", true)
|
|
393
400
|
base_error_message = "Malformed notation declaration"
|
|
394
|
-
unless @source.
|
|
395
|
-
if @source.match(">")
|
|
401
|
+
unless @source.skip_spaces
|
|
402
|
+
if @source.match?(">")
|
|
396
403
|
message = "#{base_error_message}: name is missing"
|
|
397
404
|
else
|
|
398
405
|
message = "#{base_error_message}: invalid name"
|
|
@@ -404,39 +411,37 @@ module REXML
|
|
|
404
411
|
id = parse_id(base_error_message,
|
|
405
412
|
accept_external_id: true,
|
|
406
413
|
accept_public_id: true)
|
|
407
|
-
|
|
414
|
+
@source.skip_spaces
|
|
415
|
+
unless @source.match?(">", true)
|
|
408
416
|
message = "#{base_error_message}: garbage before end >"
|
|
409
417
|
raise REXML::ParseException.new(message, @source)
|
|
410
418
|
end
|
|
411
419
|
return [:notationdecl, name, *id]
|
|
412
|
-
elsif
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
end
|
|
417
|
-
return [ :comment, md[1] ] if md
|
|
420
|
+
elsif @source.match?("--", true)
|
|
421
|
+
return [ :comment, process_comment ]
|
|
422
|
+
else
|
|
423
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
|
|
418
424
|
end
|
|
419
425
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
420
426
|
return [ :externalentity, match[1] ]
|
|
421
|
-
elsif @source.match(/\]\s*>/um, true)
|
|
427
|
+
elsif @source.match?(/\]\s*>/um, true)
|
|
422
428
|
@document_status = :after_doctype
|
|
423
429
|
return [ :end_doctype ]
|
|
424
|
-
|
|
425
|
-
if @document_status == :in_doctype
|
|
430
|
+
else
|
|
426
431
|
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
|
427
432
|
end
|
|
428
433
|
end
|
|
429
434
|
if @document_status == :after_doctype
|
|
430
|
-
@source.
|
|
435
|
+
@source.skip_spaces
|
|
431
436
|
end
|
|
432
437
|
begin
|
|
433
438
|
start_position = @source.position
|
|
434
|
-
if @source.match("<", true)
|
|
439
|
+
if @source.match?("<", true)
|
|
435
440
|
# :text's read_until may remain only "<" in buffer. In the
|
|
436
441
|
# case, buffer is empty here. So we need to fill buffer
|
|
437
442
|
# here explicitly.
|
|
438
443
|
@source.ensure_buffer
|
|
439
|
-
if @source.match("/", true)
|
|
444
|
+
if @source.match?("/", true)
|
|
440
445
|
@namespaces_restore_stack.pop
|
|
441
446
|
last_tag = @tags.pop
|
|
442
447
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
|
@@ -451,25 +456,21 @@ module REXML
|
|
|
451
456
|
raise REXML::ParseException.new(message, @source)
|
|
452
457
|
end
|
|
453
458
|
return [ :end_element, last_tag ]
|
|
454
|
-
elsif @source.match("!", true)
|
|
455
|
-
md = @source.match(/([^>]*>)/um)
|
|
459
|
+
elsif @source.match?("!", true)
|
|
456
460
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
if
|
|
462
|
-
|
|
461
|
+
if @source.match?("--", true)
|
|
462
|
+
return [ :comment, process_comment ]
|
|
463
|
+
elsif @source.match?("[CDATA[", true)
|
|
464
|
+
text = @source.read_until("]]>")
|
|
465
|
+
if text.chomp!("]]>")
|
|
466
|
+
return [ :cdata, text ]
|
|
467
|
+
else
|
|
468
|
+
raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
|
|
463
469
|
end
|
|
464
|
-
|
|
465
|
-
return [ :comment, md[1] ]
|
|
466
470
|
else
|
|
467
|
-
|
|
468
|
-
return [ :cdata, md[1] ] if md
|
|
471
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
|
|
469
472
|
end
|
|
470
|
-
|
|
471
|
-
"in the doctype declaration.", @source)
|
|
472
|
-
elsif @source.match("?", true)
|
|
473
|
+
elsif @source.match?("?", true)
|
|
473
474
|
return process_instruction
|
|
474
475
|
else
|
|
475
476
|
# Get the next tag
|
|
@@ -528,7 +529,8 @@ module REXML
|
|
|
528
529
|
raise REXML::ParseException.new( "Exception parsing",
|
|
529
530
|
@source, self, (error ? error : $!) )
|
|
530
531
|
end
|
|
531
|
-
|
|
532
|
+
# NOTE: The end of the method never runs, because it is unreachable.
|
|
533
|
+
# All branches of code above have explicit unconditional return or raise statements.
|
|
532
534
|
end
|
|
533
535
|
private :pull_event
|
|
534
536
|
|
|
@@ -569,8 +571,12 @@ module REXML
|
|
|
569
571
|
return rv if matches.size == 0
|
|
570
572
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
|
571
573
|
m=$1
|
|
572
|
-
|
|
573
|
-
|
|
574
|
+
if m.start_with?("x")
|
|
575
|
+
code_point = Integer(m[1..-1], 16)
|
|
576
|
+
else
|
|
577
|
+
code_point = Integer(m, 10)
|
|
578
|
+
end
|
|
579
|
+
[code_point].pack('U*')
|
|
574
580
|
}
|
|
575
581
|
matches.collect!{|x|x[0]}.compact!
|
|
576
582
|
if filter
|
|
@@ -643,10 +649,14 @@ module REXML
|
|
|
643
649
|
true
|
|
644
650
|
end
|
|
645
651
|
|
|
652
|
+
def normalize_xml_declaration_encoding(xml_declaration_encoding)
|
|
653
|
+
/\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
|
|
654
|
+
end
|
|
655
|
+
|
|
646
656
|
def parse_name(base_error_message)
|
|
647
657
|
md = @source.match(Private::NAME_PATTERN, true)
|
|
648
658
|
unless md
|
|
649
|
-
if @source.match(/\S/um)
|
|
659
|
+
if @source.match?(/\S/um)
|
|
650
660
|
message = "#{base_error_message}: invalid name"
|
|
651
661
|
else
|
|
652
662
|
message = "#{base_error_message}: name is missing"
|
|
@@ -688,73 +698,171 @@ module REXML
|
|
|
688
698
|
accept_public_id:)
|
|
689
699
|
public = /\A\s*PUBLIC/um
|
|
690
700
|
system = /\A\s*SYSTEM/um
|
|
691
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
|
692
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
701
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
|
702
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
693
703
|
return "public ID literal is missing"
|
|
694
704
|
end
|
|
695
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
705
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
696
706
|
return "invalid public ID literal"
|
|
697
707
|
end
|
|
698
708
|
if accept_public_id
|
|
699
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
709
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
700
710
|
return "system ID literal is missing"
|
|
701
711
|
end
|
|
702
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
712
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
703
713
|
return "invalid system literal"
|
|
704
714
|
end
|
|
705
715
|
"garbage after system literal"
|
|
706
716
|
else
|
|
707
717
|
"garbage after public ID literal"
|
|
708
718
|
end
|
|
709
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
|
710
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
719
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
|
720
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
711
721
|
return "system literal is missing"
|
|
712
722
|
end
|
|
713
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
723
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
714
724
|
return "invalid system literal"
|
|
715
725
|
end
|
|
716
726
|
"garbage after system literal"
|
|
717
727
|
else
|
|
718
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
728
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
719
729
|
return "invalid ID type"
|
|
720
730
|
end
|
|
721
731
|
"ID type is missing"
|
|
722
732
|
end
|
|
723
733
|
end
|
|
724
734
|
|
|
735
|
+
def process_comment
|
|
736
|
+
text = @source.read_until("-->")
|
|
737
|
+
unless text.chomp!("-->")
|
|
738
|
+
raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
|
|
739
|
+
end
|
|
740
|
+
|
|
741
|
+
if text.include? "--" or text.end_with?("-")
|
|
742
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
|
743
|
+
end
|
|
744
|
+
text
|
|
745
|
+
end
|
|
746
|
+
|
|
725
747
|
def process_instruction
|
|
726
748
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
|
727
|
-
if
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
749
|
+
if name == "xml"
|
|
750
|
+
xml_declaration
|
|
751
|
+
else # PITarget
|
|
752
|
+
if @source.skip_spaces # e.g. <?name content?>
|
|
753
|
+
start_position = @source.position
|
|
754
|
+
content = @source.read_until("?>")
|
|
755
|
+
unless content.chomp!("?>")
|
|
756
|
+
@source.position = start_position
|
|
757
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
758
|
+
end
|
|
759
|
+
else # e.g. <?name?>
|
|
760
|
+
content = nil
|
|
761
|
+
unless @source.match?("?>", true)
|
|
762
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
763
|
+
end
|
|
731
764
|
end
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
765
|
+
[:processing_instruction, name, content]
|
|
766
|
+
end
|
|
767
|
+
end
|
|
768
|
+
|
|
769
|
+
def xml_declaration
|
|
770
|
+
unless @version.nil?
|
|
771
|
+
raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
|
|
772
|
+
end
|
|
773
|
+
if @document_status
|
|
774
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
|
775
|
+
end
|
|
776
|
+
unless @source.skip_spaces
|
|
777
|
+
raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
|
|
778
|
+
end
|
|
779
|
+
unless @source.match?("version", true)
|
|
780
|
+
raise ParseException.new("Malformed XML: XML declaration misses version", @source)
|
|
781
|
+
end
|
|
782
|
+
@version = parse_attribute_value_with_equal("xml")
|
|
783
|
+
unless @source.skip_spaces
|
|
784
|
+
unless @source.match?("?>", true)
|
|
785
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
737
786
|
end
|
|
787
|
+
encoding = normalize_xml_declaration_encoding(@source.encoding)
|
|
788
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
|
|
738
789
|
end
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
790
|
+
|
|
791
|
+
if @source.match?("encoding", true)
|
|
792
|
+
encoding = parse_attribute_value_with_equal("xml")
|
|
793
|
+
unless @source.skip_spaces
|
|
794
|
+
unless @source.match?("?>", true)
|
|
795
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
796
|
+
end
|
|
797
|
+
if need_source_encoding_update?(encoding)
|
|
798
|
+
@source.encoding = encoding
|
|
799
|
+
end
|
|
800
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
801
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
|
|
742
802
|
end
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
803
|
+
end
|
|
804
|
+
|
|
805
|
+
if @source.match?("standalone", true)
|
|
806
|
+
standalone = parse_attribute_value_with_equal("xml")
|
|
807
|
+
case standalone
|
|
808
|
+
when "yes", "no"
|
|
809
|
+
else
|
|
810
|
+
raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
|
|
749
811
|
end
|
|
750
|
-
|
|
751
|
-
|
|
812
|
+
end
|
|
813
|
+
@source.skip_spaces
|
|
814
|
+
unless @source.match?("?>", true)
|
|
815
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
816
|
+
end
|
|
817
|
+
|
|
818
|
+
if need_source_encoding_update?(encoding)
|
|
819
|
+
@source.encoding = encoding
|
|
820
|
+
end
|
|
821
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
822
|
+
|
|
823
|
+
# e.g. <?xml version="1.0" ?>
|
|
824
|
+
# <?xml version="1.1" encoding="UTF-8" ?>
|
|
825
|
+
# <?xml version="1.1" standalone="yes"?>
|
|
826
|
+
# <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
|
|
827
|
+
[ :xmldecl, @version, encoding, standalone ]
|
|
828
|
+
end
|
|
829
|
+
|
|
830
|
+
if StringScanner::Version < "3.1.1"
|
|
831
|
+
def scan_quote
|
|
832
|
+
@source.match(/(['"])/, true)&.[](1)
|
|
833
|
+
end
|
|
834
|
+
else
|
|
835
|
+
def scan_quote
|
|
836
|
+
case @source.peek_byte
|
|
837
|
+
when 34 # '"'.ord
|
|
838
|
+
@source.scan_byte
|
|
839
|
+
'"'
|
|
840
|
+
when 39 # "'".ord
|
|
841
|
+
@source.scan_byte
|
|
842
|
+
"'"
|
|
843
|
+
else
|
|
844
|
+
nil
|
|
752
845
|
end
|
|
753
|
-
standalone = STANDALONE.match(content)
|
|
754
|
-
standalone = standalone[1] unless standalone.nil?
|
|
755
|
-
return [ :xmldecl, version, encoding, standalone ]
|
|
756
846
|
end
|
|
757
|
-
|
|
847
|
+
end
|
|
848
|
+
|
|
849
|
+
def parse_attribute_value_with_equal(name)
|
|
850
|
+
unless @source.match?(Private::EQUAL_PATTERN, true)
|
|
851
|
+
message = "Missing attribute equal: <#{name}>"
|
|
852
|
+
raise REXML::ParseException.new(message, @source)
|
|
853
|
+
end
|
|
854
|
+
unless quote = scan_quote
|
|
855
|
+
message = "Missing attribute value start quote: <#{name}>"
|
|
856
|
+
raise REXML::ParseException.new(message, @source)
|
|
857
|
+
end
|
|
858
|
+
start_position = @source.position
|
|
859
|
+
value = @source.read_until(quote)
|
|
860
|
+
unless value.chomp!(quote)
|
|
861
|
+
@source.position = start_position
|
|
862
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
863
|
+
raise REXML::ParseException.new(message, @source)
|
|
864
|
+
end
|
|
865
|
+
value
|
|
758
866
|
end
|
|
759
867
|
|
|
760
868
|
def parse_attributes(prefixes)
|
|
@@ -762,33 +870,17 @@ module REXML
|
|
|
762
870
|
expanded_names = {}
|
|
763
871
|
closed = false
|
|
764
872
|
while true
|
|
765
|
-
if @source.match(">", true)
|
|
873
|
+
if @source.match?(">", true)
|
|
766
874
|
return attributes, closed
|
|
767
|
-
elsif @source.match("/>", true)
|
|
875
|
+
elsif @source.match?("/>", true)
|
|
768
876
|
closed = true
|
|
769
877
|
return attributes, closed
|
|
770
878
|
elsif match = @source.match(QNAME, true)
|
|
771
879
|
name = match[1]
|
|
772
880
|
prefix = match[2]
|
|
773
881
|
local_part = match[3]
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
message = "Missing attribute equal: <#{name}>"
|
|
777
|
-
raise REXML::ParseException.new(message, @source)
|
|
778
|
-
end
|
|
779
|
-
unless match = @source.match(/(['"])/, true)
|
|
780
|
-
message = "Missing attribute value start quote: <#{name}>"
|
|
781
|
-
raise REXML::ParseException.new(message, @source)
|
|
782
|
-
end
|
|
783
|
-
quote = match[1]
|
|
784
|
-
start_position = @source.position
|
|
785
|
-
value = @source.read_until(quote)
|
|
786
|
-
unless value.chomp!(quote)
|
|
787
|
-
@source.position = start_position
|
|
788
|
-
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
789
|
-
raise REXML::ParseException.new(message, @source)
|
|
790
|
-
end
|
|
791
|
-
@source.match(/\s*/um, true)
|
|
882
|
+
value = parse_attribute_value_with_equal(name)
|
|
883
|
+
@source.skip_spaces
|
|
792
884
|
if prefix == "xmlns"
|
|
793
885
|
if local_part == "xml"
|
|
794
886
|
if value != Private::XML_PREFIXED_NAMESPACE
|