rexml 3.3.7 → 3.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +240 -1
- data/lib/rexml/attribute.rb +7 -8
- data/lib/rexml/cdata.rb +1 -1
- data/lib/rexml/child.rb +2 -3
- data/lib/rexml/comment.rb +1 -1
- data/lib/rexml/doctype.rb +3 -8
- data/lib/rexml/document.rb +21 -5
- data/lib/rexml/element.rb +53 -59
- data/lib/rexml/encoding.rb +3 -6
- data/lib/rexml/functions.rb +3 -3
- data/lib/rexml/instruction.rb +1 -1
- data/lib/rexml/namespace.rb +4 -4
- data/lib/rexml/node.rb +2 -2
- data/lib/rexml/parsers/baseparser.rb +211 -118
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +2 -0
- data/lib/rexml/parsers/xpathparser.rb +4 -4
- data/lib/rexml/quickpath.rb +19 -18
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/security.rb +2 -2
- data/lib/rexml/source.rb +68 -8
- data/lib/rexml/text.rb +29 -57
- data/lib/rexml/validation/relaxng.rb +27 -26
- data/lib/rexml/validation/validation.rb +8 -8
- data/lib/rexml/xpath.rb +2 -13
- data/lib/rexml/xpath_parser.rb +44 -42
- metadata +4 -4
|
@@ -144,18 +144,20 @@ module REXML
|
|
|
144
144
|
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
|
145
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
146
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
147
|
+
EQUAL_PATTERN = /\s*=\s*/um
|
|
147
148
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
148
149
|
NAME_PATTERN = /#{NAME}/um
|
|
149
150
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
150
151
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
151
152
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
152
153
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
|
153
|
-
CHARACTER_REFERENCES = /&#
|
|
154
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
154
155
|
DEFAULT_ENTITIES_PATTERNS = {}
|
|
155
156
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
|
156
157
|
default_entities.each do |term|
|
|
157
158
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
|
158
159
|
end
|
|
160
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
|
159
161
|
end
|
|
160
162
|
private_constant :Private
|
|
161
163
|
|
|
@@ -166,6 +168,8 @@ module REXML
|
|
|
166
168
|
@entity_expansion_count = 0
|
|
167
169
|
@entity_expansion_limit = Security.entity_expansion_limit
|
|
168
170
|
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
|
171
|
+
@source.ensure_buffer
|
|
172
|
+
@version = nil
|
|
169
173
|
end
|
|
170
174
|
|
|
171
175
|
def add_listener( listener )
|
|
@@ -179,13 +183,17 @@ module REXML
|
|
|
179
183
|
|
|
180
184
|
def stream=( source )
|
|
181
185
|
@source = SourceFactory.create_from( source )
|
|
186
|
+
reset
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def reset
|
|
182
190
|
@closed = nil
|
|
183
191
|
@have_root = false
|
|
184
192
|
@document_status = nil
|
|
185
193
|
@tags = []
|
|
186
194
|
@stack = []
|
|
187
195
|
@entities = []
|
|
188
|
-
@namespaces = {}
|
|
196
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
|
189
197
|
@namespaces_restore_stack = []
|
|
190
198
|
end
|
|
191
199
|
|
|
@@ -200,12 +208,12 @@ module REXML
|
|
|
200
208
|
|
|
201
209
|
# Returns true if there are no more events
|
|
202
210
|
def empty?
|
|
203
|
-
|
|
211
|
+
(@source.empty? and @stack.empty?)
|
|
204
212
|
end
|
|
205
213
|
|
|
206
214
|
# Returns true if there are more events. Synonymous with !empty?
|
|
207
215
|
def has_next?
|
|
208
|
-
|
|
216
|
+
!(@source.empty? and @stack.empty?)
|
|
209
217
|
end
|
|
210
218
|
|
|
211
219
|
# Push an event back on the head of the stream. This method
|
|
@@ -258,6 +266,11 @@ module REXML
|
|
|
258
266
|
path = "/" + @tags.join("/")
|
|
259
267
|
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
|
260
268
|
end
|
|
269
|
+
|
|
270
|
+
unless @document_status == :in_element
|
|
271
|
+
raise ParseException.new("Malformed XML: No root element", @source)
|
|
272
|
+
end
|
|
273
|
+
|
|
261
274
|
return [ :end_document ]
|
|
262
275
|
end
|
|
263
276
|
return @stack.shift if @stack.size > 0
|
|
@@ -267,22 +280,15 @@ module REXML
|
|
|
267
280
|
@source.ensure_buffer
|
|
268
281
|
if @document_status == nil
|
|
269
282
|
start_position = @source.position
|
|
270
|
-
if @source.match("<?", true)
|
|
283
|
+
if @source.match?("<?", true)
|
|
271
284
|
return process_instruction
|
|
272
|
-
elsif @source.match("<!", true)
|
|
273
|
-
if @source.match("--", true)
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
raise REXML::ParseException.new("Unclosed comment", @source)
|
|
277
|
-
end
|
|
278
|
-
if /--|-\z/.match?(md[1])
|
|
279
|
-
raise REXML::ParseException.new("Malformed comment", @source)
|
|
280
|
-
end
|
|
281
|
-
return [ :comment, md[1] ]
|
|
282
|
-
elsif @source.match("DOCTYPE", true)
|
|
285
|
+
elsif @source.match?("<!", true)
|
|
286
|
+
if @source.match?("--", true)
|
|
287
|
+
return [ :comment, process_comment ]
|
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
|
283
289
|
base_error_message = "Malformed DOCTYPE"
|
|
284
|
-
unless @source.
|
|
285
|
-
if @source.match(">")
|
|
290
|
+
unless @source.skip_spaces
|
|
291
|
+
if @source.match?(">")
|
|
286
292
|
message = "#{base_error_message}: name is missing"
|
|
287
293
|
else
|
|
288
294
|
message = "#{base_error_message}: invalid name"
|
|
@@ -291,10 +297,11 @@ module REXML
|
|
|
291
297
|
raise REXML::ParseException.new(message, @source)
|
|
292
298
|
end
|
|
293
299
|
name = parse_name(base_error_message)
|
|
294
|
-
|
|
300
|
+
@source.skip_spaces
|
|
301
|
+
if @source.match?("[", true)
|
|
295
302
|
id = [nil, nil, nil]
|
|
296
303
|
@document_status = :in_doctype
|
|
297
|
-
elsif @source.match(
|
|
304
|
+
elsif @source.match?(">", true)
|
|
298
305
|
id = [nil, nil, nil]
|
|
299
306
|
@document_status = :after_doctype
|
|
300
307
|
@source.ensure_buffer
|
|
@@ -306,9 +313,10 @@ module REXML
|
|
|
306
313
|
# For backward compatibility
|
|
307
314
|
id[1], id[2] = id[2], nil
|
|
308
315
|
end
|
|
309
|
-
|
|
316
|
+
@source.skip_spaces
|
|
317
|
+
if @source.match?("[", true)
|
|
310
318
|
@document_status = :in_doctype
|
|
311
|
-
elsif @source.match(
|
|
319
|
+
elsif @source.match?(">", true)
|
|
312
320
|
@document_status = :after_doctype
|
|
313
321
|
@source.ensure_buffer
|
|
314
322
|
else
|
|
@@ -318,7 +326,7 @@ module REXML
|
|
|
318
326
|
end
|
|
319
327
|
args = [:start_doctype, name, *id]
|
|
320
328
|
if @document_status == :after_doctype
|
|
321
|
-
@source.
|
|
329
|
+
@source.skip_spaces
|
|
322
330
|
@stack << [ :end_doctype ]
|
|
323
331
|
end
|
|
324
332
|
return args
|
|
@@ -329,14 +337,14 @@ module REXML
|
|
|
329
337
|
end
|
|
330
338
|
end
|
|
331
339
|
if @document_status == :in_doctype
|
|
332
|
-
@source.
|
|
340
|
+
@source.skip_spaces
|
|
333
341
|
start_position = @source.position
|
|
334
|
-
if @source.match("<!", true)
|
|
335
|
-
if @source.match("ELEMENT", true)
|
|
342
|
+
if @source.match?("<!", true)
|
|
343
|
+
if @source.match?("ELEMENT", true)
|
|
336
344
|
md = @source.match(/(.*?)>/um, true)
|
|
337
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
338
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
339
|
-
elsif @source.match("ENTITY", true)
|
|
347
|
+
elsif @source.match?("ENTITY", true)
|
|
340
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
341
349
|
unless match_data
|
|
342
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
|
@@ -368,11 +376,11 @@ module REXML
|
|
|
368
376
|
end
|
|
369
377
|
match << '%' if ref
|
|
370
378
|
return match
|
|
371
|
-
elsif @source.match("ATTLIST", true)
|
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
|
372
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
|
373
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
374
382
|
element = md[1]
|
|
375
|
-
contents = md[0]
|
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
|
376
384
|
|
|
377
385
|
pairs = {}
|
|
378
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
|
@@ -388,10 +396,10 @@ module REXML
|
|
|
388
396
|
end
|
|
389
397
|
end
|
|
390
398
|
return [ :attlistdecl, element, pairs, contents ]
|
|
391
|
-
elsif @source.match("NOTATION", true)
|
|
399
|
+
elsif @source.match?("NOTATION", true)
|
|
392
400
|
base_error_message = "Malformed notation declaration"
|
|
393
|
-
unless @source.
|
|
394
|
-
if @source.match(">")
|
|
401
|
+
unless @source.skip_spaces
|
|
402
|
+
if @source.match?(">")
|
|
395
403
|
message = "#{base_error_message}: name is missing"
|
|
396
404
|
else
|
|
397
405
|
message = "#{base_error_message}: invalid name"
|
|
@@ -403,39 +411,37 @@ module REXML
|
|
|
403
411
|
id = parse_id(base_error_message,
|
|
404
412
|
accept_external_id: true,
|
|
405
413
|
accept_public_id: true)
|
|
406
|
-
|
|
414
|
+
@source.skip_spaces
|
|
415
|
+
unless @source.match?(">", true)
|
|
407
416
|
message = "#{base_error_message}: garbage before end >"
|
|
408
417
|
raise REXML::ParseException.new(message, @source)
|
|
409
418
|
end
|
|
410
419
|
return [:notationdecl, name, *id]
|
|
411
|
-
elsif
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
end
|
|
416
|
-
return [ :comment, md[1] ] if md
|
|
420
|
+
elsif @source.match?("--", true)
|
|
421
|
+
return [ :comment, process_comment ]
|
|
422
|
+
else
|
|
423
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
|
|
417
424
|
end
|
|
418
425
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
419
426
|
return [ :externalentity, match[1] ]
|
|
420
|
-
elsif @source.match(/\]\s*>/um, true)
|
|
427
|
+
elsif @source.match?(/\]\s*>/um, true)
|
|
421
428
|
@document_status = :after_doctype
|
|
422
429
|
return [ :end_doctype ]
|
|
423
|
-
|
|
424
|
-
if @document_status == :in_doctype
|
|
430
|
+
else
|
|
425
431
|
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
|
426
432
|
end
|
|
427
433
|
end
|
|
428
434
|
if @document_status == :after_doctype
|
|
429
|
-
@source.
|
|
435
|
+
@source.skip_spaces
|
|
430
436
|
end
|
|
431
437
|
begin
|
|
432
438
|
start_position = @source.position
|
|
433
|
-
if @source.match("<", true)
|
|
439
|
+
if @source.match?("<", true)
|
|
434
440
|
# :text's read_until may remain only "<" in buffer. In the
|
|
435
441
|
# case, buffer is empty here. So we need to fill buffer
|
|
436
442
|
# here explicitly.
|
|
437
443
|
@source.ensure_buffer
|
|
438
|
-
if @source.match("/", true)
|
|
444
|
+
if @source.match?("/", true)
|
|
439
445
|
@namespaces_restore_stack.pop
|
|
440
446
|
last_tag = @tags.pop
|
|
441
447
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
|
@@ -450,25 +456,21 @@ module REXML
|
|
|
450
456
|
raise REXML::ParseException.new(message, @source)
|
|
451
457
|
end
|
|
452
458
|
return [ :end_element, last_tag ]
|
|
453
|
-
elsif @source.match("!", true)
|
|
454
|
-
md = @source.match(/([^>]*>)/um)
|
|
459
|
+
elsif @source.match?("!", true)
|
|
455
460
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
if
|
|
461
|
-
|
|
461
|
+
if @source.match?("--", true)
|
|
462
|
+
return [ :comment, process_comment ]
|
|
463
|
+
elsif @source.match?("[CDATA[", true)
|
|
464
|
+
text = @source.read_until("]]>")
|
|
465
|
+
if text.chomp!("]]>")
|
|
466
|
+
return [ :cdata, text ]
|
|
467
|
+
else
|
|
468
|
+
raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
|
|
462
469
|
end
|
|
463
|
-
|
|
464
|
-
return [ :comment, md[1] ]
|
|
465
470
|
else
|
|
466
|
-
|
|
467
|
-
return [ :cdata, md[1] ] if md
|
|
471
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
|
|
468
472
|
end
|
|
469
|
-
|
|
470
|
-
"in the doctype declaration.", @source)
|
|
471
|
-
elsif @source.match("?", true)
|
|
473
|
+
elsif @source.match?("?", true)
|
|
472
474
|
return process_instruction
|
|
473
475
|
else
|
|
474
476
|
# Get the next tag
|
|
@@ -527,7 +529,8 @@ module REXML
|
|
|
527
529
|
raise REXML::ParseException.new( "Exception parsing",
|
|
528
530
|
@source, self, (error ? error : $!) )
|
|
529
531
|
end
|
|
530
|
-
|
|
532
|
+
# NOTE: The end of the method never runs, because it is unreachable.
|
|
533
|
+
# All branches of code above have explicit unconditional return or raise statements.
|
|
531
534
|
end
|
|
532
535
|
private :pull_event
|
|
533
536
|
|
|
@@ -568,8 +571,12 @@ module REXML
|
|
|
568
571
|
return rv if matches.size == 0
|
|
569
572
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
|
570
573
|
m=$1
|
|
571
|
-
|
|
572
|
-
|
|
574
|
+
if m.start_with?("x")
|
|
575
|
+
code_point = Integer(m[1..-1], 16)
|
|
576
|
+
else
|
|
577
|
+
code_point = Integer(m, 10)
|
|
578
|
+
end
|
|
579
|
+
[code_point].pack('U*')
|
|
573
580
|
}
|
|
574
581
|
matches.collect!{|x|x[0]}.compact!
|
|
575
582
|
if filter
|
|
@@ -642,10 +649,14 @@ module REXML
|
|
|
642
649
|
true
|
|
643
650
|
end
|
|
644
651
|
|
|
652
|
+
def normalize_xml_declaration_encoding(xml_declaration_encoding)
|
|
653
|
+
/\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
|
|
654
|
+
end
|
|
655
|
+
|
|
645
656
|
def parse_name(base_error_message)
|
|
646
657
|
md = @source.match(Private::NAME_PATTERN, true)
|
|
647
658
|
unless md
|
|
648
|
-
if @source.match(/\S/um)
|
|
659
|
+
if @source.match?(/\S/um)
|
|
649
660
|
message = "#{base_error_message}: invalid name"
|
|
650
661
|
else
|
|
651
662
|
message = "#{base_error_message}: name is missing"
|
|
@@ -687,73 +698,171 @@ module REXML
|
|
|
687
698
|
accept_public_id:)
|
|
688
699
|
public = /\A\s*PUBLIC/um
|
|
689
700
|
system = /\A\s*SYSTEM/um
|
|
690
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
|
691
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
701
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
|
702
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
692
703
|
return "public ID literal is missing"
|
|
693
704
|
end
|
|
694
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
705
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
695
706
|
return "invalid public ID literal"
|
|
696
707
|
end
|
|
697
708
|
if accept_public_id
|
|
698
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
709
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
699
710
|
return "system ID literal is missing"
|
|
700
711
|
end
|
|
701
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
712
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
702
713
|
return "invalid system literal"
|
|
703
714
|
end
|
|
704
715
|
"garbage after system literal"
|
|
705
716
|
else
|
|
706
717
|
"garbage after public ID literal"
|
|
707
718
|
end
|
|
708
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
|
709
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
719
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
|
720
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
710
721
|
return "system literal is missing"
|
|
711
722
|
end
|
|
712
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
723
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
713
724
|
return "invalid system literal"
|
|
714
725
|
end
|
|
715
726
|
"garbage after system literal"
|
|
716
727
|
else
|
|
717
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
728
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
718
729
|
return "invalid ID type"
|
|
719
730
|
end
|
|
720
731
|
"ID type is missing"
|
|
721
732
|
end
|
|
722
733
|
end
|
|
723
734
|
|
|
735
|
+
def process_comment
|
|
736
|
+
text = @source.read_until("-->")
|
|
737
|
+
unless text.chomp!("-->")
|
|
738
|
+
raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
|
|
739
|
+
end
|
|
740
|
+
|
|
741
|
+
if text.include? "--" or text.end_with?("-")
|
|
742
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
|
743
|
+
end
|
|
744
|
+
text
|
|
745
|
+
end
|
|
746
|
+
|
|
724
747
|
def process_instruction
|
|
725
748
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
|
726
|
-
if
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
749
|
+
if name == "xml"
|
|
750
|
+
xml_declaration
|
|
751
|
+
else # PITarget
|
|
752
|
+
if @source.skip_spaces # e.g. <?name content?>
|
|
753
|
+
start_position = @source.position
|
|
754
|
+
content = @source.read_until("?>")
|
|
755
|
+
unless content.chomp!("?>")
|
|
756
|
+
@source.position = start_position
|
|
757
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
758
|
+
end
|
|
759
|
+
else # e.g. <?name?>
|
|
760
|
+
content = nil
|
|
761
|
+
unless @source.match?("?>", true)
|
|
762
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
763
|
+
end
|
|
730
764
|
end
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
765
|
+
[:processing_instruction, name, content]
|
|
766
|
+
end
|
|
767
|
+
end
|
|
768
|
+
|
|
769
|
+
def xml_declaration
|
|
770
|
+
unless @version.nil?
|
|
771
|
+
raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
|
|
772
|
+
end
|
|
773
|
+
if @document_status
|
|
774
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
|
775
|
+
end
|
|
776
|
+
unless @source.skip_spaces
|
|
777
|
+
raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
|
|
778
|
+
end
|
|
779
|
+
unless @source.match?("version", true)
|
|
780
|
+
raise ParseException.new("Malformed XML: XML declaration misses version", @source)
|
|
781
|
+
end
|
|
782
|
+
@version = parse_attribute_value_with_equal("xml")
|
|
783
|
+
unless @source.skip_spaces
|
|
784
|
+
unless @source.match?("?>", true)
|
|
785
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
736
786
|
end
|
|
787
|
+
encoding = normalize_xml_declaration_encoding(@source.encoding)
|
|
788
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
|
|
737
789
|
end
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
790
|
+
|
|
791
|
+
if @source.match?("encoding", true)
|
|
792
|
+
encoding = parse_attribute_value_with_equal("xml")
|
|
793
|
+
unless @source.skip_spaces
|
|
794
|
+
unless @source.match?("?>", true)
|
|
795
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
796
|
+
end
|
|
797
|
+
if need_source_encoding_update?(encoding)
|
|
798
|
+
@source.encoding = encoding
|
|
799
|
+
end
|
|
800
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
801
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
|
|
741
802
|
end
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
803
|
+
end
|
|
804
|
+
|
|
805
|
+
if @source.match?("standalone", true)
|
|
806
|
+
standalone = parse_attribute_value_with_equal("xml")
|
|
807
|
+
case standalone
|
|
808
|
+
when "yes", "no"
|
|
809
|
+
else
|
|
810
|
+
raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
|
|
748
811
|
end
|
|
749
|
-
|
|
750
|
-
|
|
812
|
+
end
|
|
813
|
+
@source.skip_spaces
|
|
814
|
+
unless @source.match?("?>", true)
|
|
815
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
816
|
+
end
|
|
817
|
+
|
|
818
|
+
if need_source_encoding_update?(encoding)
|
|
819
|
+
@source.encoding = encoding
|
|
820
|
+
end
|
|
821
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
822
|
+
|
|
823
|
+
# e.g. <?xml version="1.0" ?>
|
|
824
|
+
# <?xml version="1.1" encoding="UTF-8" ?>
|
|
825
|
+
# <?xml version="1.1" standalone="yes"?>
|
|
826
|
+
# <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
|
|
827
|
+
[ :xmldecl, @version, encoding, standalone ]
|
|
828
|
+
end
|
|
829
|
+
|
|
830
|
+
if StringScanner::Version < "3.1.1"
|
|
831
|
+
def scan_quote
|
|
832
|
+
@source.match(/(['"])/, true)&.[](1)
|
|
833
|
+
end
|
|
834
|
+
else
|
|
835
|
+
def scan_quote
|
|
836
|
+
case @source.peek_byte
|
|
837
|
+
when 34 # '"'.ord
|
|
838
|
+
@source.scan_byte
|
|
839
|
+
'"'
|
|
840
|
+
when 39 # "'".ord
|
|
841
|
+
@source.scan_byte
|
|
842
|
+
"'"
|
|
843
|
+
else
|
|
844
|
+
nil
|
|
751
845
|
end
|
|
752
|
-
standalone = STANDALONE.match(content)
|
|
753
|
-
standalone = standalone[1] unless standalone.nil?
|
|
754
|
-
return [ :xmldecl, version, encoding, standalone ]
|
|
755
846
|
end
|
|
756
|
-
|
|
847
|
+
end
|
|
848
|
+
|
|
849
|
+
def parse_attribute_value_with_equal(name)
|
|
850
|
+
unless @source.match?(Private::EQUAL_PATTERN, true)
|
|
851
|
+
message = "Missing attribute equal: <#{name}>"
|
|
852
|
+
raise REXML::ParseException.new(message, @source)
|
|
853
|
+
end
|
|
854
|
+
unless quote = scan_quote
|
|
855
|
+
message = "Missing attribute value start quote: <#{name}>"
|
|
856
|
+
raise REXML::ParseException.new(message, @source)
|
|
857
|
+
end
|
|
858
|
+
start_position = @source.position
|
|
859
|
+
value = @source.read_until(quote)
|
|
860
|
+
unless value.chomp!(quote)
|
|
861
|
+
@source.position = start_position
|
|
862
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
863
|
+
raise REXML::ParseException.new(message, @source)
|
|
864
|
+
end
|
|
865
|
+
value
|
|
757
866
|
end
|
|
758
867
|
|
|
759
868
|
def parse_attributes(prefixes)
|
|
@@ -761,36 +870,20 @@ module REXML
|
|
|
761
870
|
expanded_names = {}
|
|
762
871
|
closed = false
|
|
763
872
|
while true
|
|
764
|
-
if @source.match(">", true)
|
|
873
|
+
if @source.match?(">", true)
|
|
765
874
|
return attributes, closed
|
|
766
|
-
elsif @source.match("/>", true)
|
|
875
|
+
elsif @source.match?("/>", true)
|
|
767
876
|
closed = true
|
|
768
877
|
return attributes, closed
|
|
769
878
|
elsif match = @source.match(QNAME, true)
|
|
770
879
|
name = match[1]
|
|
771
880
|
prefix = match[2]
|
|
772
881
|
local_part = match[3]
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
message = "Missing attribute equal: <#{name}>"
|
|
776
|
-
raise REXML::ParseException.new(message, @source)
|
|
777
|
-
end
|
|
778
|
-
unless match = @source.match(/(['"])/, true)
|
|
779
|
-
message = "Missing attribute value start quote: <#{name}>"
|
|
780
|
-
raise REXML::ParseException.new(message, @source)
|
|
781
|
-
end
|
|
782
|
-
quote = match[1]
|
|
783
|
-
start_position = @source.position
|
|
784
|
-
value = @source.read_until(quote)
|
|
785
|
-
unless value.chomp!(quote)
|
|
786
|
-
@source.position = start_position
|
|
787
|
-
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
788
|
-
raise REXML::ParseException.new(message, @source)
|
|
789
|
-
end
|
|
790
|
-
@source.match(/\s*/um, true)
|
|
882
|
+
value = parse_attribute_value_with_equal(name)
|
|
883
|
+
@source.skip_spaces
|
|
791
884
|
if prefix == "xmlns"
|
|
792
885
|
if local_part == "xml"
|
|
793
|
-
if value !=
|
|
886
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
|
794
887
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
|
795
888
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
796
889
|
raise REXML::ParseException.new( msg, @source, self )
|
|
@@ -259,6 +259,8 @@ module REXML
|
|
|
259
259
|
end
|
|
260
260
|
|
|
261
261
|
def get_namespace( prefix )
|
|
262
|
+
return nil if @namespace_stack.empty?
|
|
263
|
+
|
|
262
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
|
263
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
|
264
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
|
@@ -215,7 +215,7 @@ module REXML
|
|
|
215
215
|
else
|
|
216
216
|
path << yield( parsed )
|
|
217
217
|
end
|
|
218
|
-
|
|
218
|
+
path.squeeze(" ")
|
|
219
219
|
end
|
|
220
220
|
# For backward compatibility
|
|
221
221
|
alias_method :preciate_to_string, :predicate_to_path
|
|
@@ -252,7 +252,7 @@ module REXML
|
|
|
252
252
|
path = path[1..-1]
|
|
253
253
|
end
|
|
254
254
|
end
|
|
255
|
-
|
|
255
|
+
RelativeLocationPath( path, parsed ) if path.size > 0
|
|
256
256
|
end
|
|
257
257
|
|
|
258
258
|
#RelativeLocationPath
|
|
@@ -388,7 +388,7 @@ module REXML
|
|
|
388
388
|
else
|
|
389
389
|
path = original_path
|
|
390
390
|
end
|
|
391
|
-
|
|
391
|
+
path
|
|
392
392
|
end
|
|
393
393
|
|
|
394
394
|
# Filters the supplied nodeset on the predicate(s)
|
|
@@ -600,7 +600,7 @@ module REXML
|
|
|
600
600
|
end
|
|
601
601
|
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
|
|
602
602
|
parsed.concat(n)
|
|
603
|
-
|
|
603
|
+
rest
|
|
604
604
|
end
|
|
605
605
|
|
|
606
606
|
#| FilterExpr Predicate
|