rexml 3.3.7 → 3.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +198 -1
- data/lib/rexml/attribute.rb +7 -8
- data/lib/rexml/cdata.rb +1 -1
- data/lib/rexml/child.rb +2 -3
- data/lib/rexml/comment.rb +1 -1
- data/lib/rexml/doctype.rb +3 -8
- data/lib/rexml/document.rb +18 -4
- data/lib/rexml/element.rb +53 -59
- data/lib/rexml/encoding.rb +3 -6
- data/lib/rexml/functions.rb +3 -3
- data/lib/rexml/instruction.rb +1 -1
- data/lib/rexml/namespace.rb +4 -4
- data/lib/rexml/node.rb +2 -2
- data/lib/rexml/parsers/baseparser.rb +206 -118
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +2 -0
- data/lib/rexml/parsers/xpathparser.rb +4 -4
- data/lib/rexml/quickpath.rb +19 -18
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/security.rb +2 -2
- data/lib/rexml/source.rb +67 -7
- data/lib/rexml/text.rb +29 -57
- data/lib/rexml/validation/relaxng.rb +27 -26
- data/lib/rexml/validation/validation.rb +8 -8
- data/lib/rexml/xpath.rb +2 -13
- data/lib/rexml/xpath_parser.rb +44 -42
- metadata +4 -4
data/lib/rexml/functions.rb
CHANGED
|
@@ -39,11 +39,11 @@ module REXML
|
|
|
39
39
|
|
|
40
40
|
def Functions::text( )
|
|
41
41
|
if @@context[:node].node_type == :element
|
|
42
|
-
|
|
42
|
+
@@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
|
|
43
43
|
elsif @@context[:node].node_type == :text
|
|
44
|
-
|
|
44
|
+
@@context[:node].value
|
|
45
45
|
else
|
|
46
|
-
|
|
46
|
+
false
|
|
47
47
|
end
|
|
48
48
|
end
|
|
49
49
|
|
data/lib/rexml/instruction.rb
CHANGED
|
@@ -49,7 +49,7 @@ module REXML
|
|
|
49
49
|
# See the rexml/formatters package
|
|
50
50
|
#
|
|
51
51
|
def write writer, indent=-1, transitive=false, ie_hack=false
|
|
52
|
-
Kernel.warn( "#{self.class.name}
|
|
52
|
+
Kernel.warn( "#{self.class.name}#write is deprecated", uplevel: 1)
|
|
53
53
|
indent(writer, indent)
|
|
54
54
|
writer << START
|
|
55
55
|
writer << @target
|
data/lib/rexml/namespace.rb
CHANGED
|
@@ -42,11 +42,11 @@ module REXML
|
|
|
42
42
|
# Compares names optionally WITH namespaces
|
|
43
43
|
def has_name?( other, ns=nil )
|
|
44
44
|
if ns
|
|
45
|
-
|
|
45
|
+
namespace() == ns and name() == other
|
|
46
46
|
elsif other.include? ":"
|
|
47
|
-
|
|
47
|
+
fully_expanded_name == other
|
|
48
48
|
else
|
|
49
|
-
|
|
49
|
+
name == other
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -57,7 +57,7 @@ module REXML
|
|
|
57
57
|
def fully_expanded_name
|
|
58
58
|
ns = prefix
|
|
59
59
|
return "#{ns}:#@name" if ns.size > 0
|
|
60
|
-
|
|
60
|
+
@name
|
|
61
61
|
end
|
|
62
62
|
end
|
|
63
63
|
end
|
data/lib/rexml/node.rb
CHANGED
|
@@ -26,7 +26,7 @@ module REXML
|
|
|
26
26
|
# REXML::Formatters package for changing the output style.
|
|
27
27
|
def to_s indent=nil
|
|
28
28
|
unless indent.nil?
|
|
29
|
-
Kernel.warn( "#{self.class.name}
|
|
29
|
+
Kernel.warn( "#{self.class.name}#to_s(indent) parameter is deprecated", uplevel: 1)
|
|
30
30
|
f = REXML::Formatters::Pretty.new( indent )
|
|
31
31
|
f.write( self, rv = "" )
|
|
32
32
|
else
|
|
@@ -68,7 +68,7 @@ module REXML
|
|
|
68
68
|
each_recursive {|node|
|
|
69
69
|
return node if block.call(node)
|
|
70
70
|
}
|
|
71
|
-
|
|
71
|
+
nil
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
# Returns the position that +self+ holds in its parent's array, indexed
|
|
@@ -144,18 +144,20 @@ module REXML
|
|
|
144
144
|
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
|
145
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
146
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
147
|
+
EQUAL_PATTERN = /\s*=\s*/um
|
|
147
148
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
148
149
|
NAME_PATTERN = /#{NAME}/um
|
|
149
150
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
150
151
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
151
152
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
152
153
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
|
153
|
-
CHARACTER_REFERENCES = /&#
|
|
154
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
154
155
|
DEFAULT_ENTITIES_PATTERNS = {}
|
|
155
156
|
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
|
156
157
|
default_entities.each do |term|
|
|
157
158
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
|
158
159
|
end
|
|
160
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
|
159
161
|
end
|
|
160
162
|
private_constant :Private
|
|
161
163
|
|
|
@@ -166,6 +168,8 @@ module REXML
|
|
|
166
168
|
@entity_expansion_count = 0
|
|
167
169
|
@entity_expansion_limit = Security.entity_expansion_limit
|
|
168
170
|
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
|
171
|
+
@source.ensure_buffer
|
|
172
|
+
@version = nil
|
|
169
173
|
end
|
|
170
174
|
|
|
171
175
|
def add_listener( listener )
|
|
@@ -179,13 +183,17 @@ module REXML
|
|
|
179
183
|
|
|
180
184
|
def stream=( source )
|
|
181
185
|
@source = SourceFactory.create_from( source )
|
|
186
|
+
reset
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def reset
|
|
182
190
|
@closed = nil
|
|
183
191
|
@have_root = false
|
|
184
192
|
@document_status = nil
|
|
185
193
|
@tags = []
|
|
186
194
|
@stack = []
|
|
187
195
|
@entities = []
|
|
188
|
-
@namespaces = {}
|
|
196
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
|
189
197
|
@namespaces_restore_stack = []
|
|
190
198
|
end
|
|
191
199
|
|
|
@@ -200,12 +208,12 @@ module REXML
|
|
|
200
208
|
|
|
201
209
|
# Returns true if there are no more events
|
|
202
210
|
def empty?
|
|
203
|
-
|
|
211
|
+
(@source.empty? and @stack.empty?)
|
|
204
212
|
end
|
|
205
213
|
|
|
206
214
|
# Returns true if there are more events. Synonymous with !empty?
|
|
207
215
|
def has_next?
|
|
208
|
-
|
|
216
|
+
!(@source.empty? and @stack.empty?)
|
|
209
217
|
end
|
|
210
218
|
|
|
211
219
|
# Push an event back on the head of the stream. This method
|
|
@@ -267,22 +275,15 @@ module REXML
|
|
|
267
275
|
@source.ensure_buffer
|
|
268
276
|
if @document_status == nil
|
|
269
277
|
start_position = @source.position
|
|
270
|
-
if @source.match("<?", true)
|
|
278
|
+
if @source.match?("<?", true)
|
|
271
279
|
return process_instruction
|
|
272
|
-
elsif @source.match("<!", true)
|
|
273
|
-
if @source.match("--", true)
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
raise REXML::ParseException.new("Unclosed comment", @source)
|
|
277
|
-
end
|
|
278
|
-
if /--|-\z/.match?(md[1])
|
|
279
|
-
raise REXML::ParseException.new("Malformed comment", @source)
|
|
280
|
-
end
|
|
281
|
-
return [ :comment, md[1] ]
|
|
282
|
-
elsif @source.match("DOCTYPE", true)
|
|
280
|
+
elsif @source.match?("<!", true)
|
|
281
|
+
if @source.match?("--", true)
|
|
282
|
+
return [ :comment, process_comment ]
|
|
283
|
+
elsif @source.match?("DOCTYPE", true)
|
|
283
284
|
base_error_message = "Malformed DOCTYPE"
|
|
284
|
-
unless @source.
|
|
285
|
-
if @source.match(">")
|
|
285
|
+
unless @source.skip_spaces
|
|
286
|
+
if @source.match?(">")
|
|
286
287
|
message = "#{base_error_message}: name is missing"
|
|
287
288
|
else
|
|
288
289
|
message = "#{base_error_message}: invalid name"
|
|
@@ -291,10 +292,11 @@ module REXML
|
|
|
291
292
|
raise REXML::ParseException.new(message, @source)
|
|
292
293
|
end
|
|
293
294
|
name = parse_name(base_error_message)
|
|
294
|
-
|
|
295
|
+
@source.skip_spaces
|
|
296
|
+
if @source.match?("[", true)
|
|
295
297
|
id = [nil, nil, nil]
|
|
296
298
|
@document_status = :in_doctype
|
|
297
|
-
elsif @source.match(
|
|
299
|
+
elsif @source.match?(">", true)
|
|
298
300
|
id = [nil, nil, nil]
|
|
299
301
|
@document_status = :after_doctype
|
|
300
302
|
@source.ensure_buffer
|
|
@@ -306,9 +308,10 @@ module REXML
|
|
|
306
308
|
# For backward compatibility
|
|
307
309
|
id[1], id[2] = id[2], nil
|
|
308
310
|
end
|
|
309
|
-
|
|
311
|
+
@source.skip_spaces
|
|
312
|
+
if @source.match?("[", true)
|
|
310
313
|
@document_status = :in_doctype
|
|
311
|
-
elsif @source.match(
|
|
314
|
+
elsif @source.match?(">", true)
|
|
312
315
|
@document_status = :after_doctype
|
|
313
316
|
@source.ensure_buffer
|
|
314
317
|
else
|
|
@@ -318,7 +321,7 @@ module REXML
|
|
|
318
321
|
end
|
|
319
322
|
args = [:start_doctype, name, *id]
|
|
320
323
|
if @document_status == :after_doctype
|
|
321
|
-
@source.
|
|
324
|
+
@source.skip_spaces
|
|
322
325
|
@stack << [ :end_doctype ]
|
|
323
326
|
end
|
|
324
327
|
return args
|
|
@@ -329,14 +332,14 @@ module REXML
|
|
|
329
332
|
end
|
|
330
333
|
end
|
|
331
334
|
if @document_status == :in_doctype
|
|
332
|
-
@source.
|
|
335
|
+
@source.skip_spaces
|
|
333
336
|
start_position = @source.position
|
|
334
|
-
if @source.match("<!", true)
|
|
335
|
-
if @source.match("ELEMENT", true)
|
|
337
|
+
if @source.match?("<!", true)
|
|
338
|
+
if @source.match?("ELEMENT", true)
|
|
336
339
|
md = @source.match(/(.*?)>/um, true)
|
|
337
340
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
338
341
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
339
|
-
elsif @source.match("ENTITY", true)
|
|
342
|
+
elsif @source.match?("ENTITY", true)
|
|
340
343
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
341
344
|
unless match_data
|
|
342
345
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
|
@@ -368,11 +371,11 @@ module REXML
|
|
|
368
371
|
end
|
|
369
372
|
match << '%' if ref
|
|
370
373
|
return match
|
|
371
|
-
elsif @source.match("ATTLIST", true)
|
|
374
|
+
elsif @source.match?("ATTLIST", true)
|
|
372
375
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
|
373
376
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
374
377
|
element = md[1]
|
|
375
|
-
contents = md[0]
|
|
378
|
+
contents = "<!ATTLIST" + md[0]
|
|
376
379
|
|
|
377
380
|
pairs = {}
|
|
378
381
|
values = md[0].strip.scan( ATTDEF_RE )
|
|
@@ -388,10 +391,10 @@ module REXML
|
|
|
388
391
|
end
|
|
389
392
|
end
|
|
390
393
|
return [ :attlistdecl, element, pairs, contents ]
|
|
391
|
-
elsif @source.match("NOTATION", true)
|
|
394
|
+
elsif @source.match?("NOTATION", true)
|
|
392
395
|
base_error_message = "Malformed notation declaration"
|
|
393
|
-
unless @source.
|
|
394
|
-
if @source.match(">")
|
|
396
|
+
unless @source.skip_spaces
|
|
397
|
+
if @source.match?(">")
|
|
395
398
|
message = "#{base_error_message}: name is missing"
|
|
396
399
|
else
|
|
397
400
|
message = "#{base_error_message}: invalid name"
|
|
@@ -403,39 +406,37 @@ module REXML
|
|
|
403
406
|
id = parse_id(base_error_message,
|
|
404
407
|
accept_external_id: true,
|
|
405
408
|
accept_public_id: true)
|
|
406
|
-
|
|
409
|
+
@source.skip_spaces
|
|
410
|
+
unless @source.match?(">", true)
|
|
407
411
|
message = "#{base_error_message}: garbage before end >"
|
|
408
412
|
raise REXML::ParseException.new(message, @source)
|
|
409
413
|
end
|
|
410
414
|
return [:notationdecl, name, *id]
|
|
411
|
-
elsif
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
end
|
|
416
|
-
return [ :comment, md[1] ] if md
|
|
415
|
+
elsif @source.match?("--", true)
|
|
416
|
+
return [ :comment, process_comment ]
|
|
417
|
+
else
|
|
418
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
|
|
417
419
|
end
|
|
418
420
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
419
421
|
return [ :externalentity, match[1] ]
|
|
420
|
-
elsif @source.match(/\]\s*>/um, true)
|
|
422
|
+
elsif @source.match?(/\]\s*>/um, true)
|
|
421
423
|
@document_status = :after_doctype
|
|
422
424
|
return [ :end_doctype ]
|
|
423
|
-
|
|
424
|
-
if @document_status == :in_doctype
|
|
425
|
+
else
|
|
425
426
|
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
|
426
427
|
end
|
|
427
428
|
end
|
|
428
429
|
if @document_status == :after_doctype
|
|
429
|
-
@source.
|
|
430
|
+
@source.skip_spaces
|
|
430
431
|
end
|
|
431
432
|
begin
|
|
432
433
|
start_position = @source.position
|
|
433
|
-
if @source.match("<", true)
|
|
434
|
+
if @source.match?("<", true)
|
|
434
435
|
# :text's read_until may remain only "<" in buffer. In the
|
|
435
436
|
# case, buffer is empty here. So we need to fill buffer
|
|
436
437
|
# here explicitly.
|
|
437
438
|
@source.ensure_buffer
|
|
438
|
-
if @source.match("/", true)
|
|
439
|
+
if @source.match?("/", true)
|
|
439
440
|
@namespaces_restore_stack.pop
|
|
440
441
|
last_tag = @tags.pop
|
|
441
442
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
|
@@ -450,25 +451,21 @@ module REXML
|
|
|
450
451
|
raise REXML::ParseException.new(message, @source)
|
|
451
452
|
end
|
|
452
453
|
return [ :end_element, last_tag ]
|
|
453
|
-
elsif @source.match("!", true)
|
|
454
|
-
md = @source.match(/([^>]*>)/um)
|
|
454
|
+
elsif @source.match?("!", true)
|
|
455
455
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
if
|
|
461
|
-
|
|
456
|
+
if @source.match?("--", true)
|
|
457
|
+
return [ :comment, process_comment ]
|
|
458
|
+
elsif @source.match?("[CDATA[", true)
|
|
459
|
+
text = @source.read_until("]]>")
|
|
460
|
+
if text.chomp!("]]>")
|
|
461
|
+
return [ :cdata, text ]
|
|
462
|
+
else
|
|
463
|
+
raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
|
|
462
464
|
end
|
|
463
|
-
|
|
464
|
-
return [ :comment, md[1] ]
|
|
465
465
|
else
|
|
466
|
-
|
|
467
|
-
return [ :cdata, md[1] ] if md
|
|
466
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
|
|
468
467
|
end
|
|
469
|
-
|
|
470
|
-
"in the doctype declaration.", @source)
|
|
471
|
-
elsif @source.match("?", true)
|
|
468
|
+
elsif @source.match?("?", true)
|
|
472
469
|
return process_instruction
|
|
473
470
|
else
|
|
474
471
|
# Get the next tag
|
|
@@ -527,7 +524,8 @@ module REXML
|
|
|
527
524
|
raise REXML::ParseException.new( "Exception parsing",
|
|
528
525
|
@source, self, (error ? error : $!) )
|
|
529
526
|
end
|
|
530
|
-
|
|
527
|
+
# NOTE: The end of the method never runs, because it is unreachable.
|
|
528
|
+
# All branches of code above have explicit unconditional return or raise statements.
|
|
531
529
|
end
|
|
532
530
|
private :pull_event
|
|
533
531
|
|
|
@@ -568,8 +566,12 @@ module REXML
|
|
|
568
566
|
return rv if matches.size == 0
|
|
569
567
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
|
570
568
|
m=$1
|
|
571
|
-
|
|
572
|
-
|
|
569
|
+
if m.start_with?("x")
|
|
570
|
+
code_point = Integer(m[1..-1], 16)
|
|
571
|
+
else
|
|
572
|
+
code_point = Integer(m, 10)
|
|
573
|
+
end
|
|
574
|
+
[code_point].pack('U*')
|
|
573
575
|
}
|
|
574
576
|
matches.collect!{|x|x[0]}.compact!
|
|
575
577
|
if filter
|
|
@@ -642,10 +644,14 @@ module REXML
|
|
|
642
644
|
true
|
|
643
645
|
end
|
|
644
646
|
|
|
647
|
+
def normalize_xml_declaration_encoding(xml_declaration_encoding)
|
|
648
|
+
/\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
|
|
649
|
+
end
|
|
650
|
+
|
|
645
651
|
def parse_name(base_error_message)
|
|
646
652
|
md = @source.match(Private::NAME_PATTERN, true)
|
|
647
653
|
unless md
|
|
648
|
-
if @source.match(/\S/um)
|
|
654
|
+
if @source.match?(/\S/um)
|
|
649
655
|
message = "#{base_error_message}: invalid name"
|
|
650
656
|
else
|
|
651
657
|
message = "#{base_error_message}: name is missing"
|
|
@@ -687,73 +693,171 @@ module REXML
|
|
|
687
693
|
accept_public_id:)
|
|
688
694
|
public = /\A\s*PUBLIC/um
|
|
689
695
|
system = /\A\s*SYSTEM/um
|
|
690
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
|
691
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
696
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
|
697
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
692
698
|
return "public ID literal is missing"
|
|
693
699
|
end
|
|
694
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
700
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
695
701
|
return "invalid public ID literal"
|
|
696
702
|
end
|
|
697
703
|
if accept_public_id
|
|
698
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
704
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
699
705
|
return "system ID literal is missing"
|
|
700
706
|
end
|
|
701
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
707
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
702
708
|
return "invalid system literal"
|
|
703
709
|
end
|
|
704
710
|
"garbage after system literal"
|
|
705
711
|
else
|
|
706
712
|
"garbage after public ID literal"
|
|
707
713
|
end
|
|
708
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
|
709
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
714
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
|
715
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
710
716
|
return "system literal is missing"
|
|
711
717
|
end
|
|
712
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
718
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
713
719
|
return "invalid system literal"
|
|
714
720
|
end
|
|
715
721
|
"garbage after system literal"
|
|
716
722
|
else
|
|
717
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
723
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
718
724
|
return "invalid ID type"
|
|
719
725
|
end
|
|
720
726
|
"ID type is missing"
|
|
721
727
|
end
|
|
722
728
|
end
|
|
723
729
|
|
|
730
|
+
def process_comment
|
|
731
|
+
text = @source.read_until("-->")
|
|
732
|
+
unless text.chomp!("-->")
|
|
733
|
+
raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
|
|
734
|
+
end
|
|
735
|
+
|
|
736
|
+
if text.include? "--" or text.end_with?("-")
|
|
737
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
|
738
|
+
end
|
|
739
|
+
text
|
|
740
|
+
end
|
|
741
|
+
|
|
724
742
|
def process_instruction
|
|
725
743
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
|
726
|
-
if
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
744
|
+
if name == "xml"
|
|
745
|
+
xml_declaration
|
|
746
|
+
else # PITarget
|
|
747
|
+
if @source.skip_spaces # e.g. <?name content?>
|
|
748
|
+
start_position = @source.position
|
|
749
|
+
content = @source.read_until("?>")
|
|
750
|
+
unless content.chomp!("?>")
|
|
751
|
+
@source.position = start_position
|
|
752
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
753
|
+
end
|
|
754
|
+
else # e.g. <?name?>
|
|
755
|
+
content = nil
|
|
756
|
+
unless @source.match?("?>", true)
|
|
757
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
758
|
+
end
|
|
730
759
|
end
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
760
|
+
[:processing_instruction, name, content]
|
|
761
|
+
end
|
|
762
|
+
end
|
|
763
|
+
|
|
764
|
+
def xml_declaration
|
|
765
|
+
unless @version.nil?
|
|
766
|
+
raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
|
|
767
|
+
end
|
|
768
|
+
if @document_status
|
|
769
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
|
770
|
+
end
|
|
771
|
+
unless @source.skip_spaces
|
|
772
|
+
raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
|
|
773
|
+
end
|
|
774
|
+
unless @source.match?("version", true)
|
|
775
|
+
raise ParseException.new("Malformed XML: XML declaration misses version", @source)
|
|
776
|
+
end
|
|
777
|
+
@version = parse_attribute_value_with_equal("xml")
|
|
778
|
+
unless @source.skip_spaces
|
|
779
|
+
unless @source.match?("?>", true)
|
|
780
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
736
781
|
end
|
|
782
|
+
encoding = normalize_xml_declaration_encoding(@source.encoding)
|
|
783
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
|
|
737
784
|
end
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
785
|
+
|
|
786
|
+
if @source.match?("encoding", true)
|
|
787
|
+
encoding = parse_attribute_value_with_equal("xml")
|
|
788
|
+
unless @source.skip_spaces
|
|
789
|
+
unless @source.match?("?>", true)
|
|
790
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
791
|
+
end
|
|
792
|
+
if need_source_encoding_update?(encoding)
|
|
793
|
+
@source.encoding = encoding
|
|
794
|
+
end
|
|
795
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
796
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
|
|
741
797
|
end
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
798
|
+
end
|
|
799
|
+
|
|
800
|
+
if @source.match?("standalone", true)
|
|
801
|
+
standalone = parse_attribute_value_with_equal("xml")
|
|
802
|
+
case standalone
|
|
803
|
+
when "yes", "no"
|
|
804
|
+
else
|
|
805
|
+
raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
|
|
748
806
|
end
|
|
749
|
-
|
|
750
|
-
|
|
807
|
+
end
|
|
808
|
+
@source.skip_spaces
|
|
809
|
+
unless @source.match?("?>", true)
|
|
810
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
811
|
+
end
|
|
812
|
+
|
|
813
|
+
if need_source_encoding_update?(encoding)
|
|
814
|
+
@source.encoding = encoding
|
|
815
|
+
end
|
|
816
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
817
|
+
|
|
818
|
+
# e.g. <?xml version="1.0" ?>
|
|
819
|
+
# <?xml version="1.1" encoding="UTF-8" ?>
|
|
820
|
+
# <?xml version="1.1" standalone="yes"?>
|
|
821
|
+
# <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
|
|
822
|
+
[ :xmldecl, @version, encoding, standalone ]
|
|
823
|
+
end
|
|
824
|
+
|
|
825
|
+
if StringScanner::Version < "3.1.1"
|
|
826
|
+
def scan_quote
|
|
827
|
+
@source.match(/(['"])/, true)&.[](1)
|
|
828
|
+
end
|
|
829
|
+
else
|
|
830
|
+
def scan_quote
|
|
831
|
+
case @source.peek_byte
|
|
832
|
+
when 34 # '"'.ord
|
|
833
|
+
@source.scan_byte
|
|
834
|
+
'"'
|
|
835
|
+
when 39 # "'".ord
|
|
836
|
+
@source.scan_byte
|
|
837
|
+
"'"
|
|
838
|
+
else
|
|
839
|
+
nil
|
|
751
840
|
end
|
|
752
|
-
standalone = STANDALONE.match(content)
|
|
753
|
-
standalone = standalone[1] unless standalone.nil?
|
|
754
|
-
return [ :xmldecl, version, encoding, standalone ]
|
|
755
841
|
end
|
|
756
|
-
|
|
842
|
+
end
|
|
843
|
+
|
|
844
|
+
def parse_attribute_value_with_equal(name)
|
|
845
|
+
unless @source.match?(Private::EQUAL_PATTERN, true)
|
|
846
|
+
message = "Missing attribute equal: <#{name}>"
|
|
847
|
+
raise REXML::ParseException.new(message, @source)
|
|
848
|
+
end
|
|
849
|
+
unless quote = scan_quote
|
|
850
|
+
message = "Missing attribute value start quote: <#{name}>"
|
|
851
|
+
raise REXML::ParseException.new(message, @source)
|
|
852
|
+
end
|
|
853
|
+
start_position = @source.position
|
|
854
|
+
value = @source.read_until(quote)
|
|
855
|
+
unless value.chomp!(quote)
|
|
856
|
+
@source.position = start_position
|
|
857
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
858
|
+
raise REXML::ParseException.new(message, @source)
|
|
859
|
+
end
|
|
860
|
+
value
|
|
757
861
|
end
|
|
758
862
|
|
|
759
863
|
def parse_attributes(prefixes)
|
|
@@ -761,36 +865,20 @@ module REXML
|
|
|
761
865
|
expanded_names = {}
|
|
762
866
|
closed = false
|
|
763
867
|
while true
|
|
764
|
-
if @source.match(">", true)
|
|
868
|
+
if @source.match?(">", true)
|
|
765
869
|
return attributes, closed
|
|
766
|
-
elsif @source.match("/>", true)
|
|
870
|
+
elsif @source.match?("/>", true)
|
|
767
871
|
closed = true
|
|
768
872
|
return attributes, closed
|
|
769
873
|
elsif match = @source.match(QNAME, true)
|
|
770
874
|
name = match[1]
|
|
771
875
|
prefix = match[2]
|
|
772
876
|
local_part = match[3]
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
message = "Missing attribute equal: <#{name}>"
|
|
776
|
-
raise REXML::ParseException.new(message, @source)
|
|
777
|
-
end
|
|
778
|
-
unless match = @source.match(/(['"])/, true)
|
|
779
|
-
message = "Missing attribute value start quote: <#{name}>"
|
|
780
|
-
raise REXML::ParseException.new(message, @source)
|
|
781
|
-
end
|
|
782
|
-
quote = match[1]
|
|
783
|
-
start_position = @source.position
|
|
784
|
-
value = @source.read_until(quote)
|
|
785
|
-
unless value.chomp!(quote)
|
|
786
|
-
@source.position = start_position
|
|
787
|
-
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
788
|
-
raise REXML::ParseException.new(message, @source)
|
|
789
|
-
end
|
|
790
|
-
@source.match(/\s*/um, true)
|
|
877
|
+
value = parse_attribute_value_with_equal(name)
|
|
878
|
+
@source.skip_spaces
|
|
791
879
|
if prefix == "xmlns"
|
|
792
880
|
if local_part == "xml"
|
|
793
|
-
if value !=
|
|
881
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
|
794
882
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
|
795
883
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
796
884
|
raise REXML::ParseException.new( msg, @source, self )
|
|
@@ -259,6 +259,8 @@ module REXML
|
|
|
259
259
|
end
|
|
260
260
|
|
|
261
261
|
def get_namespace( prefix )
|
|
262
|
+
return nil if @namespace_stack.empty?
|
|
263
|
+
|
|
262
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
|
263
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
|
264
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|