rexml 3.3.9 → 3.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +198 -1
- data/lib/rexml/attribute.rb +7 -8
- data/lib/rexml/cdata.rb +1 -1
- data/lib/rexml/child.rb +2 -3
- data/lib/rexml/comment.rb +1 -1
- data/lib/rexml/doctype.rb +3 -8
- data/lib/rexml/document.rb +21 -5
- data/lib/rexml/element.rb +53 -59
- data/lib/rexml/encoding.rb +3 -6
- data/lib/rexml/functions.rb +3 -3
- data/lib/rexml/instruction.rb +1 -1
- data/lib/rexml/namespace.rb +4 -4
- data/lib/rexml/node.rb +2 -2
- data/lib/rexml/parsers/baseparser.rb +200 -113
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/xpathparser.rb +4 -4
- data/lib/rexml/quickpath.rb +19 -18
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/security.rb +2 -2
- data/lib/rexml/source.rb +62 -8
- data/lib/rexml/text.rb +29 -57
- data/lib/rexml/validation/relaxng.rb +27 -26
- data/lib/rexml/validation/validation.rb +8 -8
- data/lib/rexml/xpath.rb +2 -13
- data/lib/rexml/xpath_parser.rb +44 -42
- metadata +4 -4
data/lib/rexml/encoding.rb
CHANGED
|
@@ -5,7 +5,7 @@ module REXML
|
|
|
5
5
|
# ID ---> Encoding name
|
|
6
6
|
attr_reader :encoding
|
|
7
7
|
def encoding=(encoding)
|
|
8
|
-
encoding = encoding.name if encoding.is_a?(Encoding)
|
|
8
|
+
encoding = encoding.name if encoding.is_a?(::Encoding)
|
|
9
9
|
if encoding.is_a?(String)
|
|
10
10
|
original_encoding = encoding
|
|
11
11
|
encoding = find_encoding(encoding)
|
|
@@ -13,12 +13,9 @@ module REXML
|
|
|
13
13
|
raise ArgumentError, "Bad encoding name #{original_encoding}"
|
|
14
14
|
end
|
|
15
15
|
end
|
|
16
|
+
encoding = encoding.upcase if encoding
|
|
16
17
|
return false if defined?(@encoding) and encoding == @encoding
|
|
17
|
-
|
|
18
|
-
@encoding = encoding.upcase
|
|
19
|
-
else
|
|
20
|
-
@encoding = 'UTF-8'
|
|
21
|
-
end
|
|
18
|
+
@encoding = encoding || "UTF-8"
|
|
22
19
|
true
|
|
23
20
|
end
|
|
24
21
|
|
data/lib/rexml/functions.rb
CHANGED
|
@@ -39,11 +39,11 @@ module REXML
|
|
|
39
39
|
|
|
40
40
|
def Functions::text( )
|
|
41
41
|
if @@context[:node].node_type == :element
|
|
42
|
-
|
|
42
|
+
@@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
|
|
43
43
|
elsif @@context[:node].node_type == :text
|
|
44
|
-
|
|
44
|
+
@@context[:node].value
|
|
45
45
|
else
|
|
46
|
-
|
|
46
|
+
false
|
|
47
47
|
end
|
|
48
48
|
end
|
|
49
49
|
|
data/lib/rexml/instruction.rb
CHANGED
|
@@ -49,7 +49,7 @@ module REXML
|
|
|
49
49
|
# See the rexml/formatters package
|
|
50
50
|
#
|
|
51
51
|
def write writer, indent=-1, transitive=false, ie_hack=false
|
|
52
|
-
Kernel.warn( "#{self.class.name}
|
|
52
|
+
Kernel.warn( "#{self.class.name}#write is deprecated", uplevel: 1)
|
|
53
53
|
indent(writer, indent)
|
|
54
54
|
writer << START
|
|
55
55
|
writer << @target
|
data/lib/rexml/namespace.rb
CHANGED
|
@@ -42,11 +42,11 @@ module REXML
|
|
|
42
42
|
# Compares names optionally WITH namespaces
|
|
43
43
|
def has_name?( other, ns=nil )
|
|
44
44
|
if ns
|
|
45
|
-
|
|
45
|
+
namespace() == ns and name() == other
|
|
46
46
|
elsif other.include? ":"
|
|
47
|
-
|
|
47
|
+
fully_expanded_name == other
|
|
48
48
|
else
|
|
49
|
-
|
|
49
|
+
name == other
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -57,7 +57,7 @@ module REXML
|
|
|
57
57
|
def fully_expanded_name
|
|
58
58
|
ns = prefix
|
|
59
59
|
return "#{ns}:#@name" if ns.size > 0
|
|
60
|
-
|
|
60
|
+
@name
|
|
61
61
|
end
|
|
62
62
|
end
|
|
63
63
|
end
|
data/lib/rexml/node.rb
CHANGED
|
@@ -26,7 +26,7 @@ module REXML
|
|
|
26
26
|
# REXML::Formatters package for changing the output style.
|
|
27
27
|
def to_s indent=nil
|
|
28
28
|
unless indent.nil?
|
|
29
|
-
Kernel.warn( "#{self.class.name}
|
|
29
|
+
Kernel.warn( "#{self.class.name}#to_s(indent) parameter is deprecated", uplevel: 1)
|
|
30
30
|
f = REXML::Formatters::Pretty.new( indent )
|
|
31
31
|
f.write( self, rv = "" )
|
|
32
32
|
else
|
|
@@ -68,7 +68,7 @@ module REXML
|
|
|
68
68
|
each_recursive {|node|
|
|
69
69
|
return node if block.call(node)
|
|
70
70
|
}
|
|
71
|
-
|
|
71
|
+
nil
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
# Returns the position that +self+ holds in its parent's array, indexed
|
data/lib/rexml/parsers/baseparser.rb
CHANGED
|
@@ -144,6 +144,7 @@ module REXML
|
|
|
144
144
|
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
|
145
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
146
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
147
|
+
EQUAL_PATTERN = /\s*=\s*/um
|
|
147
148
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
148
149
|
NAME_PATTERN = /#{NAME}/um
|
|
149
150
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
@@ -168,6 +169,7 @@ module REXML
|
|
|
168
169
|
@entity_expansion_limit = Security.entity_expansion_limit
|
|
169
170
|
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
|
170
171
|
@source.ensure_buffer
|
|
172
|
+
@version = nil
|
|
171
173
|
end
|
|
172
174
|
|
|
173
175
|
def add_listener( listener )
|
|
@@ -181,6 +183,10 @@ module REXML
|
|
|
181
183
|
|
|
182
184
|
def stream=( source )
|
|
183
185
|
@source = SourceFactory.create_from( source )
|
|
186
|
+
reset
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def reset
|
|
184
190
|
@closed = nil
|
|
185
191
|
@have_root = false
|
|
186
192
|
@document_status = nil
|
|
@@ -202,12 +208,12 @@ module REXML
|
|
|
202
208
|
|
|
203
209
|
# Returns true if there are no more events
|
|
204
210
|
def empty?
|
|
205
|
-
|
|
211
|
+
(@source.empty? and @stack.empty?)
|
|
206
212
|
end
|
|
207
213
|
|
|
208
214
|
# Returns true if there are more events. Synonymous with !empty?
|
|
209
215
|
def has_next?
|
|
210
|
-
|
|
216
|
+
!(@source.empty? and @stack.empty?)
|
|
211
217
|
end
|
|
212
218
|
|
|
213
219
|
# Push an event back on the head of the stream. This method
|
|
@@ -260,6 +266,11 @@ module REXML
|
|
|
260
266
|
path = "/" + @tags.join("/")
|
|
261
267
|
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
|
262
268
|
end
|
|
269
|
+
|
|
270
|
+
unless @document_status == :in_element
|
|
271
|
+
raise ParseException.new("Malformed XML: No root element", @source)
|
|
272
|
+
end
|
|
273
|
+
|
|
263
274
|
return [ :end_document ]
|
|
264
275
|
end
|
|
265
276
|
return @stack.shift if @stack.size > 0
|
|
@@ -269,22 +280,15 @@ module REXML
|
|
|
269
280
|
@source.ensure_buffer
|
|
270
281
|
if @document_status == nil
|
|
271
282
|
start_position = @source.position
|
|
272
|
-
if @source.match("<?", true)
|
|
283
|
+
if @source.match?("<?", true)
|
|
273
284
|
return process_instruction
|
|
274
|
-
elsif @source.match("<!", true)
|
|
275
|
-
if @source.match("--", true)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
raise REXML::ParseException.new("Unclosed comment", @source)
|
|
279
|
-
end
|
|
280
|
-
if /--|-\z/.match?(md[1])
|
|
281
|
-
raise REXML::ParseException.new("Malformed comment", @source)
|
|
282
|
-
end
|
|
283
|
-
return [ :comment, md[1] ]
|
|
284
|
-
elsif @source.match("DOCTYPE", true)
|
|
285
|
+
elsif @source.match?("<!", true)
|
|
286
|
+
if @source.match?("--", true)
|
|
287
|
+
return [ :comment, process_comment ]
|
|
288
|
+
elsif @source.match?("DOCTYPE", true)
|
|
285
289
|
base_error_message = "Malformed DOCTYPE"
|
|
286
|
-
unless @source.
|
|
287
|
-
if @source.match(">")
|
|
290
|
+
unless @source.skip_spaces
|
|
291
|
+
if @source.match?(">")
|
|
288
292
|
message = "#{base_error_message}: name is missing"
|
|
289
293
|
else
|
|
290
294
|
message = "#{base_error_message}: invalid name"
|
|
@@ -293,10 +297,11 @@ module REXML
|
|
|
293
297
|
raise REXML::ParseException.new(message, @source)
|
|
294
298
|
end
|
|
295
299
|
name = parse_name(base_error_message)
|
|
296
|
-
|
|
300
|
+
@source.skip_spaces
|
|
301
|
+
if @source.match?("[", true)
|
|
297
302
|
id = [nil, nil, nil]
|
|
298
303
|
@document_status = :in_doctype
|
|
299
|
-
elsif @source.match(
|
|
304
|
+
elsif @source.match?(">", true)
|
|
300
305
|
id = [nil, nil, nil]
|
|
301
306
|
@document_status = :after_doctype
|
|
302
307
|
@source.ensure_buffer
|
|
@@ -308,9 +313,10 @@ module REXML
|
|
|
308
313
|
# For backward compatibility
|
|
309
314
|
id[1], id[2] = id[2], nil
|
|
310
315
|
end
|
|
311
|
-
|
|
316
|
+
@source.skip_spaces
|
|
317
|
+
if @source.match?("[", true)
|
|
312
318
|
@document_status = :in_doctype
|
|
313
|
-
elsif @source.match(
|
|
319
|
+
elsif @source.match?(">", true)
|
|
314
320
|
@document_status = :after_doctype
|
|
315
321
|
@source.ensure_buffer
|
|
316
322
|
else
|
|
@@ -320,7 +326,7 @@ module REXML
|
|
|
320
326
|
end
|
|
321
327
|
args = [:start_doctype, name, *id]
|
|
322
328
|
if @document_status == :after_doctype
|
|
323
|
-
@source.
|
|
329
|
+
@source.skip_spaces
|
|
324
330
|
@stack << [ :end_doctype ]
|
|
325
331
|
end
|
|
326
332
|
return args
|
|
@@ -331,14 +337,14 @@ module REXML
|
|
|
331
337
|
end
|
|
332
338
|
end
|
|
333
339
|
if @document_status == :in_doctype
|
|
334
|
-
@source.
|
|
340
|
+
@source.skip_spaces
|
|
335
341
|
start_position = @source.position
|
|
336
|
-
if @source.match("<!", true)
|
|
337
|
-
if @source.match("ELEMENT", true)
|
|
342
|
+
if @source.match?("<!", true)
|
|
343
|
+
if @source.match?("ELEMENT", true)
|
|
338
344
|
md = @source.match(/(.*?)>/um, true)
|
|
339
345
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
340
346
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
341
|
-
elsif @source.match("ENTITY", true)
|
|
347
|
+
elsif @source.match?("ENTITY", true)
|
|
342
348
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
343
349
|
unless match_data
|
|
344
350
|
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
|
@@ -370,11 +376,11 @@ module REXML
|
|
|
370
376
|
end
|
|
371
377
|
match << '%' if ref
|
|
372
378
|
return match
|
|
373
|
-
elsif @source.match("ATTLIST", true)
|
|
379
|
+
elsif @source.match?("ATTLIST", true)
|
|
374
380
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
|
375
381
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
|
376
382
|
element = md[1]
|
|
377
|
-
contents = md[0]
|
|
383
|
+
contents = "<!ATTLIST" + md[0]
|
|
378
384
|
|
|
379
385
|
pairs = {}
|
|
380
386
|
values = md[0].strip.scan( ATTDEF_RE )
|
|
@@ -390,10 +396,10 @@ module REXML
|
|
|
390
396
|
end
|
|
391
397
|
end
|
|
392
398
|
return [ :attlistdecl, element, pairs, contents ]
|
|
393
|
-
elsif @source.match("NOTATION", true)
|
|
399
|
+
elsif @source.match?("NOTATION", true)
|
|
394
400
|
base_error_message = "Malformed notation declaration"
|
|
395
|
-
unless @source.
|
|
396
|
-
if @source.match(">")
|
|
401
|
+
unless @source.skip_spaces
|
|
402
|
+
if @source.match?(">")
|
|
397
403
|
message = "#{base_error_message}: name is missing"
|
|
398
404
|
else
|
|
399
405
|
message = "#{base_error_message}: invalid name"
|
|
@@ -405,39 +411,37 @@ module REXML
|
|
|
405
411
|
id = parse_id(base_error_message,
|
|
406
412
|
accept_external_id: true,
|
|
407
413
|
accept_public_id: true)
|
|
408
|
-
|
|
414
|
+
@source.skip_spaces
|
|
415
|
+
unless @source.match?(">", true)
|
|
409
416
|
message = "#{base_error_message}: garbage before end >"
|
|
410
417
|
raise REXML::ParseException.new(message, @source)
|
|
411
418
|
end
|
|
412
419
|
return [:notationdecl, name, *id]
|
|
413
|
-
elsif
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
end
|
|
418
|
-
return [ :comment, md[1] ] if md
|
|
420
|
+
elsif @source.match?("--", true)
|
|
421
|
+
return [ :comment, process_comment ]
|
|
422
|
+
else
|
|
423
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
|
|
419
424
|
end
|
|
420
425
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
421
426
|
return [ :externalentity, match[1] ]
|
|
422
|
-
elsif @source.match(/\]\s*>/um, true)
|
|
427
|
+
elsif @source.match?(/\]\s*>/um, true)
|
|
423
428
|
@document_status = :after_doctype
|
|
424
429
|
return [ :end_doctype ]
|
|
425
|
-
|
|
426
|
-
if @document_status == :in_doctype
|
|
430
|
+
else
|
|
427
431
|
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
|
428
432
|
end
|
|
429
433
|
end
|
|
430
434
|
if @document_status == :after_doctype
|
|
431
|
-
@source.
|
|
435
|
+
@source.skip_spaces
|
|
432
436
|
end
|
|
433
437
|
begin
|
|
434
438
|
start_position = @source.position
|
|
435
|
-
if @source.match("<", true)
|
|
439
|
+
if @source.match?("<", true)
|
|
436
440
|
# :text's read_until may remain only "<" in buffer. In the
|
|
437
441
|
# case, buffer is empty here. So we need to fill buffer
|
|
438
442
|
# here explicitly.
|
|
439
443
|
@source.ensure_buffer
|
|
440
|
-
if @source.match("/", true)
|
|
444
|
+
if @source.match?("/", true)
|
|
441
445
|
@namespaces_restore_stack.pop
|
|
442
446
|
last_tag = @tags.pop
|
|
443
447
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
|
@@ -452,25 +456,21 @@ module REXML
|
|
|
452
456
|
raise REXML::ParseException.new(message, @source)
|
|
453
457
|
end
|
|
454
458
|
return [ :end_element, last_tag ]
|
|
455
|
-
elsif @source.match("!", true)
|
|
456
|
-
md = @source.match(/([^>]*>)/um)
|
|
459
|
+
elsif @source.match?("!", true)
|
|
457
460
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
if
|
|
463
|
-
|
|
461
|
+
if @source.match?("--", true)
|
|
462
|
+
return [ :comment, process_comment ]
|
|
463
|
+
elsif @source.match?("[CDATA[", true)
|
|
464
|
+
text = @source.read_until("]]>")
|
|
465
|
+
if text.chomp!("]]>")
|
|
466
|
+
return [ :cdata, text ]
|
|
467
|
+
else
|
|
468
|
+
raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
|
|
464
469
|
end
|
|
465
|
-
|
|
466
|
-
return [ :comment, md[1] ]
|
|
467
470
|
else
|
|
468
|
-
|
|
469
|
-
return [ :cdata, md[1] ] if md
|
|
471
|
+
raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
|
|
470
472
|
end
|
|
471
|
-
|
|
472
|
-
"in the doctype declaration.", @source)
|
|
473
|
-
elsif @source.match("?", true)
|
|
473
|
+
elsif @source.match?("?", true)
|
|
474
474
|
return process_instruction
|
|
475
475
|
else
|
|
476
476
|
# Get the next tag
|
|
@@ -529,7 +529,8 @@ module REXML
|
|
|
529
529
|
raise REXML::ParseException.new( "Exception parsing",
|
|
530
530
|
@source, self, (error ? error : $!) )
|
|
531
531
|
end
|
|
532
|
-
|
|
532
|
+
# NOTE: The end of the method never runs, because it is unreachable.
|
|
533
|
+
# All branches of code above have explicit unconditional return or raise statements.
|
|
533
534
|
end
|
|
534
535
|
private :pull_event
|
|
535
536
|
|
|
@@ -648,10 +649,14 @@ module REXML
|
|
|
648
649
|
true
|
|
649
650
|
end
|
|
650
651
|
|
|
652
|
+
def normalize_xml_declaration_encoding(xml_declaration_encoding)
|
|
653
|
+
/\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
|
|
654
|
+
end
|
|
655
|
+
|
|
651
656
|
def parse_name(base_error_message)
|
|
652
657
|
md = @source.match(Private::NAME_PATTERN, true)
|
|
653
658
|
unless md
|
|
654
|
-
if @source.match(/\S/um)
|
|
659
|
+
if @source.match?(/\S/um)
|
|
655
660
|
message = "#{base_error_message}: invalid name"
|
|
656
661
|
else
|
|
657
662
|
message = "#{base_error_message}: name is missing"
|
|
@@ -693,73 +698,171 @@ module REXML
|
|
|
693
698
|
accept_public_id:)
|
|
694
699
|
public = /\A\s*PUBLIC/um
|
|
695
700
|
system = /\A\s*SYSTEM/um
|
|
696
|
-
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
|
697
|
-
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
701
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
|
702
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
698
703
|
return "public ID literal is missing"
|
|
699
704
|
end
|
|
700
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
705
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
701
706
|
return "invalid public ID literal"
|
|
702
707
|
end
|
|
703
708
|
if accept_public_id
|
|
704
|
-
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
709
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
705
710
|
return "system ID literal is missing"
|
|
706
711
|
end
|
|
707
|
-
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
712
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
708
713
|
return "invalid system literal"
|
|
709
714
|
end
|
|
710
715
|
"garbage after system literal"
|
|
711
716
|
else
|
|
712
717
|
"garbage after public ID literal"
|
|
713
718
|
end
|
|
714
|
-
elsif accept_external_id and @source.match(/#{system}/um)
|
|
715
|
-
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
719
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
|
720
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
716
721
|
return "system literal is missing"
|
|
717
722
|
end
|
|
718
|
-
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
723
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
719
724
|
return "invalid system literal"
|
|
720
725
|
end
|
|
721
726
|
"garbage after system literal"
|
|
722
727
|
else
|
|
723
|
-
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
728
|
+
unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
724
729
|
return "invalid ID type"
|
|
725
730
|
end
|
|
726
731
|
"ID type is missing"
|
|
727
732
|
end
|
|
728
733
|
end
|
|
729
734
|
|
|
735
|
+
def process_comment
|
|
736
|
+
text = @source.read_until("-->")
|
|
737
|
+
unless text.chomp!("-->")
|
|
738
|
+
raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
|
|
739
|
+
end
|
|
740
|
+
|
|
741
|
+
if text.include? "--" or text.end_with?("-")
|
|
742
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
|
743
|
+
end
|
|
744
|
+
text
|
|
745
|
+
end
|
|
746
|
+
|
|
730
747
|
def process_instruction
|
|
731
748
|
name = parse_name("Malformed XML: Invalid processing instruction node")
|
|
732
|
-
if
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
749
|
+
if name == "xml"
|
|
750
|
+
xml_declaration
|
|
751
|
+
else # PITarget
|
|
752
|
+
if @source.skip_spaces # e.g. <?name content?>
|
|
753
|
+
start_position = @source.position
|
|
754
|
+
content = @source.read_until("?>")
|
|
755
|
+
unless content.chomp!("?>")
|
|
756
|
+
@source.position = start_position
|
|
757
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
758
|
+
end
|
|
759
|
+
else # e.g. <?name?>
|
|
760
|
+
content = nil
|
|
761
|
+
unless @source.match?("?>", true)
|
|
762
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
|
|
763
|
+
end
|
|
736
764
|
end
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
765
|
+
[:processing_instruction, name, content]
|
|
766
|
+
end
|
|
767
|
+
end
|
|
768
|
+
|
|
769
|
+
def xml_declaration
|
|
770
|
+
unless @version.nil?
|
|
771
|
+
raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
|
|
772
|
+
end
|
|
773
|
+
if @document_status
|
|
774
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
|
775
|
+
end
|
|
776
|
+
unless @source.skip_spaces
|
|
777
|
+
raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
|
|
778
|
+
end
|
|
779
|
+
unless @source.match?("version", true)
|
|
780
|
+
raise ParseException.new("Malformed XML: XML declaration misses version", @source)
|
|
781
|
+
end
|
|
782
|
+
@version = parse_attribute_value_with_equal("xml")
|
|
783
|
+
unless @source.skip_spaces
|
|
784
|
+
unless @source.match?("?>", true)
|
|
785
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
742
786
|
end
|
|
787
|
+
encoding = normalize_xml_declaration_encoding(@source.encoding)
|
|
788
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
|
|
743
789
|
end
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
790
|
+
|
|
791
|
+
if @source.match?("encoding", true)
|
|
792
|
+
encoding = parse_attribute_value_with_equal("xml")
|
|
793
|
+
unless @source.skip_spaces
|
|
794
|
+
unless @source.match?("?>", true)
|
|
795
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
796
|
+
end
|
|
797
|
+
if need_source_encoding_update?(encoding)
|
|
798
|
+
@source.encoding = encoding
|
|
799
|
+
end
|
|
800
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
801
|
+
return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
|
|
747
802
|
end
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
803
|
+
end
|
|
804
|
+
|
|
805
|
+
if @source.match?("standalone", true)
|
|
806
|
+
standalone = parse_attribute_value_with_equal("xml")
|
|
807
|
+
case standalone
|
|
808
|
+
when "yes", "no"
|
|
809
|
+
else
|
|
810
|
+
raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
|
|
754
811
|
end
|
|
755
|
-
|
|
756
|
-
|
|
812
|
+
end
|
|
813
|
+
@source.skip_spaces
|
|
814
|
+
unless @source.match?("?>", true)
|
|
815
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
|
|
816
|
+
end
|
|
817
|
+
|
|
818
|
+
if need_source_encoding_update?(encoding)
|
|
819
|
+
@source.encoding = encoding
|
|
820
|
+
end
|
|
821
|
+
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
822
|
+
|
|
823
|
+
# e.g. <?xml version="1.0" ?>
|
|
824
|
+
# <?xml version="1.1" encoding="UTF-8" ?>
|
|
825
|
+
# <?xml version="1.1" standalone="yes"?>
|
|
826
|
+
# <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
|
|
827
|
+
[ :xmldecl, @version, encoding, standalone ]
|
|
828
|
+
end
|
|
829
|
+
|
|
830
|
+
if StringScanner::Version < "3.1.1"
|
|
831
|
+
def scan_quote
|
|
832
|
+
@source.match(/(['"])/, true)&.[](1)
|
|
833
|
+
end
|
|
834
|
+
else
|
|
835
|
+
def scan_quote
|
|
836
|
+
case @source.peek_byte
|
|
837
|
+
when 34 # '"'.ord
|
|
838
|
+
@source.scan_byte
|
|
839
|
+
'"'
|
|
840
|
+
when 39 # "'".ord
|
|
841
|
+
@source.scan_byte
|
|
842
|
+
"'"
|
|
843
|
+
else
|
|
844
|
+
nil
|
|
757
845
|
end
|
|
758
|
-
standalone = STANDALONE.match(content)
|
|
759
|
-
standalone = standalone[1] unless standalone.nil?
|
|
760
|
-
return [ :xmldecl, version, encoding, standalone ]
|
|
761
846
|
end
|
|
762
|
-
|
|
847
|
+
end
|
|
848
|
+
|
|
849
|
+
def parse_attribute_value_with_equal(name)
|
|
850
|
+
unless @source.match?(Private::EQUAL_PATTERN, true)
|
|
851
|
+
message = "Missing attribute equal: <#{name}>"
|
|
852
|
+
raise REXML::ParseException.new(message, @source)
|
|
853
|
+
end
|
|
854
|
+
unless quote = scan_quote
|
|
855
|
+
message = "Missing attribute value start quote: <#{name}>"
|
|
856
|
+
raise REXML::ParseException.new(message, @source)
|
|
857
|
+
end
|
|
858
|
+
start_position = @source.position
|
|
859
|
+
value = @source.read_until(quote)
|
|
860
|
+
unless value.chomp!(quote)
|
|
861
|
+
@source.position = start_position
|
|
862
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
863
|
+
raise REXML::ParseException.new(message, @source)
|
|
864
|
+
end
|
|
865
|
+
value
|
|
763
866
|
end
|
|
764
867
|
|
|
765
868
|
def parse_attributes(prefixes)
|
|
@@ -767,33 +870,17 @@ module REXML
|
|
|
767
870
|
expanded_names = {}
|
|
768
871
|
closed = false
|
|
769
872
|
while true
|
|
770
|
-
if @source.match(">", true)
|
|
873
|
+
if @source.match?(">", true)
|
|
771
874
|
return attributes, closed
|
|
772
|
-
elsif @source.match("/>", true)
|
|
875
|
+
elsif @source.match?("/>", true)
|
|
773
876
|
closed = true
|
|
774
877
|
return attributes, closed
|
|
775
878
|
elsif match = @source.match(QNAME, true)
|
|
776
879
|
name = match[1]
|
|
777
880
|
prefix = match[2]
|
|
778
881
|
local_part = match[3]
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
message = "Missing attribute equal: <#{name}>"
|
|
782
|
-
raise REXML::ParseException.new(message, @source)
|
|
783
|
-
end
|
|
784
|
-
unless match = @source.match(/(['"])/, true)
|
|
785
|
-
message = "Missing attribute value start quote: <#{name}>"
|
|
786
|
-
raise REXML::ParseException.new(message, @source)
|
|
787
|
-
end
|
|
788
|
-
quote = match[1]
|
|
789
|
-
start_position = @source.position
|
|
790
|
-
value = @source.read_until(quote)
|
|
791
|
-
unless value.chomp!(quote)
|
|
792
|
-
@source.position = start_position
|
|
793
|
-
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
794
|
-
raise REXML::ParseException.new(message, @source)
|
|
795
|
-
end
|
|
796
|
-
@source.match(/\s*/um, true)
|
|
882
|
+
value = parse_attribute_value_with_equal(name)
|
|
883
|
+
@source.skip_spaces
|
|
797
884
|
if prefix == "xmlns"
|
|
798
885
|
if local_part == "xml"
|
|
799
886
|
if value != Private::XML_PREFIXED_NAMESPACE
|
|
data/lib/rexml/parsers/xpathparser.rb
CHANGED
|
@@ -215,7 +215,7 @@ module REXML
|
|
|
215
215
|
else
|
|
216
216
|
path << yield( parsed )
|
|
217
217
|
end
|
|
218
|
-
|
|
218
|
+
path.squeeze(" ")
|
|
219
219
|
end
|
|
220
220
|
# For backward compatibility
|
|
221
221
|
alias_method :preciate_to_string, :predicate_to_path
|
|
@@ -252,7 +252,7 @@ module REXML
|
|
|
252
252
|
path = path[1..-1]
|
|
253
253
|
end
|
|
254
254
|
end
|
|
255
|
-
|
|
255
|
+
RelativeLocationPath( path, parsed ) if path.size > 0
|
|
256
256
|
end
|
|
257
257
|
|
|
258
258
|
#RelativeLocationPath
|
|
@@ -388,7 +388,7 @@ module REXML
|
|
|
388
388
|
else
|
|
389
389
|
path = original_path
|
|
390
390
|
end
|
|
391
|
-
|
|
391
|
+
path
|
|
392
392
|
end
|
|
393
393
|
|
|
394
394
|
# Filters the supplied nodeset on the predicate(s)
|
|
@@ -600,7 +600,7 @@ module REXML
|
|
|
600
600
|
end
|
|
601
601
|
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
|
|
602
602
|
parsed.concat(n)
|
|
603
|
-
|
|
603
|
+
rest
|
|
604
604
|
end
|
|
605
605
|
|
|
606
606
|
#| FilterExpr Predicate
|