rexml 3.2.8 → 3.3.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +306 -2
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +233 -64
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +79 -16
- data/lib/rexml/text.rb +39 -17
- metadata +5 -18
@@ -1,12 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
27
|
+
if StringScanner::Version < "3.0.8"
|
28
|
+
module StringScannerCaptures
|
29
|
+
refine StringScanner do
|
30
|
+
def captures
|
31
|
+
values_at(*(1...size))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
using StringScannerCaptures
|
36
|
+
end
|
37
|
+
|
10
38
|
# = Using the Pull Parser
|
11
39
|
# <em>This API is experimental, and subject to change.</em>
|
12
40
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -113,21 +141,33 @@ module REXML
|
|
113
141
|
}
|
114
142
|
|
115
143
|
module Private
|
116
|
-
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
117
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
121
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
155
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
|
+
default_entities.each do |term|
|
157
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
|
+
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
124
160
|
end
|
125
161
|
private_constant :Private
|
126
|
-
include Private
|
127
162
|
|
128
163
|
def initialize( source )
|
129
164
|
self.stream = source
|
130
165
|
@listeners = []
|
166
|
+
@prefixes = Set.new
|
167
|
+
@entity_expansion_count = 0
|
168
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
169
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
131
171
|
end
|
132
172
|
|
133
173
|
def add_listener( listener )
|
@@ -135,15 +175,20 @@ module REXML
|
|
135
175
|
end
|
136
176
|
|
137
177
|
attr_reader :source
|
178
|
+
attr_reader :entity_expansion_count
|
179
|
+
attr_writer :entity_expansion_limit
|
180
|
+
attr_writer :entity_expansion_text_limit
|
138
181
|
|
139
182
|
def stream=( source )
|
140
183
|
@source = SourceFactory.create_from( source )
|
141
184
|
@closed = nil
|
185
|
+
@have_root = false
|
142
186
|
@document_status = nil
|
143
187
|
@tags = []
|
144
188
|
@stack = []
|
145
189
|
@entities = []
|
146
|
-
@
|
190
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
191
|
+
@namespaces_restore_stack = []
|
147
192
|
end
|
148
193
|
|
149
194
|
def position
|
@@ -193,6 +238,8 @@ module REXML
|
|
193
238
|
|
194
239
|
# Returns the next event. This is a +PullEvent+ object.
|
195
240
|
def pull
|
241
|
+
@source.drop_parsed_content
|
242
|
+
|
196
243
|
pull_event.tap do |event|
|
197
244
|
@listeners.each do |listener|
|
198
245
|
listener.receive event
|
@@ -205,7 +252,16 @@ module REXML
|
|
205
252
|
x, @closed = @closed, nil
|
206
253
|
return [ :end_element, x ]
|
207
254
|
end
|
208
|
-
|
255
|
+
if empty?
|
256
|
+
if @document_status == :in_doctype
|
257
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
258
|
+
end
|
259
|
+
unless @tags.empty?
|
260
|
+
path = "/" + @tags.join("/")
|
261
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
262
|
+
end
|
263
|
+
return [ :end_document ]
|
264
|
+
end
|
209
265
|
return @stack.shift if @stack.size > 0
|
210
266
|
#STDERR.puts @source.encoding
|
211
267
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -214,10 +270,17 @@ module REXML
|
|
214
270
|
if @document_status == nil
|
215
271
|
start_position = @source.position
|
216
272
|
if @source.match("<?", true)
|
217
|
-
return process_instruction
|
273
|
+
return process_instruction
|
218
274
|
elsif @source.match("<!", true)
|
219
275
|
if @source.match("--", true)
|
220
|
-
|
276
|
+
md = @source.match(/(.*?)-->/um, true)
|
277
|
+
if md.nil?
|
278
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
279
|
+
end
|
280
|
+
if /--|-\z/.match?(md[1])
|
281
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
282
|
+
end
|
283
|
+
return [ :comment, md[1] ]
|
221
284
|
elsif @source.match("DOCTYPE", true)
|
222
285
|
base_error_message = "Malformed DOCTYPE"
|
223
286
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +292,6 @@ module REXML
|
|
229
292
|
@source.position = start_position
|
230
293
|
raise REXML::ParseException.new(message, @source)
|
231
294
|
end
|
232
|
-
@nsstack.unshift(curr_ns=Set.new)
|
233
295
|
name = parse_name(base_error_message)
|
234
296
|
if @source.match(/\s*\[/um, true)
|
235
297
|
id = [nil, nil, nil]
|
@@ -277,7 +339,11 @@ module REXML
|
|
277
339
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
340
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
341
|
elsif @source.match("ENTITY", true)
|
280
|
-
|
342
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
343
|
+
unless match_data
|
344
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
345
|
+
end
|
346
|
+
match = [:entitydecl, *match_data.captures.compact]
|
281
347
|
ref = false
|
282
348
|
if match[1] == '%'
|
283
349
|
ref = true
|
@@ -295,6 +361,8 @@ module REXML
|
|
295
361
|
match[4] = match[4][1..-2] # HREF
|
296
362
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
363
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
364
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
365
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
298
366
|
else
|
299
367
|
match[2] = match[2][1..-2]
|
300
368
|
match.pop if match.size == 4
|
@@ -303,13 +371,13 @@ module REXML
|
|
303
371
|
match << '%' if ref
|
304
372
|
return match
|
305
373
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
374
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
375
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
376
|
element = md[1]
|
309
377
|
contents = md[0]
|
310
378
|
|
311
379
|
pairs = {}
|
312
|
-
values = md[0].scan( ATTDEF_RE )
|
380
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
313
381
|
values.each do |attdef|
|
314
382
|
unless attdef[3] == "#IMPLIED"
|
315
383
|
attdef.compact!
|
@@ -317,7 +385,7 @@ module REXML
|
|
317
385
|
val = attdef[4] if val == "#FIXED "
|
318
386
|
pairs[attdef[0]] = val
|
319
387
|
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
-
@
|
388
|
+
@namespaces[$1] = val
|
321
389
|
end
|
322
390
|
end
|
323
391
|
end
|
@@ -355,6 +423,9 @@ module REXML
|
|
355
423
|
@document_status = :after_doctype
|
356
424
|
return [ :end_doctype ]
|
357
425
|
end
|
426
|
+
if @document_status == :in_doctype
|
427
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
428
|
+
end
|
358
429
|
end
|
359
430
|
if @document_status == :after_doctype
|
360
431
|
@source.match(/\s*/um, true)
|
@@ -362,10 +433,14 @@ module REXML
|
|
362
433
|
begin
|
363
434
|
start_position = @source.position
|
364
435
|
if @source.match("<", true)
|
436
|
+
# :text's read_until may remain only "<" in buffer. In the
|
437
|
+
# case, buffer is empty here. So we need to fill buffer
|
438
|
+
# here explicitly.
|
439
|
+
@source.ensure_buffer
|
365
440
|
if @source.match("/", true)
|
366
|
-
@
|
441
|
+
@namespaces_restore_stack.pop
|
367
442
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
443
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
444
|
if md and !last_tag
|
370
445
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
446
|
raise REXML::ParseException.new(message, @source)
|
@@ -384,12 +459,11 @@ module REXML
|
|
384
459
|
if md[0][0] == ?-
|
385
460
|
md = @source.match(/--(.*?)-->/um, true)
|
386
461
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
462
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
463
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
464
|
end
|
391
465
|
|
392
|
-
return [ :comment, md[1] ]
|
466
|
+
return [ :comment, md[1] ]
|
393
467
|
else
|
394
468
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
395
469
|
return [ :cdata, md[1] ] if md
|
@@ -397,38 +471,54 @@ module REXML
|
|
397
471
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
398
472
|
"in the doctype declaration.", @source)
|
399
473
|
elsif @source.match("?", true)
|
400
|
-
return process_instruction
|
474
|
+
return process_instruction
|
401
475
|
else
|
402
476
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
477
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
478
|
unless md
|
405
479
|
@source.position = start_position
|
406
480
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
481
|
end
|
408
482
|
tag = md[1]
|
409
483
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
412
|
-
|
413
|
-
attributes, closed = parse_attributes(prefixes
|
484
|
+
@prefixes.clear
|
485
|
+
@prefixes << md[2] if md[2]
|
486
|
+
push_namespaces_restore
|
487
|
+
attributes, closed = parse_attributes(@prefixes)
|
414
488
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
416
|
-
unless @
|
489
|
+
for prefix in @prefixes
|
490
|
+
unless @namespaces.key?(prefix)
|
417
491
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
492
|
end
|
419
493
|
end
|
420
494
|
|
421
495
|
if closed
|
422
496
|
@closed = tag
|
423
|
-
|
497
|
+
pop_namespaces_restore
|
424
498
|
else
|
499
|
+
if @tags.empty? and @have_root
|
500
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
501
|
+
end
|
425
502
|
@tags.push( tag )
|
426
503
|
end
|
504
|
+
@have_root = true
|
427
505
|
return [ :start_element, tag, attributes ]
|
428
506
|
end
|
429
507
|
else
|
430
|
-
|
431
|
-
text
|
508
|
+
text = @source.read_until("<")
|
509
|
+
if text.chomp!("<")
|
510
|
+
@source.position -= "<".bytesize
|
511
|
+
end
|
512
|
+
if @tags.empty?
|
513
|
+
unless /\A\s*\z/.match?(text)
|
514
|
+
if @have_root
|
515
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
516
|
+
else
|
517
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
518
|
+
end
|
519
|
+
end
|
520
|
+
return pull_event if @have_root
|
521
|
+
end
|
432
522
|
return [ :text, text ]
|
433
523
|
end
|
434
524
|
rescue REXML::UndefinedNamespaceException
|
@@ -444,13 +534,13 @@ module REXML
|
|
444
534
|
private :pull_event
|
445
535
|
|
446
536
|
def entity( reference, entities )
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
unnormalize( value, entities )
|
537
|
+
return unless entities
|
538
|
+
|
539
|
+
value = entities[ reference ]
|
540
|
+
return if value.nil?
|
541
|
+
|
542
|
+
record_entity_expansion
|
543
|
+
unnormalize( value, entities )
|
454
544
|
end
|
455
545
|
|
456
546
|
# Escapes all possible entities
|
@@ -471,34 +561,87 @@ module REXML
|
|
471
561
|
|
472
562
|
# Unescapes all possible entities
|
473
563
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
|
564
|
+
if string.include?("\r")
|
565
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
566
|
+
else
|
567
|
+
rv = string.dup
|
568
|
+
end
|
475
569
|
matches = rv.scan( REFERENCE_RE )
|
476
570
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
571
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
572
|
m=$1
|
479
|
-
|
480
|
-
|
573
|
+
if m.start_with?("x")
|
574
|
+
code_point = Integer(m[1..-1], 16)
|
575
|
+
else
|
576
|
+
code_point = Integer(m, 10)
|
577
|
+
end
|
578
|
+
[code_point].pack('U*')
|
481
579
|
}
|
482
580
|
matches.collect!{|x|x[0]}.compact!
|
581
|
+
if filter
|
582
|
+
matches.reject! do |entity_reference|
|
583
|
+
filter.include?(entity_reference)
|
584
|
+
end
|
585
|
+
end
|
483
586
|
if matches.size > 0
|
484
|
-
matches.each do |entity_reference|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
587
|
+
matches.tally.each do |entity_reference, n|
|
588
|
+
entity_expansion_count_before = @entity_expansion_count
|
589
|
+
entity_value = entity( entity_reference, entities )
|
590
|
+
if entity_value
|
591
|
+
if n > 1
|
592
|
+
entity_expansion_count_delta =
|
593
|
+
@entity_expansion_count - entity_expansion_count_before
|
594
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
595
|
+
end
|
596
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
597
|
+
rv.gsub!( re, entity_value )
|
598
|
+
if rv.bytesize > @entity_expansion_text_limit
|
599
|
+
raise "entity expansion has grown too large"
|
493
600
|
end
|
601
|
+
else
|
602
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
603
|
+
rv.gsub!( er[0], er[2] ) if er
|
494
604
|
end
|
495
605
|
end
|
496
|
-
rv.gsub!(
|
606
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
607
|
end
|
498
608
|
rv
|
499
609
|
end
|
500
610
|
|
501
611
|
private
|
612
|
+
def add_namespace(prefix, uri)
|
613
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
614
|
+
if uri.nil?
|
615
|
+
@namespaces.delete(prefix)
|
616
|
+
else
|
617
|
+
@namespaces[prefix] = uri
|
618
|
+
end
|
619
|
+
end
|
620
|
+
|
621
|
+
def push_namespaces_restore
|
622
|
+
namespaces_restore = {}
|
623
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
624
|
+
namespaces_restore
|
625
|
+
end
|
626
|
+
|
627
|
+
def pop_namespaces_restore
|
628
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
629
|
+
namespaces_restore.each do |prefix, uri|
|
630
|
+
if uri.nil?
|
631
|
+
@namespaces.delete(prefix)
|
632
|
+
else
|
633
|
+
@namespaces[prefix] = uri
|
634
|
+
end
|
635
|
+
end
|
636
|
+
end
|
637
|
+
|
638
|
+
def record_entity_expansion(delta=1)
|
639
|
+
@entity_expansion_count += delta
|
640
|
+
if @entity_expansion_count > @entity_expansion_limit
|
641
|
+
raise "number of entity expansions exceeded, processing aborted."
|
642
|
+
end
|
643
|
+
end
|
644
|
+
|
502
645
|
def need_source_encoding_update?(xml_declaration_encoding)
|
503
646
|
return false if xml_declaration_encoding.nil?
|
504
647
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -506,16 +649,16 @@ module REXML
|
|
506
649
|
end
|
507
650
|
|
508
651
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
652
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
653
|
unless md
|
511
|
-
if @source.match(/\
|
654
|
+
if @source.match(/\S/um)
|
512
655
|
message = "#{base_error_message}: invalid name"
|
513
656
|
else
|
514
657
|
message = "#{base_error_message}: name is missing"
|
515
658
|
end
|
516
659
|
raise REXML::ParseException.new(message, @source)
|
517
660
|
end
|
518
|
-
md[
|
661
|
+
md[0]
|
519
662
|
end
|
520
663
|
|
521
664
|
def parse_id(base_error_message,
|
@@ -584,15 +727,24 @@ module REXML
|
|
584
727
|
end
|
585
728
|
end
|
586
729
|
|
587
|
-
def process_instruction
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
730
|
+
def process_instruction
|
731
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
732
|
+
if @source.match(/\s+/um, true)
|
733
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
734
|
+
unless match_data
|
735
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
736
|
+
end
|
737
|
+
content = match_data[1]
|
738
|
+
else
|
739
|
+
content = nil
|
740
|
+
unless @source.match("?>", true)
|
741
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
742
|
+
end
|
593
743
|
end
|
594
|
-
if
|
595
|
-
|
744
|
+
if name == "xml"
|
745
|
+
if @document_status
|
746
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
747
|
+
end
|
596
748
|
version = VERSION.match(content)
|
597
749
|
version = version[1] unless version.nil?
|
598
750
|
encoding = ENCODING.match(content)
|
@@ -607,11 +759,12 @@ module REXML
|
|
607
759
|
standalone = standalone[1] unless standalone.nil?
|
608
760
|
return [ :xmldecl, version, encoding, standalone ]
|
609
761
|
end
|
610
|
-
[:processing_instruction,
|
762
|
+
[:processing_instruction, name, content]
|
611
763
|
end
|
612
764
|
|
613
|
-
def parse_attributes(prefixes
|
765
|
+
def parse_attributes(prefixes)
|
614
766
|
attributes = {}
|
767
|
+
expanded_names = {}
|
615
768
|
closed = false
|
616
769
|
while true
|
617
770
|
if @source.match(">", true)
|
@@ -633,15 +786,17 @@ module REXML
|
|
633
786
|
raise REXML::ParseException.new(message, @source)
|
634
787
|
end
|
635
788
|
quote = match[1]
|
789
|
+
start_position = @source.position
|
636
790
|
value = @source.read_until(quote)
|
637
791
|
unless value.chomp!(quote)
|
792
|
+
@source.position = start_position
|
638
793
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
794
|
raise REXML::ParseException.new(message, @source)
|
640
795
|
end
|
641
796
|
@source.match(/\s*/um, true)
|
642
797
|
if prefix == "xmlns"
|
643
798
|
if local_part == "xml"
|
644
|
-
if value !=
|
799
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
645
800
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
646
801
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
647
802
|
raise REXML::ParseException.new( msg, @source, self )
|
@@ -651,7 +806,7 @@ module REXML
|
|
651
806
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
652
807
|
raise REXML::ParseException.new( msg, @source, self)
|
653
808
|
end
|
654
|
-
|
809
|
+
add_namespace(local_part, value)
|
655
810
|
elsif prefix
|
656
811
|
prefixes << prefix unless prefix == "xml"
|
657
812
|
end
|
@@ -661,6 +816,20 @@ module REXML
|
|
661
816
|
raise REXML::ParseException.new(msg, @source, self)
|
662
817
|
end
|
663
818
|
|
819
|
+
unless prefix == "xmlns"
|
820
|
+
uri = @namespaces[prefix]
|
821
|
+
expanded_name = [uri, local_part]
|
822
|
+
existing_prefix = expanded_names[expanded_name]
|
823
|
+
if existing_prefix
|
824
|
+
message = "Namespace conflict in adding attribute " +
|
825
|
+
"\"#{local_part}\": " +
|
826
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
827
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
828
|
+
raise REXML::ParseException.new(message, @source, self)
|
829
|
+
end
|
830
|
+
expanded_names[expanded_name] = prefix
|
831
|
+
end
|
832
|
+
|
664
833
|
attributes[name] = value
|
665
834
|
else
|
666
835
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -22,6 +22,18 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
25
37
|
def add_listener( listener )
|
26
38
|
@parser.add_listener( listener )
|
27
39
|
end
|
@@ -157,25 +169,8 @@ module REXML
|
|
157
169
|
end
|
158
170
|
end
|
159
171
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
172
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
173
|
+
handle( :characters, unnormalized )
|
179
174
|
when :entitydecl
|
180
175
|
handle_entitydecl( event )
|
181
176
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -264,6 +259,8 @@ module REXML
|
|
264
259
|
end
|
265
260
|
|
266
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
267
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
268
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
269
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
@@ -7,37 +7,42 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
17
29
|
def parse
|
18
30
|
# entity string
|
19
31
|
while true
|
20
32
|
event = @parser.pull
|
21
33
|
case event[0]
|
22
34
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
35
|
return
|
29
36
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
37
|
attrs = event[2].each do |n, v|
|
32
38
|
event[2][n] = @parser.unnormalize( v )
|
33
39
|
end
|
34
40
|
@listener.tag_start( event[1], attrs )
|
35
41
|
when :end_element
|
36
42
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
43
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
44
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
45
|
+
@listener.text( unnormalized )
|
41
46
|
when :processing_instruction
|
42
47
|
@listener.instruction( *event[1,2] )
|
43
48
|
when :start_doctype
|
@@ -48,6 +53,7 @@ module REXML
|
|
48
53
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
54
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
55
|
when :entitydecl, :notationdecl
|
56
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
57
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
58
|
when :externalentity
|
53
59
|
entity_reference = event[1]
|