rexml 3.2.8 → 3.3.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +242 -2
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +5 -47
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +220 -61
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +6 -19
- data/lib/rexml/parsers/streamparser.rb +8 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +72 -15
- data/lib/rexml/text.rb +34 -14
- metadata +6 -5
@@ -1,12 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
27
|
+
if StringScanner::Version < "3.0.8"
|
28
|
+
module StringScannerCaptures
|
29
|
+
refine StringScanner do
|
30
|
+
def captures
|
31
|
+
values_at(*(1...size))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
using StringScannerCaptures
|
36
|
+
end
|
37
|
+
|
10
38
|
# = Using the Pull Parser
|
11
39
|
# <em>This API is experimental, and subject to change.</em>
|
12
40
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -113,21 +141,29 @@ module REXML
|
|
113
141
|
}
|
114
142
|
|
115
143
|
module Private
|
116
|
-
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
117
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
121
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
155
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
|
+
default_entities.each do |term|
|
157
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
|
+
end
|
124
159
|
end
|
125
160
|
private_constant :Private
|
126
|
-
include Private
|
127
161
|
|
128
162
|
def initialize( source )
|
129
163
|
self.stream = source
|
130
164
|
@listeners = []
|
165
|
+
@prefixes = Set.new
|
166
|
+
@entity_expansion_count = 0
|
131
167
|
end
|
132
168
|
|
133
169
|
def add_listener( listener )
|
@@ -135,15 +171,18 @@ module REXML
|
|
135
171
|
end
|
136
172
|
|
137
173
|
attr_reader :source
|
174
|
+
attr_reader :entity_expansion_count
|
138
175
|
|
139
176
|
def stream=( source )
|
140
177
|
@source = SourceFactory.create_from( source )
|
141
178
|
@closed = nil
|
179
|
+
@have_root = false
|
142
180
|
@document_status = nil
|
143
181
|
@tags = []
|
144
182
|
@stack = []
|
145
183
|
@entities = []
|
146
|
-
@
|
184
|
+
@namespaces = {}
|
185
|
+
@namespaces_restore_stack = []
|
147
186
|
end
|
148
187
|
|
149
188
|
def position
|
@@ -193,6 +232,8 @@ module REXML
|
|
193
232
|
|
194
233
|
# Returns the next event. This is a +PullEvent+ object.
|
195
234
|
def pull
|
235
|
+
@source.drop_parsed_content
|
236
|
+
|
196
237
|
pull_event.tap do |event|
|
197
238
|
@listeners.each do |listener|
|
198
239
|
listener.receive event
|
@@ -205,7 +246,16 @@ module REXML
|
|
205
246
|
x, @closed = @closed, nil
|
206
247
|
return [ :end_element, x ]
|
207
248
|
end
|
208
|
-
|
249
|
+
if empty?
|
250
|
+
if @document_status == :in_doctype
|
251
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
252
|
+
end
|
253
|
+
unless @tags.empty?
|
254
|
+
path = "/" + @tags.join("/")
|
255
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
256
|
+
end
|
257
|
+
return [ :end_document ]
|
258
|
+
end
|
209
259
|
return @stack.shift if @stack.size > 0
|
210
260
|
#STDERR.puts @source.encoding
|
211
261
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -214,10 +264,17 @@ module REXML
|
|
214
264
|
if @document_status == nil
|
215
265
|
start_position = @source.position
|
216
266
|
if @source.match("<?", true)
|
217
|
-
return process_instruction
|
267
|
+
return process_instruction
|
218
268
|
elsif @source.match("<!", true)
|
219
269
|
if @source.match("--", true)
|
220
|
-
|
270
|
+
md = @source.match(/(.*?)-->/um, true)
|
271
|
+
if md.nil?
|
272
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
273
|
+
end
|
274
|
+
if /--|-\z/.match?(md[1])
|
275
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
276
|
+
end
|
277
|
+
return [ :comment, md[1] ]
|
221
278
|
elsif @source.match("DOCTYPE", true)
|
222
279
|
base_error_message = "Malformed DOCTYPE"
|
223
280
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +286,6 @@ module REXML
|
|
229
286
|
@source.position = start_position
|
230
287
|
raise REXML::ParseException.new(message, @source)
|
231
288
|
end
|
232
|
-
@nsstack.unshift(curr_ns=Set.new)
|
233
289
|
name = parse_name(base_error_message)
|
234
290
|
if @source.match(/\s*\[/um, true)
|
235
291
|
id = [nil, nil, nil]
|
@@ -277,7 +333,11 @@ module REXML
|
|
277
333
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
334
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
335
|
elsif @source.match("ENTITY", true)
|
280
|
-
|
336
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
337
|
+
unless match_data
|
338
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
339
|
+
end
|
340
|
+
match = [:entitydecl, *match_data.captures.compact]
|
281
341
|
ref = false
|
282
342
|
if match[1] == '%'
|
283
343
|
ref = true
|
@@ -295,6 +355,8 @@ module REXML
|
|
295
355
|
match[4] = match[4][1..-2] # HREF
|
296
356
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
357
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
358
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
359
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
298
360
|
else
|
299
361
|
match[2] = match[2][1..-2]
|
300
362
|
match.pop if match.size == 4
|
@@ -303,13 +365,13 @@ module REXML
|
|
303
365
|
match << '%' if ref
|
304
366
|
return match
|
305
367
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
368
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
369
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
370
|
element = md[1]
|
309
371
|
contents = md[0]
|
310
372
|
|
311
373
|
pairs = {}
|
312
|
-
values = md[0].scan( ATTDEF_RE )
|
374
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
313
375
|
values.each do |attdef|
|
314
376
|
unless attdef[3] == "#IMPLIED"
|
315
377
|
attdef.compact!
|
@@ -317,7 +379,7 @@ module REXML
|
|
317
379
|
val = attdef[4] if val == "#FIXED "
|
318
380
|
pairs[attdef[0]] = val
|
319
381
|
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
-
@
|
382
|
+
@namespaces[$1] = val
|
321
383
|
end
|
322
384
|
end
|
323
385
|
end
|
@@ -355,6 +417,9 @@ module REXML
|
|
355
417
|
@document_status = :after_doctype
|
356
418
|
return [ :end_doctype ]
|
357
419
|
end
|
420
|
+
if @document_status == :in_doctype
|
421
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
422
|
+
end
|
358
423
|
end
|
359
424
|
if @document_status == :after_doctype
|
360
425
|
@source.match(/\s*/um, true)
|
@@ -362,10 +427,14 @@ module REXML
|
|
362
427
|
begin
|
363
428
|
start_position = @source.position
|
364
429
|
if @source.match("<", true)
|
430
|
+
# :text's read_until may remain only "<" in buffer. In the
|
431
|
+
# case, buffer is empty here. So we need to fill buffer
|
432
|
+
# here explicitly.
|
433
|
+
@source.ensure_buffer
|
365
434
|
if @source.match("/", true)
|
366
|
-
@
|
435
|
+
@namespaces_restore_stack.pop
|
367
436
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
437
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
438
|
if md and !last_tag
|
370
439
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
440
|
raise REXML::ParseException.new(message, @source)
|
@@ -384,12 +453,11 @@ module REXML
|
|
384
453
|
if md[0][0] == ?-
|
385
454
|
md = @source.match(/--(.*?)-->/um, true)
|
386
455
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
456
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
457
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
458
|
end
|
391
459
|
|
392
|
-
return [ :comment, md[1] ]
|
460
|
+
return [ :comment, md[1] ]
|
393
461
|
else
|
394
462
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
395
463
|
return [ :cdata, md[1] ] if md
|
@@ -397,38 +465,54 @@ module REXML
|
|
397
465
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
398
466
|
"in the doctype declaration.", @source)
|
399
467
|
elsif @source.match("?", true)
|
400
|
-
return process_instruction
|
468
|
+
return process_instruction
|
401
469
|
else
|
402
470
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
471
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
472
|
unless md
|
405
473
|
@source.position = start_position
|
406
474
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
475
|
end
|
408
476
|
tag = md[1]
|
409
477
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
412
|
-
|
413
|
-
attributes, closed = parse_attributes(prefixes
|
478
|
+
@prefixes.clear
|
479
|
+
@prefixes << md[2] if md[2]
|
480
|
+
push_namespaces_restore
|
481
|
+
attributes, closed = parse_attributes(@prefixes)
|
414
482
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
416
|
-
unless @
|
483
|
+
for prefix in @prefixes
|
484
|
+
unless @namespaces.key?(prefix)
|
417
485
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
486
|
end
|
419
487
|
end
|
420
488
|
|
421
489
|
if closed
|
422
490
|
@closed = tag
|
423
|
-
|
491
|
+
pop_namespaces_restore
|
424
492
|
else
|
493
|
+
if @tags.empty? and @have_root
|
494
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
495
|
+
end
|
425
496
|
@tags.push( tag )
|
426
497
|
end
|
498
|
+
@have_root = true
|
427
499
|
return [ :start_element, tag, attributes ]
|
428
500
|
end
|
429
501
|
else
|
430
|
-
|
431
|
-
text
|
502
|
+
text = @source.read_until("<")
|
503
|
+
if text.chomp!("<")
|
504
|
+
@source.position -= "<".bytesize
|
505
|
+
end
|
506
|
+
if @tags.empty?
|
507
|
+
unless /\A\s*\z/.match?(text)
|
508
|
+
if @have_root
|
509
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
510
|
+
else
|
511
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
512
|
+
end
|
513
|
+
end
|
514
|
+
return pull_event if @have_root
|
515
|
+
end
|
432
516
|
return [ :text, text ]
|
433
517
|
end
|
434
518
|
rescue REXML::UndefinedNamespaceException
|
@@ -444,13 +528,13 @@ module REXML
|
|
444
528
|
private :pull_event
|
445
529
|
|
446
530
|
def entity( reference, entities )
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
unnormalize( value, entities )
|
531
|
+
return unless entities
|
532
|
+
|
533
|
+
value = entities[ reference ]
|
534
|
+
return if value.nil?
|
535
|
+
|
536
|
+
record_entity_expansion
|
537
|
+
unnormalize( value, entities )
|
454
538
|
end
|
455
539
|
|
456
540
|
# Escapes all possible entities
|
@@ -471,34 +555,83 @@ module REXML
|
|
471
555
|
|
472
556
|
# Unescapes all possible entities
|
473
557
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
|
558
|
+
if string.include?("\r")
|
559
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
560
|
+
else
|
561
|
+
rv = string.dup
|
562
|
+
end
|
475
563
|
matches = rv.scan( REFERENCE_RE )
|
476
564
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
565
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
566
|
m=$1
|
479
567
|
m = "0#{m}" if m[0] == ?x
|
480
568
|
[Integer(m)].pack('U*')
|
481
569
|
}
|
482
570
|
matches.collect!{|x|x[0]}.compact!
|
571
|
+
if filter
|
572
|
+
matches.reject! do |entity_reference|
|
573
|
+
filter.include?(entity_reference)
|
574
|
+
end
|
575
|
+
end
|
483
576
|
if matches.size > 0
|
484
|
-
matches.each do |entity_reference|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
577
|
+
matches.tally.each do |entity_reference, n|
|
578
|
+
entity_expansion_count_before = @entity_expansion_count
|
579
|
+
entity_value = entity( entity_reference, entities )
|
580
|
+
if entity_value
|
581
|
+
if n > 1
|
582
|
+
entity_expansion_count_delta =
|
583
|
+
@entity_expansion_count - entity_expansion_count_before
|
584
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
585
|
+
end
|
586
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
587
|
+
rv.gsub!( re, entity_value )
|
588
|
+
if rv.bytesize > Security.entity_expansion_text_limit
|
589
|
+
raise "entity expansion has grown too large"
|
493
590
|
end
|
591
|
+
else
|
592
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
593
|
+
rv.gsub!( er[0], er[2] ) if er
|
494
594
|
end
|
495
595
|
end
|
496
|
-
rv.gsub!(
|
596
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
597
|
end
|
498
598
|
rv
|
499
599
|
end
|
500
600
|
|
501
601
|
private
|
602
|
+
def add_namespace(prefix, uri)
|
603
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
604
|
+
if uri.nil?
|
605
|
+
@namespaces.delete(prefix)
|
606
|
+
else
|
607
|
+
@namespaces[prefix] = uri
|
608
|
+
end
|
609
|
+
end
|
610
|
+
|
611
|
+
def push_namespaces_restore
|
612
|
+
namespaces_restore = {}
|
613
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
614
|
+
namespaces_restore
|
615
|
+
end
|
616
|
+
|
617
|
+
def pop_namespaces_restore
|
618
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
619
|
+
namespaces_restore.each do |prefix, uri|
|
620
|
+
if uri.nil?
|
621
|
+
@namespaces.delete(prefix)
|
622
|
+
else
|
623
|
+
@namespaces[prefix] = uri
|
624
|
+
end
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def record_entity_expansion(delta=1)
|
629
|
+
@entity_expansion_count += delta
|
630
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
631
|
+
raise "number of entity expansions exceeded, processing aborted."
|
632
|
+
end
|
633
|
+
end
|
634
|
+
|
502
635
|
def need_source_encoding_update?(xml_declaration_encoding)
|
503
636
|
return false if xml_declaration_encoding.nil?
|
504
637
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -506,16 +639,16 @@ module REXML
|
|
506
639
|
end
|
507
640
|
|
508
641
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
642
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
643
|
unless md
|
511
|
-
if @source.match(/\
|
644
|
+
if @source.match(/\S/um)
|
512
645
|
message = "#{base_error_message}: invalid name"
|
513
646
|
else
|
514
647
|
message = "#{base_error_message}: name is missing"
|
515
648
|
end
|
516
649
|
raise REXML::ParseException.new(message, @source)
|
517
650
|
end
|
518
|
-
md[
|
651
|
+
md[0]
|
519
652
|
end
|
520
653
|
|
521
654
|
def parse_id(base_error_message,
|
@@ -584,15 +717,24 @@ module REXML
|
|
584
717
|
end
|
585
718
|
end
|
586
719
|
|
587
|
-
def process_instruction
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
720
|
+
def process_instruction
|
721
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
722
|
+
if @source.match(/\s+/um, true)
|
723
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
724
|
+
unless match_data
|
725
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
726
|
+
end
|
727
|
+
content = match_data[1]
|
728
|
+
else
|
729
|
+
content = nil
|
730
|
+
unless @source.match("?>", true)
|
731
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
732
|
+
end
|
593
733
|
end
|
594
|
-
if
|
595
|
-
|
734
|
+
if name == "xml"
|
735
|
+
if @document_status
|
736
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
737
|
+
end
|
596
738
|
version = VERSION.match(content)
|
597
739
|
version = version[1] unless version.nil?
|
598
740
|
encoding = ENCODING.match(content)
|
@@ -607,11 +749,12 @@ module REXML
|
|
607
749
|
standalone = standalone[1] unless standalone.nil?
|
608
750
|
return [ :xmldecl, version, encoding, standalone ]
|
609
751
|
end
|
610
|
-
[:processing_instruction,
|
752
|
+
[:processing_instruction, name, content]
|
611
753
|
end
|
612
754
|
|
613
|
-
def parse_attributes(prefixes
|
755
|
+
def parse_attributes(prefixes)
|
614
756
|
attributes = {}
|
757
|
+
expanded_names = {}
|
615
758
|
closed = false
|
616
759
|
while true
|
617
760
|
if @source.match(">", true)
|
@@ -633,8 +776,10 @@ module REXML
|
|
633
776
|
raise REXML::ParseException.new(message, @source)
|
634
777
|
end
|
635
778
|
quote = match[1]
|
779
|
+
start_position = @source.position
|
636
780
|
value = @source.read_until(quote)
|
637
781
|
unless value.chomp!(quote)
|
782
|
+
@source.position = start_position
|
638
783
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
784
|
raise REXML::ParseException.new(message, @source)
|
640
785
|
end
|
@@ -651,7 +796,7 @@ module REXML
|
|
651
796
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
652
797
|
raise REXML::ParseException.new( msg, @source, self)
|
653
798
|
end
|
654
|
-
|
799
|
+
add_namespace(local_part, value)
|
655
800
|
elsif prefix
|
656
801
|
prefixes << prefix unless prefix == "xml"
|
657
802
|
end
|
@@ -661,6 +806,20 @@ module REXML
|
|
661
806
|
raise REXML::ParseException.new(msg, @source, self)
|
662
807
|
end
|
663
808
|
|
809
|
+
unless prefix == "xmlns"
|
810
|
+
uri = @namespaces[prefix]
|
811
|
+
expanded_name = [uri, local_part]
|
812
|
+
existing_prefix = expanded_names[expanded_name]
|
813
|
+
if existing_prefix
|
814
|
+
message = "Namespace conflict in adding attribute " +
|
815
|
+
"\"#{local_part}\": " +
|
816
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
817
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
818
|
+
raise REXML::ParseException.new(message, @source, self)
|
819
|
+
end
|
820
|
+
expanded_names[expanded_name] = prefix
|
821
|
+
end
|
822
|
+
|
664
823
|
attributes[name] = value
|
665
824
|
else
|
666
825
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -22,6 +22,10 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
25
29
|
def add_listener( listener )
|
26
30
|
@parser.add_listener( listener )
|
27
31
|
end
|
@@ -157,25 +161,8 @@ module REXML
|
|
157
161
|
end
|
158
162
|
end
|
159
163
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
164
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
165
|
+
handle( :characters, unnormalized )
|
179
166
|
when :entitydecl
|
180
167
|
handle_entitydecl( event )
|
181
168
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -7,37 +7,34 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
17
21
|
def parse
|
18
22
|
# entity string
|
19
23
|
while true
|
20
24
|
event = @parser.pull
|
21
25
|
case event[0]
|
22
26
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
27
|
return
|
29
28
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
29
|
attrs = event[2].each do |n, v|
|
32
30
|
event[2][n] = @parser.unnormalize( v )
|
33
31
|
end
|
34
32
|
@listener.tag_start( event[1], attrs )
|
35
33
|
when :end_element
|
36
34
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
35
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
36
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
37
|
+
@listener.text( unnormalized )
|
41
38
|
when :processing_instruction
|
42
39
|
@listener.instruction( *event[1,2] )
|
43
40
|
when :start_doctype
|
@@ -48,6 +45,7 @@ module REXML
|
|
48
45
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
46
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
47
|
when :entitydecl, :notationdecl
|
48
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
49
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
50
|
when :externalentity
|
53
51
|
entity_reference = event[1]
|