rexml 3.2.8 → 3.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +242 -2
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +5 -47
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +220 -61
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +6 -19
- data/lib/rexml/parsers/streamparser.rb +8 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +72 -15
- data/lib/rexml/text.rb +34 -14
- metadata +6 -5
@@ -1,12 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
27
|
+
if StringScanner::Version < "3.0.8"
|
28
|
+
module StringScannerCaptures
|
29
|
+
refine StringScanner do
|
30
|
+
def captures
|
31
|
+
values_at(*(1...size))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
using StringScannerCaptures
|
36
|
+
end
|
37
|
+
|
10
38
|
# = Using the Pull Parser
|
11
39
|
# <em>This API is experimental, and subject to change.</em>
|
12
40
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -113,21 +141,29 @@ module REXML
|
|
113
141
|
}
|
114
142
|
|
115
143
|
module Private
|
116
|
-
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
117
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
121
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
155
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
|
+
default_entities.each do |term|
|
157
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
|
+
end
|
124
159
|
end
|
125
160
|
private_constant :Private
|
126
|
-
include Private
|
127
161
|
|
128
162
|
def initialize( source )
|
129
163
|
self.stream = source
|
130
164
|
@listeners = []
|
165
|
+
@prefixes = Set.new
|
166
|
+
@entity_expansion_count = 0
|
131
167
|
end
|
132
168
|
|
133
169
|
def add_listener( listener )
|
@@ -135,15 +171,18 @@ module REXML
|
|
135
171
|
end
|
136
172
|
|
137
173
|
attr_reader :source
|
174
|
+
attr_reader :entity_expansion_count
|
138
175
|
|
139
176
|
def stream=( source )
|
140
177
|
@source = SourceFactory.create_from( source )
|
141
178
|
@closed = nil
|
179
|
+
@have_root = false
|
142
180
|
@document_status = nil
|
143
181
|
@tags = []
|
144
182
|
@stack = []
|
145
183
|
@entities = []
|
146
|
-
@
|
184
|
+
@namespaces = {}
|
185
|
+
@namespaces_restore_stack = []
|
147
186
|
end
|
148
187
|
|
149
188
|
def position
|
@@ -193,6 +232,8 @@ module REXML
|
|
193
232
|
|
194
233
|
# Returns the next event. This is a +PullEvent+ object.
|
195
234
|
def pull
|
235
|
+
@source.drop_parsed_content
|
236
|
+
|
196
237
|
pull_event.tap do |event|
|
197
238
|
@listeners.each do |listener|
|
198
239
|
listener.receive event
|
@@ -205,7 +246,16 @@ module REXML
|
|
205
246
|
x, @closed = @closed, nil
|
206
247
|
return [ :end_element, x ]
|
207
248
|
end
|
208
|
-
|
249
|
+
if empty?
|
250
|
+
if @document_status == :in_doctype
|
251
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
252
|
+
end
|
253
|
+
unless @tags.empty?
|
254
|
+
path = "/" + @tags.join("/")
|
255
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
256
|
+
end
|
257
|
+
return [ :end_document ]
|
258
|
+
end
|
209
259
|
return @stack.shift if @stack.size > 0
|
210
260
|
#STDERR.puts @source.encoding
|
211
261
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -214,10 +264,17 @@ module REXML
|
|
214
264
|
if @document_status == nil
|
215
265
|
start_position = @source.position
|
216
266
|
if @source.match("<?", true)
|
217
|
-
return process_instruction
|
267
|
+
return process_instruction
|
218
268
|
elsif @source.match("<!", true)
|
219
269
|
if @source.match("--", true)
|
220
|
-
|
270
|
+
md = @source.match(/(.*?)-->/um, true)
|
271
|
+
if md.nil?
|
272
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
273
|
+
end
|
274
|
+
if /--|-\z/.match?(md[1])
|
275
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
276
|
+
end
|
277
|
+
return [ :comment, md[1] ]
|
221
278
|
elsif @source.match("DOCTYPE", true)
|
222
279
|
base_error_message = "Malformed DOCTYPE"
|
223
280
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +286,6 @@ module REXML
|
|
229
286
|
@source.position = start_position
|
230
287
|
raise REXML::ParseException.new(message, @source)
|
231
288
|
end
|
232
|
-
@nsstack.unshift(curr_ns=Set.new)
|
233
289
|
name = parse_name(base_error_message)
|
234
290
|
if @source.match(/\s*\[/um, true)
|
235
291
|
id = [nil, nil, nil]
|
@@ -277,7 +333,11 @@ module REXML
|
|
277
333
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
334
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
335
|
elsif @source.match("ENTITY", true)
|
280
|
-
|
336
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
337
|
+
unless match_data
|
338
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
339
|
+
end
|
340
|
+
match = [:entitydecl, *match_data.captures.compact]
|
281
341
|
ref = false
|
282
342
|
if match[1] == '%'
|
283
343
|
ref = true
|
@@ -295,6 +355,8 @@ module REXML
|
|
295
355
|
match[4] = match[4][1..-2] # HREF
|
296
356
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
357
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
358
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
359
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
298
360
|
else
|
299
361
|
match[2] = match[2][1..-2]
|
300
362
|
match.pop if match.size == 4
|
@@ -303,13 +365,13 @@ module REXML
|
|
303
365
|
match << '%' if ref
|
304
366
|
return match
|
305
367
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
368
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
369
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
370
|
element = md[1]
|
309
371
|
contents = md[0]
|
310
372
|
|
311
373
|
pairs = {}
|
312
|
-
values = md[0].scan( ATTDEF_RE )
|
374
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
313
375
|
values.each do |attdef|
|
314
376
|
unless attdef[3] == "#IMPLIED"
|
315
377
|
attdef.compact!
|
@@ -317,7 +379,7 @@ module REXML
|
|
317
379
|
val = attdef[4] if val == "#FIXED "
|
318
380
|
pairs[attdef[0]] = val
|
319
381
|
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
-
@
|
382
|
+
@namespaces[$1] = val
|
321
383
|
end
|
322
384
|
end
|
323
385
|
end
|
@@ -355,6 +417,9 @@ module REXML
|
|
355
417
|
@document_status = :after_doctype
|
356
418
|
return [ :end_doctype ]
|
357
419
|
end
|
420
|
+
if @document_status == :in_doctype
|
421
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
422
|
+
end
|
358
423
|
end
|
359
424
|
if @document_status == :after_doctype
|
360
425
|
@source.match(/\s*/um, true)
|
@@ -362,10 +427,14 @@ module REXML
|
|
362
427
|
begin
|
363
428
|
start_position = @source.position
|
364
429
|
if @source.match("<", true)
|
430
|
+
# :text's read_until may leave only "<" in the buffer. In that
|
431
|
+
# case, the buffer is empty here, so we need to fill the buffer
|
432
|
+
# here explicitly.
|
433
|
+
@source.ensure_buffer
|
365
434
|
if @source.match("/", true)
|
366
|
-
@
|
435
|
+
@namespaces_restore_stack.pop
|
367
436
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
437
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
438
|
if md and !last_tag
|
370
439
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
440
|
raise REXML::ParseException.new(message, @source)
|
@@ -384,12 +453,11 @@ module REXML
|
|
384
453
|
if md[0][0] == ?-
|
385
454
|
md = @source.match(/--(.*?)-->/um, true)
|
386
455
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
456
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
457
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
458
|
end
|
391
459
|
|
392
|
-
return [ :comment, md[1] ]
|
460
|
+
return [ :comment, md[1] ]
|
393
461
|
else
|
394
462
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
395
463
|
return [ :cdata, md[1] ] if md
|
@@ -397,38 +465,54 @@ module REXML
|
|
397
465
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
398
466
|
"in the doctype declaration.", @source)
|
399
467
|
elsif @source.match("?", true)
|
400
|
-
return process_instruction
|
468
|
+
return process_instruction
|
401
469
|
else
|
402
470
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
471
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
472
|
unless md
|
405
473
|
@source.position = start_position
|
406
474
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
475
|
end
|
408
476
|
tag = md[1]
|
409
477
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
412
|
-
|
413
|
-
attributes, closed = parse_attributes(prefixes
|
478
|
+
@prefixes.clear
|
479
|
+
@prefixes << md[2] if md[2]
|
480
|
+
push_namespaces_restore
|
481
|
+
attributes, closed = parse_attributes(@prefixes)
|
414
482
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
416
|
-
unless @
|
483
|
+
for prefix in @prefixes
|
484
|
+
unless @namespaces.key?(prefix)
|
417
485
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
486
|
end
|
419
487
|
end
|
420
488
|
|
421
489
|
if closed
|
422
490
|
@closed = tag
|
423
|
-
|
491
|
+
pop_namespaces_restore
|
424
492
|
else
|
493
|
+
if @tags.empty? and @have_root
|
494
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
495
|
+
end
|
425
496
|
@tags.push( tag )
|
426
497
|
end
|
498
|
+
@have_root = true
|
427
499
|
return [ :start_element, tag, attributes ]
|
428
500
|
end
|
429
501
|
else
|
430
|
-
|
431
|
-
text
|
502
|
+
text = @source.read_until("<")
|
503
|
+
if text.chomp!("<")
|
504
|
+
@source.position -= "<".bytesize
|
505
|
+
end
|
506
|
+
if @tags.empty?
|
507
|
+
unless /\A\s*\z/.match?(text)
|
508
|
+
if @have_root
|
509
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
510
|
+
else
|
511
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
512
|
+
end
|
513
|
+
end
|
514
|
+
return pull_event if @have_root
|
515
|
+
end
|
432
516
|
return [ :text, text ]
|
433
517
|
end
|
434
518
|
rescue REXML::UndefinedNamespaceException
|
@@ -444,13 +528,13 @@ module REXML
|
|
444
528
|
private :pull_event
|
445
529
|
|
446
530
|
def entity( reference, entities )
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
unnormalize( value, entities )
|
531
|
+
return unless entities
|
532
|
+
|
533
|
+
value = entities[ reference ]
|
534
|
+
return if value.nil?
|
535
|
+
|
536
|
+
record_entity_expansion
|
537
|
+
unnormalize( value, entities )
|
454
538
|
end
|
455
539
|
|
456
540
|
# Escapes all possible entities
|
@@ -471,34 +555,83 @@ module REXML
|
|
471
555
|
|
472
556
|
# Unescapes all possible entities
|
473
557
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
|
558
|
+
if string.include?("\r")
|
559
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
560
|
+
else
|
561
|
+
rv = string.dup
|
562
|
+
end
|
475
563
|
matches = rv.scan( REFERENCE_RE )
|
476
564
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
565
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
566
|
m=$1
|
479
567
|
m = "0#{m}" if m[0] == ?x
|
480
568
|
[Integer(m)].pack('U*')
|
481
569
|
}
|
482
570
|
matches.collect!{|x|x[0]}.compact!
|
571
|
+
if filter
|
572
|
+
matches.reject! do |entity_reference|
|
573
|
+
filter.include?(entity_reference)
|
574
|
+
end
|
575
|
+
end
|
483
576
|
if matches.size > 0
|
484
|
-
matches.each do |entity_reference|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
577
|
+
matches.tally.each do |entity_reference, n|
|
578
|
+
entity_expansion_count_before = @entity_expansion_count
|
579
|
+
entity_value = entity( entity_reference, entities )
|
580
|
+
if entity_value
|
581
|
+
if n > 1
|
582
|
+
entity_expansion_count_delta =
|
583
|
+
@entity_expansion_count - entity_expansion_count_before
|
584
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
585
|
+
end
|
586
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
587
|
+
rv.gsub!( re, entity_value )
|
588
|
+
if rv.bytesize > Security.entity_expansion_text_limit
|
589
|
+
raise "entity expansion has grown too large"
|
493
590
|
end
|
591
|
+
else
|
592
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
593
|
+
rv.gsub!( er[0], er[2] ) if er
|
494
594
|
end
|
495
595
|
end
|
496
|
-
rv.gsub!(
|
596
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
597
|
end
|
498
598
|
rv
|
499
599
|
end
|
500
600
|
|
501
601
|
private
|
602
|
+
def add_namespace(prefix, uri)
|
603
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
604
|
+
if uri.nil?
|
605
|
+
@namespaces.delete(prefix)
|
606
|
+
else
|
607
|
+
@namespaces[prefix] = uri
|
608
|
+
end
|
609
|
+
end
|
610
|
+
|
611
|
+
def push_namespaces_restore
|
612
|
+
namespaces_restore = {}
|
613
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
614
|
+
namespaces_restore
|
615
|
+
end
|
616
|
+
|
617
|
+
def pop_namespaces_restore
|
618
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
619
|
+
namespaces_restore.each do |prefix, uri|
|
620
|
+
if uri.nil?
|
621
|
+
@namespaces.delete(prefix)
|
622
|
+
else
|
623
|
+
@namespaces[prefix] = uri
|
624
|
+
end
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def record_entity_expansion(delta=1)
|
629
|
+
@entity_expansion_count += delta
|
630
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
631
|
+
raise "number of entity expansions exceeded, processing aborted."
|
632
|
+
end
|
633
|
+
end
|
634
|
+
|
502
635
|
def need_source_encoding_update?(xml_declaration_encoding)
|
503
636
|
return false if xml_declaration_encoding.nil?
|
504
637
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -506,16 +639,16 @@ module REXML
|
|
506
639
|
end
|
507
640
|
|
508
641
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
642
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
643
|
unless md
|
511
|
-
if @source.match(/\
|
644
|
+
if @source.match(/\S/um)
|
512
645
|
message = "#{base_error_message}: invalid name"
|
513
646
|
else
|
514
647
|
message = "#{base_error_message}: name is missing"
|
515
648
|
end
|
516
649
|
raise REXML::ParseException.new(message, @source)
|
517
650
|
end
|
518
|
-
md[
|
651
|
+
md[0]
|
519
652
|
end
|
520
653
|
|
521
654
|
def parse_id(base_error_message,
|
@@ -584,15 +717,24 @@ module REXML
|
|
584
717
|
end
|
585
718
|
end
|
586
719
|
|
587
|
-
def process_instruction
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
720
|
+
def process_instruction
|
721
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
722
|
+
if @source.match(/\s+/um, true)
|
723
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
724
|
+
unless match_data
|
725
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
726
|
+
end
|
727
|
+
content = match_data[1]
|
728
|
+
else
|
729
|
+
content = nil
|
730
|
+
unless @source.match("?>", true)
|
731
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
732
|
+
end
|
593
733
|
end
|
594
|
-
if
|
595
|
-
|
734
|
+
if name == "xml"
|
735
|
+
if @document_status
|
736
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
737
|
+
end
|
596
738
|
version = VERSION.match(content)
|
597
739
|
version = version[1] unless version.nil?
|
598
740
|
encoding = ENCODING.match(content)
|
@@ -607,11 +749,12 @@ module REXML
|
|
607
749
|
standalone = standalone[1] unless standalone.nil?
|
608
750
|
return [ :xmldecl, version, encoding, standalone ]
|
609
751
|
end
|
610
|
-
[:processing_instruction,
|
752
|
+
[:processing_instruction, name, content]
|
611
753
|
end
|
612
754
|
|
613
|
-
def parse_attributes(prefixes
|
755
|
+
def parse_attributes(prefixes)
|
614
756
|
attributes = {}
|
757
|
+
expanded_names = {}
|
615
758
|
closed = false
|
616
759
|
while true
|
617
760
|
if @source.match(">", true)
|
@@ -633,8 +776,10 @@ module REXML
|
|
633
776
|
raise REXML::ParseException.new(message, @source)
|
634
777
|
end
|
635
778
|
quote = match[1]
|
779
|
+
start_position = @source.position
|
636
780
|
value = @source.read_until(quote)
|
637
781
|
unless value.chomp!(quote)
|
782
|
+
@source.position = start_position
|
638
783
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
784
|
raise REXML::ParseException.new(message, @source)
|
640
785
|
end
|
@@ -651,7 +796,7 @@ module REXML
|
|
651
796
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
652
797
|
raise REXML::ParseException.new( msg, @source, self)
|
653
798
|
end
|
654
|
-
|
799
|
+
add_namespace(local_part, value)
|
655
800
|
elsif prefix
|
656
801
|
prefixes << prefix unless prefix == "xml"
|
657
802
|
end
|
@@ -661,6 +806,20 @@ module REXML
|
|
661
806
|
raise REXML::ParseException.new(msg, @source, self)
|
662
807
|
end
|
663
808
|
|
809
|
+
unless prefix == "xmlns"
|
810
|
+
uri = @namespaces[prefix]
|
811
|
+
expanded_name = [uri, local_part]
|
812
|
+
existing_prefix = expanded_names[expanded_name]
|
813
|
+
if existing_prefix
|
814
|
+
message = "Namespace conflict in adding attribute " +
|
815
|
+
"\"#{local_part}\": " +
|
816
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
817
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
818
|
+
raise REXML::ParseException.new(message, @source, self)
|
819
|
+
end
|
820
|
+
expanded_names[expanded_name] = prefix
|
821
|
+
end
|
822
|
+
|
664
823
|
attributes[name] = value
|
665
824
|
else
|
666
825
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -22,6 +22,10 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
25
29
|
def add_listener( listener )
|
26
30
|
@parser.add_listener( listener )
|
27
31
|
end
|
@@ -157,25 +161,8 @@ module REXML
|
|
157
161
|
end
|
158
162
|
end
|
159
163
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
164
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
165
|
+
handle( :characters, unnormalized )
|
179
166
|
when :entitydecl
|
180
167
|
handle_entitydecl( event )
|
181
168
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -7,37 +7,34 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
17
21
|
def parse
|
18
22
|
# entity string
|
19
23
|
while true
|
20
24
|
event = @parser.pull
|
21
25
|
case event[0]
|
22
26
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
27
|
return
|
29
28
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
29
|
attrs = event[2].each do |n, v|
|
32
30
|
event[2][n] = @parser.unnormalize( v )
|
33
31
|
end
|
34
32
|
@listener.tag_start( event[1], attrs )
|
35
33
|
when :end_element
|
36
34
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
35
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
36
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
37
|
+
@listener.text( unnormalized )
|
41
38
|
when :processing_instruction
|
42
39
|
@listener.instruction( *event[1,2] )
|
43
40
|
when :start_doctype
|
@@ -48,6 +45,7 @@ module REXML
|
|
48
45
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
46
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
47
|
when :entitydecl, :notationdecl
|
48
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
49
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
50
|
when :externalentity
|
53
51
|
entity_reference = event[1]
|