rexml 3.2.7 → 3.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +278 -2
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +224 -61
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +14 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +72 -16
- data/lib/rexml/text.rb +39 -17
- metadata +5 -18
@@ -1,12 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
27
|
+
if StringScanner::Version < "3.0.8"
|
28
|
+
module StringScannerCaptures
|
29
|
+
refine StringScanner do
|
30
|
+
def captures
|
31
|
+
values_at(*(1...size))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
using StringScannerCaptures
|
36
|
+
end
|
37
|
+
|
10
38
|
# = Using the Pull Parser
|
11
39
|
# <em>This API is experimental, and subject to change.</em>
|
12
40
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -113,21 +141,31 @@ module REXML
|
|
113
141
|
}
|
114
142
|
|
115
143
|
module Private
|
116
|
-
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
117
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
121
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
155
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
|
+
default_entities.each do |term|
|
157
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
|
+
end
|
124
159
|
end
|
125
160
|
private_constant :Private
|
126
|
-
include Private
|
127
161
|
|
128
162
|
def initialize( source )
|
129
163
|
self.stream = source
|
130
164
|
@listeners = []
|
165
|
+
@prefixes = Set.new
|
166
|
+
@entity_expansion_count = 0
|
167
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
168
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
131
169
|
end
|
132
170
|
|
133
171
|
def add_listener( listener )
|
@@ -135,15 +173,20 @@ module REXML
|
|
135
173
|
end
|
136
174
|
|
137
175
|
attr_reader :source
|
176
|
+
attr_reader :entity_expansion_count
|
177
|
+
attr_writer :entity_expansion_limit
|
178
|
+
attr_writer :entity_expansion_text_limit
|
138
179
|
|
139
180
|
def stream=( source )
|
140
181
|
@source = SourceFactory.create_from( source )
|
141
182
|
@closed = nil
|
183
|
+
@have_root = false
|
142
184
|
@document_status = nil
|
143
185
|
@tags = []
|
144
186
|
@stack = []
|
145
187
|
@entities = []
|
146
|
-
@
|
188
|
+
@namespaces = {}
|
189
|
+
@namespaces_restore_stack = []
|
147
190
|
end
|
148
191
|
|
149
192
|
def position
|
@@ -193,6 +236,8 @@ module REXML
|
|
193
236
|
|
194
237
|
# Returns the next event. This is a +PullEvent+ object.
|
195
238
|
def pull
|
239
|
+
@source.drop_parsed_content
|
240
|
+
|
196
241
|
pull_event.tap do |event|
|
197
242
|
@listeners.each do |listener|
|
198
243
|
listener.receive event
|
@@ -205,7 +250,16 @@ module REXML
|
|
205
250
|
x, @closed = @closed, nil
|
206
251
|
return [ :end_element, x ]
|
207
252
|
end
|
208
|
-
|
253
|
+
if empty?
|
254
|
+
if @document_status == :in_doctype
|
255
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
256
|
+
end
|
257
|
+
unless @tags.empty?
|
258
|
+
path = "/" + @tags.join("/")
|
259
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
260
|
+
end
|
261
|
+
return [ :end_document ]
|
262
|
+
end
|
209
263
|
return @stack.shift if @stack.size > 0
|
210
264
|
#STDERR.puts @source.encoding
|
211
265
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -214,10 +268,17 @@ module REXML
|
|
214
268
|
if @document_status == nil
|
215
269
|
start_position = @source.position
|
216
270
|
if @source.match("<?", true)
|
217
|
-
return process_instruction
|
271
|
+
return process_instruction
|
218
272
|
elsif @source.match("<!", true)
|
219
273
|
if @source.match("--", true)
|
220
|
-
|
274
|
+
md = @source.match(/(.*?)-->/um, true)
|
275
|
+
if md.nil?
|
276
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
277
|
+
end
|
278
|
+
if /--|-\z/.match?(md[1])
|
279
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
280
|
+
end
|
281
|
+
return [ :comment, md[1] ]
|
221
282
|
elsif @source.match("DOCTYPE", true)
|
222
283
|
base_error_message = "Malformed DOCTYPE"
|
223
284
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +290,6 @@ module REXML
|
|
229
290
|
@source.position = start_position
|
230
291
|
raise REXML::ParseException.new(message, @source)
|
231
292
|
end
|
232
|
-
@nsstack.unshift(curr_ns=Set.new)
|
233
293
|
name = parse_name(base_error_message)
|
234
294
|
if @source.match(/\s*\[/um, true)
|
235
295
|
id = [nil, nil, nil]
|
@@ -277,7 +337,11 @@ module REXML
|
|
277
337
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
338
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
339
|
elsif @source.match("ENTITY", true)
|
280
|
-
|
340
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
341
|
+
unless match_data
|
342
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
343
|
+
end
|
344
|
+
match = [:entitydecl, *match_data.captures.compact]
|
281
345
|
ref = false
|
282
346
|
if match[1] == '%'
|
283
347
|
ref = true
|
@@ -295,6 +359,8 @@ module REXML
|
|
295
359
|
match[4] = match[4][1..-2] # HREF
|
296
360
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
361
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
362
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
363
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
298
364
|
else
|
299
365
|
match[2] = match[2][1..-2]
|
300
366
|
match.pop if match.size == 4
|
@@ -303,13 +369,13 @@ module REXML
|
|
303
369
|
match << '%' if ref
|
304
370
|
return match
|
305
371
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
372
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
373
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
374
|
element = md[1]
|
309
375
|
contents = md[0]
|
310
376
|
|
311
377
|
pairs = {}
|
312
|
-
values = md[0].scan( ATTDEF_RE )
|
378
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
313
379
|
values.each do |attdef|
|
314
380
|
unless attdef[3] == "#IMPLIED"
|
315
381
|
attdef.compact!
|
@@ -317,7 +383,7 @@ module REXML
|
|
317
383
|
val = attdef[4] if val == "#FIXED "
|
318
384
|
pairs[attdef[0]] = val
|
319
385
|
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
-
@
|
386
|
+
@namespaces[$1] = val
|
321
387
|
end
|
322
388
|
end
|
323
389
|
end
|
@@ -355,6 +421,9 @@ module REXML
|
|
355
421
|
@document_status = :after_doctype
|
356
422
|
return [ :end_doctype ]
|
357
423
|
end
|
424
|
+
if @document_status == :in_doctype
|
425
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
426
|
+
end
|
358
427
|
end
|
359
428
|
if @document_status == :after_doctype
|
360
429
|
@source.match(/\s*/um, true)
|
@@ -362,10 +431,14 @@ module REXML
|
|
362
431
|
begin
|
363
432
|
start_position = @source.position
|
364
433
|
if @source.match("<", true)
|
434
|
+
# :text's read_until may remain only "<" in buffer. In the
|
435
|
+
# case, buffer is empty here. So we need to fill buffer
|
436
|
+
# here explicitly.
|
437
|
+
@source.ensure_buffer
|
365
438
|
if @source.match("/", true)
|
366
|
-
@
|
439
|
+
@namespaces_restore_stack.pop
|
367
440
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
441
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
442
|
if md and !last_tag
|
370
443
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
444
|
raise REXML::ParseException.new(message, @source)
|
@@ -384,12 +457,11 @@ module REXML
|
|
384
457
|
if md[0][0] == ?-
|
385
458
|
md = @source.match(/--(.*?)-->/um, true)
|
386
459
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
460
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
461
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
462
|
end
|
391
463
|
|
392
|
-
return [ :comment, md[1] ]
|
464
|
+
return [ :comment, md[1] ]
|
393
465
|
else
|
394
466
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
395
467
|
return [ :cdata, md[1] ] if md
|
@@ -397,38 +469,54 @@ module REXML
|
|
397
469
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
398
470
|
"in the doctype declaration.", @source)
|
399
471
|
elsif @source.match("?", true)
|
400
|
-
return process_instruction
|
472
|
+
return process_instruction
|
401
473
|
else
|
402
474
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
475
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
476
|
unless md
|
405
477
|
@source.position = start_position
|
406
478
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
479
|
end
|
408
480
|
tag = md[1]
|
409
481
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
412
|
-
|
413
|
-
attributes, closed = parse_attributes(prefixes
|
482
|
+
@prefixes.clear
|
483
|
+
@prefixes << md[2] if md[2]
|
484
|
+
push_namespaces_restore
|
485
|
+
attributes, closed = parse_attributes(@prefixes)
|
414
486
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
416
|
-
unless @
|
487
|
+
for prefix in @prefixes
|
488
|
+
unless @namespaces.key?(prefix)
|
417
489
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
490
|
end
|
419
491
|
end
|
420
492
|
|
421
493
|
if closed
|
422
494
|
@closed = tag
|
423
|
-
|
495
|
+
pop_namespaces_restore
|
424
496
|
else
|
497
|
+
if @tags.empty? and @have_root
|
498
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
499
|
+
end
|
425
500
|
@tags.push( tag )
|
426
501
|
end
|
502
|
+
@have_root = true
|
427
503
|
return [ :start_element, tag, attributes ]
|
428
504
|
end
|
429
505
|
else
|
430
|
-
|
431
|
-
text
|
506
|
+
text = @source.read_until("<")
|
507
|
+
if text.chomp!("<")
|
508
|
+
@source.position -= "<".bytesize
|
509
|
+
end
|
510
|
+
if @tags.empty?
|
511
|
+
unless /\A\s*\z/.match?(text)
|
512
|
+
if @have_root
|
513
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
514
|
+
else
|
515
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
516
|
+
end
|
517
|
+
end
|
518
|
+
return pull_event if @have_root
|
519
|
+
end
|
432
520
|
return [ :text, text ]
|
433
521
|
end
|
434
522
|
rescue REXML::UndefinedNamespaceException
|
@@ -444,13 +532,13 @@ module REXML
|
|
444
532
|
private :pull_event
|
445
533
|
|
446
534
|
def entity( reference, entities )
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
unnormalize( value, entities )
|
535
|
+
return unless entities
|
536
|
+
|
537
|
+
value = entities[ reference ]
|
538
|
+
return if value.nil?
|
539
|
+
|
540
|
+
record_entity_expansion
|
541
|
+
unnormalize( value, entities )
|
454
542
|
end
|
455
543
|
|
456
544
|
# Escapes all possible entities
|
@@ -471,34 +559,83 @@ module REXML
|
|
471
559
|
|
472
560
|
# Unescapes all possible entities
|
473
561
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
|
562
|
+
if string.include?("\r")
|
563
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
564
|
+
else
|
565
|
+
rv = string.dup
|
566
|
+
end
|
475
567
|
matches = rv.scan( REFERENCE_RE )
|
476
568
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
569
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
570
|
m=$1
|
479
571
|
m = "0#{m}" if m[0] == ?x
|
480
572
|
[Integer(m)].pack('U*')
|
481
573
|
}
|
482
574
|
matches.collect!{|x|x[0]}.compact!
|
575
|
+
if filter
|
576
|
+
matches.reject! do |entity_reference|
|
577
|
+
filter.include?(entity_reference)
|
578
|
+
end
|
579
|
+
end
|
483
580
|
if matches.size > 0
|
484
|
-
matches.each do |entity_reference|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
581
|
+
matches.tally.each do |entity_reference, n|
|
582
|
+
entity_expansion_count_before = @entity_expansion_count
|
583
|
+
entity_value = entity( entity_reference, entities )
|
584
|
+
if entity_value
|
585
|
+
if n > 1
|
586
|
+
entity_expansion_count_delta =
|
587
|
+
@entity_expansion_count - entity_expansion_count_before
|
588
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
589
|
+
end
|
590
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
591
|
+
rv.gsub!( re, entity_value )
|
592
|
+
if rv.bytesize > @entity_expansion_text_limit
|
593
|
+
raise "entity expansion has grown too large"
|
493
594
|
end
|
595
|
+
else
|
596
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
597
|
+
rv.gsub!( er[0], er[2] ) if er
|
494
598
|
end
|
495
599
|
end
|
496
|
-
rv.gsub!(
|
600
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
601
|
end
|
498
602
|
rv
|
499
603
|
end
|
500
604
|
|
501
605
|
private
|
606
|
+
def add_namespace(prefix, uri)
|
607
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
608
|
+
if uri.nil?
|
609
|
+
@namespaces.delete(prefix)
|
610
|
+
else
|
611
|
+
@namespaces[prefix] = uri
|
612
|
+
end
|
613
|
+
end
|
614
|
+
|
615
|
+
def push_namespaces_restore
|
616
|
+
namespaces_restore = {}
|
617
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
618
|
+
namespaces_restore
|
619
|
+
end
|
620
|
+
|
621
|
+
def pop_namespaces_restore
|
622
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
623
|
+
namespaces_restore.each do |prefix, uri|
|
624
|
+
if uri.nil?
|
625
|
+
@namespaces.delete(prefix)
|
626
|
+
else
|
627
|
+
@namespaces[prefix] = uri
|
628
|
+
end
|
629
|
+
end
|
630
|
+
end
|
631
|
+
|
632
|
+
def record_entity_expansion(delta=1)
|
633
|
+
@entity_expansion_count += delta
|
634
|
+
if @entity_expansion_count > @entity_expansion_limit
|
635
|
+
raise "number of entity expansions exceeded, processing aborted."
|
636
|
+
end
|
637
|
+
end
|
638
|
+
|
502
639
|
def need_source_encoding_update?(xml_declaration_encoding)
|
503
640
|
return false if xml_declaration_encoding.nil?
|
504
641
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -506,16 +643,16 @@ module REXML
|
|
506
643
|
end
|
507
644
|
|
508
645
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
646
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
647
|
unless md
|
511
|
-
if @source.match(/\
|
648
|
+
if @source.match(/\S/um)
|
512
649
|
message = "#{base_error_message}: invalid name"
|
513
650
|
else
|
514
651
|
message = "#{base_error_message}: name is missing"
|
515
652
|
end
|
516
653
|
raise REXML::ParseException.new(message, @source)
|
517
654
|
end
|
518
|
-
md[
|
655
|
+
md[0]
|
519
656
|
end
|
520
657
|
|
521
658
|
def parse_id(base_error_message,
|
@@ -584,15 +721,24 @@ module REXML
|
|
584
721
|
end
|
585
722
|
end
|
586
723
|
|
587
|
-
def process_instruction
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
724
|
+
def process_instruction
|
725
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
726
|
+
if @source.match(/\s+/um, true)
|
727
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
728
|
+
unless match_data
|
729
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
730
|
+
end
|
731
|
+
content = match_data[1]
|
732
|
+
else
|
733
|
+
content = nil
|
734
|
+
unless @source.match("?>", true)
|
735
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
736
|
+
end
|
593
737
|
end
|
594
|
-
if
|
595
|
-
|
738
|
+
if name == "xml"
|
739
|
+
if @document_status
|
740
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
741
|
+
end
|
596
742
|
version = VERSION.match(content)
|
597
743
|
version = version[1] unless version.nil?
|
598
744
|
encoding = ENCODING.match(content)
|
@@ -607,11 +753,12 @@ module REXML
|
|
607
753
|
standalone = standalone[1] unless standalone.nil?
|
608
754
|
return [ :xmldecl, version, encoding, standalone ]
|
609
755
|
end
|
610
|
-
[:processing_instruction,
|
756
|
+
[:processing_instruction, name, content]
|
611
757
|
end
|
612
758
|
|
613
|
-
def parse_attributes(prefixes
|
759
|
+
def parse_attributes(prefixes)
|
614
760
|
attributes = {}
|
761
|
+
expanded_names = {}
|
615
762
|
closed = false
|
616
763
|
while true
|
617
764
|
if @source.match(">", true)
|
@@ -633,8 +780,10 @@ module REXML
|
|
633
780
|
raise REXML::ParseException.new(message, @source)
|
634
781
|
end
|
635
782
|
quote = match[1]
|
783
|
+
start_position = @source.position
|
636
784
|
value = @source.read_until(quote)
|
637
785
|
unless value.chomp!(quote)
|
786
|
+
@source.position = start_position
|
638
787
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
788
|
raise REXML::ParseException.new(message, @source)
|
640
789
|
end
|
@@ -651,7 +800,7 @@ module REXML
|
|
651
800
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
652
801
|
raise REXML::ParseException.new( msg, @source, self)
|
653
802
|
end
|
654
|
-
|
803
|
+
add_namespace(local_part, value)
|
655
804
|
elsif prefix
|
656
805
|
prefixes << prefix unless prefix == "xml"
|
657
806
|
end
|
@@ -661,6 +810,20 @@ module REXML
|
|
661
810
|
raise REXML::ParseException.new(msg, @source, self)
|
662
811
|
end
|
663
812
|
|
813
|
+
unless prefix == "xmlns"
|
814
|
+
uri = @namespaces[prefix]
|
815
|
+
expanded_name = [uri, local_part]
|
816
|
+
existing_prefix = expanded_names[expanded_name]
|
817
|
+
if existing_prefix
|
818
|
+
message = "Namespace conflict in adding attribute " +
|
819
|
+
"\"#{local_part}\": " +
|
820
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
821
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
822
|
+
raise REXML::ParseException.new(message, @source, self)
|
823
|
+
end
|
824
|
+
expanded_names[expanded_name] = prefix
|
825
|
+
end
|
826
|
+
|
664
827
|
attributes[name] = value
|
665
828
|
else
|
666
829
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -22,6 +22,18 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
25
37
|
def add_listener( listener )
|
26
38
|
@parser.add_listener( listener )
|
27
39
|
end
|
@@ -157,25 +169,8 @@ module REXML
|
|
157
169
|
end
|
158
170
|
end
|
159
171
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
172
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
173
|
+
handle( :characters, unnormalized )
|
179
174
|
when :entitydecl
|
180
175
|
handle_entitydecl( event )
|
181
176
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -7,37 +7,42 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
17
29
|
def parse
|
18
30
|
# entity string
|
19
31
|
while true
|
20
32
|
event = @parser.pull
|
21
33
|
case event[0]
|
22
34
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
35
|
return
|
29
36
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
37
|
attrs = event[2].each do |n, v|
|
32
38
|
event[2][n] = @parser.unnormalize( v )
|
33
39
|
end
|
34
40
|
@listener.tag_start( event[1], attrs )
|
35
41
|
when :end_element
|
36
42
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
43
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
44
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
45
|
+
@listener.text( unnormalized )
|
41
46
|
when :processing_instruction
|
42
47
|
@listener.instruction( *event[1,2] )
|
43
48
|
when :start_doctype
|
@@ -48,6 +53,7 @@ module REXML
|
|
48
53
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
54
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
55
|
when :entitydecl, :notationdecl
|
56
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
57
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
58
|
when :externalentity
|
53
59
|
entity_reference = event[1]
|