rexml 3.2.8 → 3.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +306 -2
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +233 -64
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +79 -16
- data/lib/rexml/text.rb +39 -17
- metadata +5 -18
@@ -1,12 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
27
|
+
if StringScanner::Version < "3.0.8"
|
28
|
+
module StringScannerCaptures
|
29
|
+
refine StringScanner do
|
30
|
+
def captures
|
31
|
+
values_at(*(1...size))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
using StringScannerCaptures
|
36
|
+
end
|
37
|
+
|
10
38
|
# = Using the Pull Parser
|
11
39
|
# <em>This API is experimental, and subject to change.</em>
|
12
40
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -113,21 +141,33 @@ module REXML
|
|
113
141
|
}
|
114
142
|
|
115
143
|
module Private
|
116
|
-
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
117
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
121
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
152
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
153
|
+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
154
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
155
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
156
|
+
default_entities.each do |term|
|
157
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
158
|
+
end
|
159
|
+
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
124
160
|
end
|
125
161
|
private_constant :Private
|
126
|
-
include Private
|
127
162
|
|
128
163
|
def initialize( source )
|
129
164
|
self.stream = source
|
130
165
|
@listeners = []
|
166
|
+
@prefixes = Set.new
|
167
|
+
@entity_expansion_count = 0
|
168
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
169
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
170
|
+
@source.ensure_buffer
|
131
171
|
end
|
132
172
|
|
133
173
|
def add_listener( listener )
|
@@ -135,15 +175,20 @@ module REXML
|
|
135
175
|
end
|
136
176
|
|
137
177
|
attr_reader :source
|
178
|
+
attr_reader :entity_expansion_count
|
179
|
+
attr_writer :entity_expansion_limit
|
180
|
+
attr_writer :entity_expansion_text_limit
|
138
181
|
|
139
182
|
def stream=( source )
|
140
183
|
@source = SourceFactory.create_from( source )
|
141
184
|
@closed = nil
|
185
|
+
@have_root = false
|
142
186
|
@document_status = nil
|
143
187
|
@tags = []
|
144
188
|
@stack = []
|
145
189
|
@entities = []
|
146
|
-
@
|
190
|
+
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
191
|
+
@namespaces_restore_stack = []
|
147
192
|
end
|
148
193
|
|
149
194
|
def position
|
@@ -193,6 +238,8 @@ module REXML
|
|
193
238
|
|
194
239
|
# Returns the next event. This is a +PullEvent+ object.
|
195
240
|
def pull
|
241
|
+
@source.drop_parsed_content
|
242
|
+
|
196
243
|
pull_event.tap do |event|
|
197
244
|
@listeners.each do |listener|
|
198
245
|
listener.receive event
|
@@ -205,7 +252,16 @@ module REXML
|
|
205
252
|
x, @closed = @closed, nil
|
206
253
|
return [ :end_element, x ]
|
207
254
|
end
|
208
|
-
|
255
|
+
if empty?
|
256
|
+
if @document_status == :in_doctype
|
257
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
258
|
+
end
|
259
|
+
unless @tags.empty?
|
260
|
+
path = "/" + @tags.join("/")
|
261
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
262
|
+
end
|
263
|
+
return [ :end_document ]
|
264
|
+
end
|
209
265
|
return @stack.shift if @stack.size > 0
|
210
266
|
#STDERR.puts @source.encoding
|
211
267
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -214,10 +270,17 @@ module REXML
|
|
214
270
|
if @document_status == nil
|
215
271
|
start_position = @source.position
|
216
272
|
if @source.match("<?", true)
|
217
|
-
return process_instruction
|
273
|
+
return process_instruction
|
218
274
|
elsif @source.match("<!", true)
|
219
275
|
if @source.match("--", true)
|
220
|
-
|
276
|
+
md = @source.match(/(.*?)-->/um, true)
|
277
|
+
if md.nil?
|
278
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
279
|
+
end
|
280
|
+
if /--|-\z/.match?(md[1])
|
281
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
282
|
+
end
|
283
|
+
return [ :comment, md[1] ]
|
221
284
|
elsif @source.match("DOCTYPE", true)
|
222
285
|
base_error_message = "Malformed DOCTYPE"
|
223
286
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +292,6 @@ module REXML
|
|
229
292
|
@source.position = start_position
|
230
293
|
raise REXML::ParseException.new(message, @source)
|
231
294
|
end
|
232
|
-
@nsstack.unshift(curr_ns=Set.new)
|
233
295
|
name = parse_name(base_error_message)
|
234
296
|
if @source.match(/\s*\[/um, true)
|
235
297
|
id = [nil, nil, nil]
|
@@ -277,7 +339,11 @@ module REXML
|
|
277
339
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
340
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
341
|
elsif @source.match("ENTITY", true)
|
280
|
-
|
342
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
343
|
+
unless match_data
|
344
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
345
|
+
end
|
346
|
+
match = [:entitydecl, *match_data.captures.compact]
|
281
347
|
ref = false
|
282
348
|
if match[1] == '%'
|
283
349
|
ref = true
|
@@ -295,6 +361,8 @@ module REXML
|
|
295
361
|
match[4] = match[4][1..-2] # HREF
|
296
362
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
363
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
364
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
365
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
298
366
|
else
|
299
367
|
match[2] = match[2][1..-2]
|
300
368
|
match.pop if match.size == 4
|
@@ -303,13 +371,13 @@ module REXML
|
|
303
371
|
match << '%' if ref
|
304
372
|
return match
|
305
373
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
374
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
375
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
376
|
element = md[1]
|
309
377
|
contents = md[0]
|
310
378
|
|
311
379
|
pairs = {}
|
312
|
-
values = md[0].scan( ATTDEF_RE )
|
380
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
313
381
|
values.each do |attdef|
|
314
382
|
unless attdef[3] == "#IMPLIED"
|
315
383
|
attdef.compact!
|
@@ -317,7 +385,7 @@ module REXML
|
|
317
385
|
val = attdef[4] if val == "#FIXED "
|
318
386
|
pairs[attdef[0]] = val
|
319
387
|
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
-
@
|
388
|
+
@namespaces[$1] = val
|
321
389
|
end
|
322
390
|
end
|
323
391
|
end
|
@@ -355,6 +423,9 @@ module REXML
|
|
355
423
|
@document_status = :after_doctype
|
356
424
|
return [ :end_doctype ]
|
357
425
|
end
|
426
|
+
if @document_status == :in_doctype
|
427
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
428
|
+
end
|
358
429
|
end
|
359
430
|
if @document_status == :after_doctype
|
360
431
|
@source.match(/\s*/um, true)
|
@@ -362,10 +433,14 @@ module REXML
|
|
362
433
|
begin
|
363
434
|
start_position = @source.position
|
364
435
|
if @source.match("<", true)
|
436
|
+
# :text's read_until may remain only "<" in buffer. In the
|
437
|
+
# case, buffer is empty here. So we need to fill buffer
|
438
|
+
# here explicitly.
|
439
|
+
@source.ensure_buffer
|
365
440
|
if @source.match("/", true)
|
366
|
-
@
|
441
|
+
@namespaces_restore_stack.pop
|
367
442
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
443
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
444
|
if md and !last_tag
|
370
445
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
446
|
raise REXML::ParseException.new(message, @source)
|
@@ -384,12 +459,11 @@ module REXML
|
|
384
459
|
if md[0][0] == ?-
|
385
460
|
md = @source.match(/--(.*?)-->/um, true)
|
386
461
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
462
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
463
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
464
|
end
|
391
465
|
|
392
|
-
return [ :comment, md[1] ]
|
466
|
+
return [ :comment, md[1] ]
|
393
467
|
else
|
394
468
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
395
469
|
return [ :cdata, md[1] ] if md
|
@@ -397,38 +471,54 @@ module REXML
|
|
397
471
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
398
472
|
"in the doctype declaration.", @source)
|
399
473
|
elsif @source.match("?", true)
|
400
|
-
return process_instruction
|
474
|
+
return process_instruction
|
401
475
|
else
|
402
476
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
477
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
478
|
unless md
|
405
479
|
@source.position = start_position
|
406
480
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
481
|
end
|
408
482
|
tag = md[1]
|
409
483
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
412
|
-
|
413
|
-
attributes, closed = parse_attributes(prefixes
|
484
|
+
@prefixes.clear
|
485
|
+
@prefixes << md[2] if md[2]
|
486
|
+
push_namespaces_restore
|
487
|
+
attributes, closed = parse_attributes(@prefixes)
|
414
488
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
416
|
-
unless @
|
489
|
+
for prefix in @prefixes
|
490
|
+
unless @namespaces.key?(prefix)
|
417
491
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
492
|
end
|
419
493
|
end
|
420
494
|
|
421
495
|
if closed
|
422
496
|
@closed = tag
|
423
|
-
|
497
|
+
pop_namespaces_restore
|
424
498
|
else
|
499
|
+
if @tags.empty? and @have_root
|
500
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
501
|
+
end
|
425
502
|
@tags.push( tag )
|
426
503
|
end
|
504
|
+
@have_root = true
|
427
505
|
return [ :start_element, tag, attributes ]
|
428
506
|
end
|
429
507
|
else
|
430
|
-
|
431
|
-
text
|
508
|
+
text = @source.read_until("<")
|
509
|
+
if text.chomp!("<")
|
510
|
+
@source.position -= "<".bytesize
|
511
|
+
end
|
512
|
+
if @tags.empty?
|
513
|
+
unless /\A\s*\z/.match?(text)
|
514
|
+
if @have_root
|
515
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
516
|
+
else
|
517
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
518
|
+
end
|
519
|
+
end
|
520
|
+
return pull_event if @have_root
|
521
|
+
end
|
432
522
|
return [ :text, text ]
|
433
523
|
end
|
434
524
|
rescue REXML::UndefinedNamespaceException
|
@@ -444,13 +534,13 @@ module REXML
|
|
444
534
|
private :pull_event
|
445
535
|
|
446
536
|
def entity( reference, entities )
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
unnormalize( value, entities )
|
537
|
+
return unless entities
|
538
|
+
|
539
|
+
value = entities[ reference ]
|
540
|
+
return if value.nil?
|
541
|
+
|
542
|
+
record_entity_expansion
|
543
|
+
unnormalize( value, entities )
|
454
544
|
end
|
455
545
|
|
456
546
|
# Escapes all possible entities
|
@@ -471,34 +561,87 @@ module REXML
|
|
471
561
|
|
472
562
|
# Unescapes all possible entities
|
473
563
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
|
564
|
+
if string.include?("\r")
|
565
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
566
|
+
else
|
567
|
+
rv = string.dup
|
568
|
+
end
|
475
569
|
matches = rv.scan( REFERENCE_RE )
|
476
570
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
571
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
572
|
m=$1
|
479
|
-
|
480
|
-
|
573
|
+
if m.start_with?("x")
|
574
|
+
code_point = Integer(m[1..-1], 16)
|
575
|
+
else
|
576
|
+
code_point = Integer(m, 10)
|
577
|
+
end
|
578
|
+
[code_point].pack('U*')
|
481
579
|
}
|
482
580
|
matches.collect!{|x|x[0]}.compact!
|
581
|
+
if filter
|
582
|
+
matches.reject! do |entity_reference|
|
583
|
+
filter.include?(entity_reference)
|
584
|
+
end
|
585
|
+
end
|
483
586
|
if matches.size > 0
|
484
|
-
matches.each do |entity_reference|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
587
|
+
matches.tally.each do |entity_reference, n|
|
588
|
+
entity_expansion_count_before = @entity_expansion_count
|
589
|
+
entity_value = entity( entity_reference, entities )
|
590
|
+
if entity_value
|
591
|
+
if n > 1
|
592
|
+
entity_expansion_count_delta =
|
593
|
+
@entity_expansion_count - entity_expansion_count_before
|
594
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
595
|
+
end
|
596
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
597
|
+
rv.gsub!( re, entity_value )
|
598
|
+
if rv.bytesize > @entity_expansion_text_limit
|
599
|
+
raise "entity expansion has grown too large"
|
493
600
|
end
|
601
|
+
else
|
602
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
603
|
+
rv.gsub!( er[0], er[2] ) if er
|
494
604
|
end
|
495
605
|
end
|
496
|
-
rv.gsub!(
|
606
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
607
|
end
|
498
608
|
rv
|
499
609
|
end
|
500
610
|
|
501
611
|
private
|
612
|
+
def add_namespace(prefix, uri)
|
613
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
614
|
+
if uri.nil?
|
615
|
+
@namespaces.delete(prefix)
|
616
|
+
else
|
617
|
+
@namespaces[prefix] = uri
|
618
|
+
end
|
619
|
+
end
|
620
|
+
|
621
|
+
def push_namespaces_restore
|
622
|
+
namespaces_restore = {}
|
623
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
624
|
+
namespaces_restore
|
625
|
+
end
|
626
|
+
|
627
|
+
def pop_namespaces_restore
|
628
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
629
|
+
namespaces_restore.each do |prefix, uri|
|
630
|
+
if uri.nil?
|
631
|
+
@namespaces.delete(prefix)
|
632
|
+
else
|
633
|
+
@namespaces[prefix] = uri
|
634
|
+
end
|
635
|
+
end
|
636
|
+
end
|
637
|
+
|
638
|
+
def record_entity_expansion(delta=1)
|
639
|
+
@entity_expansion_count += delta
|
640
|
+
if @entity_expansion_count > @entity_expansion_limit
|
641
|
+
raise "number of entity expansions exceeded, processing aborted."
|
642
|
+
end
|
643
|
+
end
|
644
|
+
|
502
645
|
def need_source_encoding_update?(xml_declaration_encoding)
|
503
646
|
return false if xml_declaration_encoding.nil?
|
504
647
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -506,16 +649,16 @@ module REXML
|
|
506
649
|
end
|
507
650
|
|
508
651
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
652
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
653
|
unless md
|
511
|
-
if @source.match(/\
|
654
|
+
if @source.match(/\S/um)
|
512
655
|
message = "#{base_error_message}: invalid name"
|
513
656
|
else
|
514
657
|
message = "#{base_error_message}: name is missing"
|
515
658
|
end
|
516
659
|
raise REXML::ParseException.new(message, @source)
|
517
660
|
end
|
518
|
-
md[
|
661
|
+
md[0]
|
519
662
|
end
|
520
663
|
|
521
664
|
def parse_id(base_error_message,
|
@@ -584,15 +727,24 @@ module REXML
|
|
584
727
|
end
|
585
728
|
end
|
586
729
|
|
587
|
-
def process_instruction
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
730
|
+
def process_instruction
|
731
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
732
|
+
if @source.match(/\s+/um, true)
|
733
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
734
|
+
unless match_data
|
735
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
736
|
+
end
|
737
|
+
content = match_data[1]
|
738
|
+
else
|
739
|
+
content = nil
|
740
|
+
unless @source.match("?>", true)
|
741
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
742
|
+
end
|
593
743
|
end
|
594
|
-
if
|
595
|
-
|
744
|
+
if name == "xml"
|
745
|
+
if @document_status
|
746
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
747
|
+
end
|
596
748
|
version = VERSION.match(content)
|
597
749
|
version = version[1] unless version.nil?
|
598
750
|
encoding = ENCODING.match(content)
|
@@ -607,11 +759,12 @@ module REXML
|
|
607
759
|
standalone = standalone[1] unless standalone.nil?
|
608
760
|
return [ :xmldecl, version, encoding, standalone ]
|
609
761
|
end
|
610
|
-
[:processing_instruction,
|
762
|
+
[:processing_instruction, name, content]
|
611
763
|
end
|
612
764
|
|
613
|
-
def parse_attributes(prefixes
|
765
|
+
def parse_attributes(prefixes)
|
614
766
|
attributes = {}
|
767
|
+
expanded_names = {}
|
615
768
|
closed = false
|
616
769
|
while true
|
617
770
|
if @source.match(">", true)
|
@@ -633,15 +786,17 @@ module REXML
|
|
633
786
|
raise REXML::ParseException.new(message, @source)
|
634
787
|
end
|
635
788
|
quote = match[1]
|
789
|
+
start_position = @source.position
|
636
790
|
value = @source.read_until(quote)
|
637
791
|
unless value.chomp!(quote)
|
792
|
+
@source.position = start_position
|
638
793
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
794
|
raise REXML::ParseException.new(message, @source)
|
640
795
|
end
|
641
796
|
@source.match(/\s*/um, true)
|
642
797
|
if prefix == "xmlns"
|
643
798
|
if local_part == "xml"
|
644
|
-
if value !=
|
799
|
+
if value != Private::XML_PREFIXED_NAMESPACE
|
645
800
|
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
646
801
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
647
802
|
raise REXML::ParseException.new( msg, @source, self )
|
@@ -651,7 +806,7 @@ module REXML
|
|
651
806
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
652
807
|
raise REXML::ParseException.new( msg, @source, self)
|
653
808
|
end
|
654
|
-
|
809
|
+
add_namespace(local_part, value)
|
655
810
|
elsif prefix
|
656
811
|
prefixes << prefix unless prefix == "xml"
|
657
812
|
end
|
@@ -661,6 +816,20 @@ module REXML
|
|
661
816
|
raise REXML::ParseException.new(msg, @source, self)
|
662
817
|
end
|
663
818
|
|
819
|
+
unless prefix == "xmlns"
|
820
|
+
uri = @namespaces[prefix]
|
821
|
+
expanded_name = [uri, local_part]
|
822
|
+
existing_prefix = expanded_names[expanded_name]
|
823
|
+
if existing_prefix
|
824
|
+
message = "Namespace conflict in adding attribute " +
|
825
|
+
"\"#{local_part}\": " +
|
826
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
827
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
828
|
+
raise REXML::ParseException.new(message, @source, self)
|
829
|
+
end
|
830
|
+
expanded_names[expanded_name] = prefix
|
831
|
+
end
|
832
|
+
|
664
833
|
attributes[name] = value
|
665
834
|
else
|
666
835
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -22,6 +22,18 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
25
37
|
def add_listener( listener )
|
26
38
|
@parser.add_listener( listener )
|
27
39
|
end
|
@@ -157,25 +169,8 @@ module REXML
|
|
157
169
|
end
|
158
170
|
end
|
159
171
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
172
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
173
|
+
handle( :characters, unnormalized )
|
179
174
|
when :entitydecl
|
180
175
|
handle_entitydecl( event )
|
181
176
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -264,6 +259,8 @@ module REXML
|
|
264
259
|
end
|
265
260
|
|
266
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
267
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
268
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
269
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
@@ -7,37 +7,42 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
17
29
|
def parse
|
18
30
|
# entity string
|
19
31
|
while true
|
20
32
|
event = @parser.pull
|
21
33
|
case event[0]
|
22
34
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
35
|
return
|
29
36
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
37
|
attrs = event[2].each do |n, v|
|
32
38
|
event[2][n] = @parser.unnormalize( v )
|
33
39
|
end
|
34
40
|
@listener.tag_start( event[1], attrs )
|
35
41
|
when :end_element
|
36
42
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
43
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
44
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
45
|
+
@listener.text( unnormalized )
|
41
46
|
when :processing_instruction
|
42
47
|
@listener.instruction( *event[1,2] )
|
43
48
|
when :start_doctype
|
@@ -48,6 +53,7 @@ module REXML
|
|
48
53
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
54
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
55
|
when :entitydecl, :notationdecl
|
56
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
57
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
58
|
when :externalentity
|
53
59
|
entity_reference = event[1]
|