rexml 3.2.4 → 3.2.6

Sign up to get free protection for your applications and access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

data/lib/rexml/entity.rb CHANGED
@@ -132,24 +132,34 @@ module REXML
132
132
  # then:
133
133
  # doctype.entity('yada').value #-> "nanoo bar nanoo"
134
134
  def value
135
- if @value
136
- matches = @value.scan(PEREFERENCE_RE)
137
- rv = @value.clone
138
- if @parent
139
- sum = 0
140
- matches.each do |entity_reference|
141
- entity_value = @parent.entity( entity_reference[0] )
142
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
143
- raise "entity expansion has grown too large"
144
- else
145
- sum += entity_value.bytesize
146
- end
147
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
135
+ @resolved_value ||= resolve_value
136
+ end
137
+
138
+ def parent=(other)
139
+ @resolved_value = nil
140
+ super
141
+ end
142
+
143
+ private
144
+ def resolve_value
145
+ return nil if @value.nil?
146
+ return @value unless @value.match?(PEREFERENCE_RE)
147
+
148
+ matches = @value.scan(PEREFERENCE_RE)
149
+ rv = @value.clone
150
+ if @parent
151
+ sum = 0
152
+ matches.each do |entity_reference|
153
+ entity_value = @parent.entity( entity_reference[0] )
154
+ if sum + entity_value.bytesize > Security.entity_expansion_text_limit
155
+ raise "entity expansion has grown too large"
156
+ else
157
+ sum += entity_value.bytesize
148
158
  end
159
+ rv.gsub!( /%#{entity_reference.join};/um, entity_value )
149
160
  end
150
- return rv
151
161
  end
152
- nil
162
+ rv
153
163
  end
154
164
  end
155
165
 
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: false
1
+ # frozen_string_literal: true
2
2
  require_relative 'default'
3
3
 
4
4
  module REXML
@@ -58,7 +58,7 @@ module REXML
58
58
  skip = false
59
59
  if compact
60
60
  if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
61
- string = ""
61
+ string = +""
62
62
  old_level = @level
63
63
  @level = 0
64
64
  node.children.each { |child| write( child, string ) }
@@ -1,14 +1,6 @@
1
1
  # frozen_string_literal: false
2
2
  require_relative '../xmltokens'
3
3
 
4
- # [ :element, parent, name, attributes, children* ]
5
- # a = Node.new
6
- # a << "B" # => <a>B</a>
7
- # a.b # => <a>B<b/></a>
8
- # a.b[1] # => <a>B<b/><b/><a>
9
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
10
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
11
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
12
4
  module REXML
13
5
  module Light
14
6
  # Represents a tagged XML element. Elements are characterized by
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: false
1
+ # frozen_string_literal: true
2
2
 
3
3
  require_relative 'xmltokens'
4
4
 
@@ -10,13 +10,17 @@ module REXML
10
10
  # The expanded name of the object, valid if name is set
11
11
  attr_accessor :prefix
12
12
  include XMLTokens
13
+ NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
13
14
  NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
14
15
 
15
16
  # Sets the name and the expanded name
16
17
  def name=( name )
17
18
  @expanded_name = name
18
- case name
19
- when NAMESPLIT
19
+ if name.match?(NAME_WITHOUT_NAMESPACE)
20
+ @prefix = ""
21
+ @namespace = ""
22
+ @name = name
23
+ elsif name =~ NAMESPLIT
20
24
  if $1
21
25
  @prefix = $1
22
26
  else
@@ -24,7 +28,7 @@ module REXML
24
28
  @namespace = ""
25
29
  end
26
30
  @name = $2
27
- when ""
31
+ elsif name == ""
28
32
  @prefix = nil
29
33
  @namespace = nil
30
34
  @name = nil
@@ -50,7 +50,6 @@ module REXML
50
50
 
51
51
  DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
52
52
  DOCTYPE_END = /\A\s*\]\s*>/um
53
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
54
53
  ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
55
54
  COMMENT_START = /\A<!--/u
56
55
  COMMENT_PATTERN = /<!--(.*?)-->/um
@@ -61,15 +60,14 @@ module REXML
61
60
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
62
61
  INSTRUCTION_START = /\A<\?/u
63
62
  INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
64
- TAG_MATCH = /^<((?>#{QNAME_STR}))/um
65
- CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
63
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
64
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
66
65
 
67
66
  VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
68
67
  ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
69
68
  STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
70
69
 
71
70
  ENTITY_START = /\A\s*<!ENTITY/
72
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
73
71
  ELEMENTDECL_START = /\A\s*<!ELEMENT/um
74
72
  ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
75
73
  SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -83,9 +81,6 @@ module REXML
83
81
  ATTDEF_RE = /#{ATTDEF}/
84
82
  ATTLISTDECL_START = /\A\s*<!ATTLIST/um
85
83
  ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86
- NOTATIONDECL_START = /\A\s*<!NOTATION/um
87
- PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88
- SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
89
84
 
90
85
  TEXT_PATTERN = /\A([^<]*)/um
91
86
 
@@ -103,6 +98,11 @@ module REXML
103
98
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
104
99
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105
100
 
101
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
102
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
103
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
104
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
105
+
106
106
  EREFERENCE = /&(?!#{NAME};)/
107
107
 
108
108
  DEFAULT_ENTITIES = {
@@ -195,11 +195,9 @@ module REXML
195
195
  return [ :end_document ] if empty?
196
196
  return @stack.shift if @stack.size > 0
197
197
  #STDERR.puts @source.encoding
198
- @source.read if @source.buffer.size<2
199
198
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
200
199
  if @document_status == nil
201
- #@source.consume( /^\s*/um )
202
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
200
+ word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
203
201
  word = word[1] unless word.nil?
204
202
  #STDERR.puts "WORD = #{word.inspect}"
205
203
  case word
@@ -224,38 +222,49 @@ module REXML
224
222
  when INSTRUCTION_START
225
223
  return process_instruction
226
224
  when DOCTYPE_START
227
- md = @source.match( DOCTYPE_PATTERN, true )
225
+ base_error_message = "Malformed DOCTYPE"
226
+ @source.match(DOCTYPE_START, true)
228
227
  @nsstack.unshift(curr_ns=Set.new)
229
- identity = md[1]
230
- close = md[2]
231
- identity =~ IDENTITY
232
- name = $1
233
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
234
- pub_sys = $2.nil? ? nil : $2.strip
235
- long_name = $4.nil? ? nil : $4.strip
236
- uri = $6.nil? ? nil : $6.strip
237
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
238
- if close == ">"
228
+ name = parse_name(base_error_message)
229
+ if @source.match(/\A\s*\[/um, true)
230
+ id = [nil, nil, nil]
231
+ @document_status = :in_doctype
232
+ elsif @source.match(/\A\s*>/um, true)
233
+ id = [nil, nil, nil]
239
234
  @document_status = :after_doctype
240
- @source.read if @source.buffer.size<2
241
- md = @source.match(/^\s*/um, true)
242
- @stack << [ :end_doctype ]
243
235
  else
244
- @document_status = :in_doctype
236
+ id = parse_id(base_error_message,
237
+ accept_external_id: true,
238
+ accept_public_id: false)
239
+ if id[0] == "SYSTEM"
240
+ # For backward compatibility
241
+ id[1], id[2] = id[2], nil
242
+ end
243
+ if @source.match(/\A\s*\[/um, true)
244
+ @document_status = :in_doctype
245
+ elsif @source.match(/\A\s*>/um, true)
246
+ @document_status = :after_doctype
247
+ else
248
+ message = "#{base_error_message}: garbage after external ID"
249
+ raise REXML::ParseException.new(message, @source)
250
+ end
251
+ end
252
+ args = [:start_doctype, name, *id]
253
+ if @document_status == :after_doctype
254
+ @source.match(/\A\s*/um, true)
255
+ @stack << [ :end_doctype ]
245
256
  end
246
257
  return args
247
- when /^\s+/
258
+ when /\A\s+/
248
259
  else
249
260
  @document_status = :after_doctype
250
- @source.read if @source.buffer.size<2
251
- md = @source.match(/\s*/um, true)
252
261
  if @source.encoding == "UTF-8"
253
262
  @source.buffer.force_encoding(::Encoding::UTF_8)
254
263
  end
255
264
  end
256
265
  end
257
266
  if @document_status == :in_doctype
258
- md = @source.match(/\s*(.*?>)/um)
267
+ md = @source.match(/\A\s*(.*?>)/um)
259
268
  case md[1]
260
269
  when SYSTEMENTITY
261
270
  match = @source.match( SYSTEMENTITY, true )[1]
@@ -312,24 +321,35 @@ module REXML
312
321
  end
313
322
  return [ :attlistdecl, element, pairs, contents ]
314
323
  when NOTATIONDECL_START
315
- md = nil
316
- if @source.match( PUBLIC )
317
- md = @source.match( PUBLIC, true )
318
- vals = [md[1],md[2],md[4],md[6]]
319
- elsif @source.match( SYSTEM )
320
- md = @source.match( SYSTEM, true )
321
- vals = [md[1],md[2],nil,md[4]]
322
- else
323
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
324
+ base_error_message = "Malformed notation declaration"
325
+ unless @source.match(/\A\s*<!NOTATION\s+/um, true)
326
+ if @source.match(/\A\s*<!NOTATION\s*>/um)
327
+ message = "#{base_error_message}: name is missing"
328
+ else
329
+ message = "#{base_error_message}: invalid declaration name"
330
+ end
331
+ raise REXML::ParseException.new(message, @source)
324
332
  end
325
- return [ :notationdecl, *vals ]
333
+ name = parse_name(base_error_message)
334
+ id = parse_id(base_error_message,
335
+ accept_external_id: true,
336
+ accept_public_id: true)
337
+ unless @source.match(/\A\s*>/um, true)
338
+ message = "#{base_error_message}: garbage before end >"
339
+ raise REXML::ParseException.new(message, @source)
340
+ end
341
+ return [:notationdecl, name, *id]
326
342
  when DOCTYPE_END
327
343
  @document_status = :after_doctype
328
344
  @source.match( DOCTYPE_END, true )
329
345
  return [ :end_doctype ]
330
346
  end
331
347
  end
348
+ if @document_status == :after_doctype
349
+ @source.match(/\A\s*/um, true)
350
+ end
332
351
  begin
352
+ @source.read if @source.buffer.size<2
333
353
  if @source.buffer[0] == ?<
334
354
  if @source.buffer[1] == ?/
335
355
  @nsstack.shift
@@ -372,6 +392,7 @@ module REXML
372
392
  unless md
373
393
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
374
394
  end
395
+ @document_status = :in_element
375
396
  prefixes = Set.new
376
397
  prefixes << md[2] if md[2]
377
398
  @nsstack.unshift(curr_ns=Set.new)
@@ -477,6 +498,85 @@ module REXML
477
498
  true
478
499
  end
479
500
 
501
+ def parse_name(base_error_message)
502
+ md = @source.match(/\A\s*#{NAME}/um, true)
503
+ unless md
504
+ if @source.match(/\A\s*\S/um)
505
+ message = "#{base_error_message}: invalid name"
506
+ else
507
+ message = "#{base_error_message}: name is missing"
508
+ end
509
+ raise REXML::ParseException.new(message, @source)
510
+ end
511
+ md[1]
512
+ end
513
+
514
+ def parse_id(base_error_message,
515
+ accept_external_id:,
516
+ accept_public_id:)
517
+ if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
518
+ pubid = system = nil
519
+ pubid_literal = md[1]
520
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
521
+ system_literal = md[2]
522
+ system = system_literal[1..-2] if system_literal # Remove quote
523
+ ["PUBLIC", pubid, system]
524
+ elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
525
+ pubid = system = nil
526
+ pubid_literal = md[1]
527
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
528
+ ["PUBLIC", pubid, nil]
529
+ elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
530
+ system = nil
531
+ system_literal = md[1]
532
+ system = system_literal[1..-2] if system_literal # Remove quote
533
+ ["SYSTEM", nil, system]
534
+ else
535
+ details = parse_id_invalid_details(accept_external_id: accept_external_id,
536
+ accept_public_id: accept_public_id)
537
+ message = "#{base_error_message}: #{details}"
538
+ raise REXML::ParseException.new(message, @source)
539
+ end
540
+ end
541
+
542
+ def parse_id_invalid_details(accept_external_id:,
543
+ accept_public_id:)
544
+ public = /\A\s*PUBLIC/um
545
+ system = /\A\s*SYSTEM/um
546
+ if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
547
+ if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
548
+ return "public ID literal is missing"
549
+ end
550
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
551
+ return "invalid public ID literal"
552
+ end
553
+ if accept_public_id
554
+ if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
555
+ return "system ID literal is missing"
556
+ end
557
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
558
+ return "invalid system literal"
559
+ end
560
+ "garbage after system literal"
561
+ else
562
+ "garbage after public ID literal"
563
+ end
564
+ elsif accept_external_id and @source.match(/#{system}/um)
565
+ if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
566
+ return "system literal is missing"
567
+ end
568
+ unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
569
+ return "invalid system literal"
570
+ end
571
+ "garbage after system literal"
572
+ else
573
+ unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
574
+ return "invalid ID type"
575
+ end
576
+ "ID type is missing"
577
+ end
578
+ end
579
+
480
580
  def process_instruction
481
581
  match_data = @source.match(INSTRUCTION_PATTERN, true)
482
582
  unless match_data