rexml 3.2.0 → 3.2.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

data/lib/rexml/entity.rb CHANGED
@@ -90,7 +90,7 @@ module REXML
90
90
  # object itself is valid.)
91
91
  #
92
92
  # out::
93
- # An object implementing <TT>&lt;&lt;<TT> to which the entity will be
93
+ # An object implementing <TT>&lt;&lt;</TT> to which the entity will be
94
94
  # output
95
95
  # indent::
96
96
  # *DEPRECATED* and ignored
@@ -66,11 +66,11 @@ module REXML
66
66
  def Functions::id( object )
67
67
  end
68
68
 
69
- # UNTESTED
70
- def Functions::local_name( node_set=nil )
71
- get_namespace( node_set ) do |node|
69
+ def Functions::local_name(node_set=nil)
70
+ get_namespace(node_set) do |node|
72
71
  return node.local_name
73
72
  end
73
+ ""
74
74
  end
75
75
 
76
76
  def Functions::namespace_uri( node_set=nil )
@@ -135,8 +135,7 @@ module REXML
135
135
  #
136
136
  # An object of a type other than the four basic types is converted to a
137
137
  # string in a way that is dependent on that type.
138
- def Functions::string( object=nil )
139
- object = @@context[:node] if object.nil?
138
+ def Functions::string( object=@@context[:node] )
140
139
  if object.respond_to?(:node_type)
141
140
  case object.node_type
142
141
  when :attribute
@@ -165,8 +164,6 @@ module REXML
165
164
  object.to_s
166
165
  end
167
166
  end
168
- when nil
169
- ""
170
167
  else
171
168
  object.to_s
172
169
  end
@@ -318,18 +315,23 @@ module REXML
318
315
  end
319
316
  end
320
317
 
321
- # UNTESTED
322
- def Functions::boolean( object=nil )
323
- if object.kind_of? String
324
- if object =~ /\d+/u
325
- return object.to_f != 0
326
- else
327
- return object.size > 0
328
- end
329
- elsif object.kind_of? Array
330
- object = object.find{|x| x and true}
318
+ def Functions::boolean(object=@@context[:node])
319
+ case object
320
+ when true, false
321
+ object
322
+ when Float
323
+ return false if object.zero?
324
+ return false if object.nan?
325
+ true
326
+ when Numeric
327
+ not object.zero?
328
+ when String
329
+ not object.empty?
330
+ when Array
331
+ not object.empty?
332
+ else
333
+ object ? true : false
331
334
  end
332
- return object ? true : false
333
335
  end
334
336
 
335
337
  # UNTESTED
@@ -383,25 +385,23 @@ module REXML
383
385
  #
384
386
  # an object of a type other than the four basic types is converted to a
385
387
  # number in a way that is dependent on that type
386
- def Functions::number( object=nil )
387
- object = @@context[:node] unless object
388
+ def Functions::number(object=@@context[:node])
388
389
  case object
389
390
  when true
390
391
  Float(1)
391
392
  when false
392
393
  Float(0)
393
394
  when Array
394
- number(string( object ))
395
+ number(string(object))
395
396
  when Numeric
396
397
  object.to_f
397
398
  else
398
- str = string( object )
399
- # If XPath ever gets scientific notation...
400
- #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
401
- if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
402
- str.to_f
399
+ str = string(object)
400
+ case str.strip
401
+ when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/
402
+ $1.to_f
403
403
  else
404
- (0.0 / 0.0)
404
+ Float::NAN
405
405
  end
406
406
  end
407
407
  end
@@ -1,14 +1,6 @@
1
1
  # frozen_string_literal: false
2
2
  require_relative '../xmltokens'
3
3
 
4
- # [ :element, parent, name, attributes, children* ]
5
- # a = Node.new
6
- # a << "B" # => <a>B</a>
7
- # a.b # => <a>B<b/></a>
8
- # a.b[1] # => <a>B<b/><b/><a>
9
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
10
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
11
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
12
4
  module REXML
13
5
  module Light
14
6
  # Represents a tagged XML element. Elements are characterized by
@@ -50,7 +50,6 @@ module REXML
50
50
 
51
51
  DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
52
52
  DOCTYPE_END = /\A\s*\]\s*>/um
53
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
54
53
  ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
55
54
  COMMENT_START = /\A<!--/u
56
55
  COMMENT_PATTERN = /<!--(.*?)-->/um
@@ -61,15 +60,14 @@ module REXML
61
60
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
62
61
  INSTRUCTION_START = /\A<\?/u
63
62
  INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
64
- TAG_MATCH = /^<((?>#{QNAME_STR}))/um
65
- CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
63
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
64
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
66
65
 
67
66
  VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
68
67
  ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
69
68
  STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
70
69
 
71
70
  ENTITY_START = /\A\s*<!ENTITY/
72
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
73
71
  ELEMENTDECL_START = /\A\s*<!ELEMENT/um
74
72
  ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
75
73
  SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -83,9 +81,6 @@ module REXML
83
81
  ATTDEF_RE = /#{ATTDEF}/
84
82
  ATTLISTDECL_START = /\A\s*<!ATTLIST/um
85
83
  ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86
- NOTATIONDECL_START = /\A\s*<!NOTATION/um
87
- PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88
- SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
89
84
 
90
85
  TEXT_PATTERN = /\A([^<]*)/um
91
86
 
@@ -103,6 +98,11 @@ module REXML
103
98
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
104
99
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105
100
 
101
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
102
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
103
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
104
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
105
+
106
106
  EREFERENCE = /&(?!#{NAME};)/
107
107
 
108
108
  DEFAULT_ENTITIES = {
@@ -195,11 +195,9 @@ module REXML
195
195
  return [ :end_document ] if empty?
196
196
  return @stack.shift if @stack.size > 0
197
197
  #STDERR.puts @source.encoding
198
- @source.read if @source.buffer.size<2
199
198
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
200
199
  if @document_status == nil
201
- #@source.consume( /^\s*/um )
202
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
200
+ word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
203
201
  word = word[1] unless word.nil?
204
202
  #STDERR.puts "WORD = #{word.inspect}"
205
203
  case word
@@ -224,38 +222,49 @@ module REXML
224
222
  when INSTRUCTION_START
225
223
  return process_instruction
226
224
  when DOCTYPE_START
227
- md = @source.match( DOCTYPE_PATTERN, true )
225
+ base_error_message = "Malformed DOCTYPE"
226
+ @source.match(DOCTYPE_START, true)
228
227
  @nsstack.unshift(curr_ns=Set.new)
229
- identity = md[1]
230
- close = md[2]
231
- identity =~ IDENTITY
232
- name = $1
233
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
234
- pub_sys = $2.nil? ? nil : $2.strip
235
- long_name = $4.nil? ? nil : $4.strip
236
- uri = $6.nil? ? nil : $6.strip
237
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
238
- if close == ">"
228
+ name = parse_name(base_error_message)
229
+ if @source.match(/\A\s*\[/um, true)
230
+ id = [nil, nil, nil]
231
+ @document_status = :in_doctype
232
+ elsif @source.match(/\A\s*>/um, true)
233
+ id = [nil, nil, nil]
239
234
  @document_status = :after_doctype
240
- @source.read if @source.buffer.size<2
241
- md = @source.match(/^\s*/um, true)
242
- @stack << [ :end_doctype ]
243
235
  else
244
- @document_status = :in_doctype
236
+ id = parse_id(base_error_message,
237
+ accept_external_id: true,
238
+ accept_public_id: false)
239
+ if id[0] == "SYSTEM"
240
+ # For backward compatibility
241
+ id[1], id[2] = id[2], nil
242
+ end
243
+ if @source.match(/\A\s*\[/um, true)
244
+ @document_status = :in_doctype
245
+ elsif @source.match(/\A\s*>/um, true)
246
+ @document_status = :after_doctype
247
+ else
248
+ message = "#{base_error_message}: garbage after external ID"
249
+ raise REXML::ParseException.new(message, @source)
250
+ end
251
+ end
252
+ args = [:start_doctype, name, *id]
253
+ if @document_status == :after_doctype
254
+ @source.match(/\A\s*/um, true)
255
+ @stack << [ :end_doctype ]
245
256
  end
246
257
  return args
247
- when /^\s+/
258
+ when /\A\s+/
248
259
  else
249
260
  @document_status = :after_doctype
250
- @source.read if @source.buffer.size<2
251
- md = @source.match(/\s*/um, true)
252
261
  if @source.encoding == "UTF-8"
253
262
  @source.buffer.force_encoding(::Encoding::UTF_8)
254
263
  end
255
264
  end
256
265
  end
257
266
  if @document_status == :in_doctype
258
- md = @source.match(/\s*(.*?>)/um)
267
+ md = @source.match(/\A\s*(.*?>)/um)
259
268
  case md[1]
260
269
  when SYSTEMENTITY
261
270
  match = @source.match( SYSTEMENTITY, true )[1]
@@ -312,29 +321,44 @@ module REXML
312
321
  end
313
322
  return [ :attlistdecl, element, pairs, contents ]
314
323
  when NOTATIONDECL_START
315
- md = nil
316
- if @source.match( PUBLIC )
317
- md = @source.match( PUBLIC, true )
318
- vals = [md[1],md[2],md[4],md[6]]
319
- elsif @source.match( SYSTEM )
320
- md = @source.match( SYSTEM, true )
321
- vals = [md[1],md[2],nil,md[4]]
322
- else
323
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
324
+ base_error_message = "Malformed notation declaration"
325
+ unless @source.match(/\A\s*<!NOTATION\s+/um, true)
326
+ if @source.match(/\A\s*<!NOTATION\s*>/um)
327
+ message = "#{base_error_message}: name is missing"
328
+ else
329
+ message = "#{base_error_message}: invalid declaration name"
330
+ end
331
+ raise REXML::ParseException.new(message, @source)
332
+ end
333
+ name = parse_name(base_error_message)
334
+ id = parse_id(base_error_message,
335
+ accept_external_id: true,
336
+ accept_public_id: true)
337
+ unless @source.match(/\A\s*>/um, true)
338
+ message = "#{base_error_message}: garbage before end >"
339
+ raise REXML::ParseException.new(message, @source)
324
340
  end
325
- return [ :notationdecl, *vals ]
341
+ return [:notationdecl, name, *id]
326
342
  when DOCTYPE_END
327
343
  @document_status = :after_doctype
328
344
  @source.match( DOCTYPE_END, true )
329
345
  return [ :end_doctype ]
330
346
  end
331
347
  end
348
+ if @document_status == :after_doctype
349
+ @source.match(/\A\s*/um, true)
350
+ end
332
351
  begin
352
+ @source.read if @source.buffer.size<2
333
353
  if @source.buffer[0] == ?<
334
354
  if @source.buffer[1] == ?/
335
355
  @nsstack.shift
336
356
  last_tag = @tags.pop
337
357
  md = @source.match( CLOSE_MATCH, true )
358
+ if md and !last_tag
359
+ message = "Unexpected top-level end tag (got '#{md[1]}')"
360
+ raise REXML::ParseException.new(message, @source)
361
+ end
338
362
  if md.nil? or last_tag != md[1]
339
363
  message = "Missing end tag for '#{last_tag}'"
340
364
  message << " (got '#{md[1]}')" if md
@@ -368,6 +392,7 @@ module REXML
368
392
  unless md
369
393
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
370
394
  end
395
+ @document_status = :in_element
371
396
  prefixes = Set.new
372
397
  prefixes << md[2] if md[2]
373
398
  @nsstack.unshift(curr_ns=Set.new)
@@ -473,6 +498,85 @@ module REXML
473
498
  true
474
499
  end
475
500
 
501
+ def parse_name(base_error_message)
502
+ md = @source.match(/\A\s*#{NAME}/um, true)
503
+ unless md
504
+ if @source.match(/\A\s*\S/um)
505
+ message = "#{base_error_message}: invalid name"
506
+ else
507
+ message = "#{base_error_message}: name is missing"
508
+ end
509
+ raise REXML::ParseException.new(message, @source)
510
+ end
511
+ md[1]
512
+ end
513
+
514
+ def parse_id(base_error_message,
515
+ accept_external_id:,
516
+ accept_public_id:)
517
+ if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
518
+ pubid = system = nil
519
+ pubid_literal = md[1]
520
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
521
+ system_literal = md[2]
522
+ system = system_literal[1..-2] if system_literal # Remove quote
523
+ ["PUBLIC", pubid, system]
524
+ elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
525
+ pubid = system = nil
526
+ pubid_literal = md[1]
527
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
528
+ ["PUBLIC", pubid, nil]
529
+ elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
530
+ system = nil
531
+ system_literal = md[1]
532
+ system = system_literal[1..-2] if system_literal # Remove quote
533
+ ["SYSTEM", nil, system]
534
+ else
535
+ details = parse_id_invalid_details(accept_external_id: accept_external_id,
536
+ accept_public_id: accept_public_id)
537
+ message = "#{base_error_message}: #{details}"
538
+ raise REXML::ParseException.new(message, @source)
539
+ end
540
+ end
541
+
542
+ def parse_id_invalid_details(accept_external_id:,
543
+ accept_public_id:)
544
+ public = /\A\s*PUBLIC/um
545
+ system = /\A\s*SYSTEM/um
546
+ if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
547
+ if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
548
+ return "public ID literal is missing"
549
+ end
550
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
551
+ return "invalid public ID literal"
552
+ end
553
+ if accept_public_id
554
+ if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
555
+ return "system ID literal is missing"
556
+ end
557
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
558
+ return "invalid system literal"
559
+ end
560
+ "garbage after system literal"
561
+ else
562
+ "garbage after public ID literal"
563
+ end
564
+ elsif accept_external_id and @source.match(/#{system}/um)
565
+ if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
566
+ return "system literal is missing"
567
+ end
568
+ unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
569
+ return "invalid system literal"
570
+ end
571
+ "garbage after system literal"
572
+ else
573
+ unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
574
+ return "invalid ID type"
575
+ end
576
+ "ID type is missing"
577
+ end
578
+ end
579
+
476
580
  def process_instruction
477
581
  match_data = @source.match(INSTRUCTION_PATTERN, true)
478
582
  unless match_data
@@ -22,7 +22,13 @@ module REXML
22
22
  path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
23
23
  path.gsub!( /\s+([\]\)])/, '\1')
24
24
  parsed = []
25
- OrExpr(path, parsed)
25
+ rest = OrExpr(path, parsed)
26
+ if rest
27
+ unless rest.strip.empty?
28
+ raise ParseException.new("Garbage component exists at the end: " +
29
+ "<#{rest}>: <#{path}>")
30
+ end
31
+ end
26
32
  parsed
27
33
  end
28
34
 
@@ -229,24 +235,28 @@ module REXML
229
235
  path = path[1..-1]
230
236
  end
231
237
  else
238
+ path_before_axis_specifier = path
239
+ parsed_not_abberviated = []
232
240
  if path[0] == ?@
233
- parsed << :attribute
241
+ parsed_not_abberviated << :attribute
234
242
  path = path[1..-1]
235
243
  # Goto Nodetest
236
244
  elsif path =~ AXIS
237
- parsed << $1.tr('-','_').intern
245
+ parsed_not_abberviated << $1.tr('-','_').intern
238
246
  path = $'
239
247
  # Goto Nodetest
240
248
  else
241
- parsed << :child
249
+ parsed_not_abberviated << :child
242
250
  end
243
251
 
244
- n = []
245
- path = NodeTest( path, n)
246
-
247
- path = Predicate( path, n )
252
+ path_before_node_test = path
253
+ path = NodeTest(path, parsed_not_abberviated)
254
+ if path == path_before_node_test
255
+ return path_before_axis_specifier
256
+ end
257
+ path = Predicate(path, parsed_not_abberviated)
248
258
 
249
- parsed.concat(n)
259
+ parsed.concat(parsed_not_abberviated)
250
260
  end
251
261
 
252
262
  original_path = path
@@ -301,7 +311,9 @@ module REXML
301
311
  when PI
302
312
  path = $'
303
313
  literal = nil
304
- if path !~ /^\s*\)/
314
+ if path =~ /^\s*\)/
315
+ path = $'
316
+ else
305
317
  path =~ LITERAL
306
318
  literal = $1
307
319
  path = $'
@@ -545,7 +557,9 @@ module REXML
545
557
  #| PrimaryExpr
546
558
  def FilterExpr path, parsed
547
559
  n = []
548
- path = PrimaryExpr( path, n )
560
+ path_before_primary_expr = path
561
+ path = PrimaryExpr(path, n)
562
+ return path_before_primary_expr if path == path_before_primary_expr
549
563
  path = Predicate(path, n)
550
564
  parsed.concat(n)
551
565
  path