rexml 3.2.4 → 3.2.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

data/lib/rexml/light/node.rb CHANGED
@@ -1,14 +1,6 @@
1
1
  # frozen_string_literal: false
2
2
  require_relative '../xmltokens'
3
3
 
4
- # [ :element, parent, name, attributes, children* ]
5
- # a = Node.new
6
- # a << "B" # => <a>B</a>
7
- # a.b # => <a>B<b/></a>
8
- # a.b[1] # => <a>B<b/><b/><a>
9
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
10
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
11
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
12
4
  module REXML
13
5
  module Light
14
6
  # Represents a tagged XML element. Elements are characterized by
data/lib/rexml/parsers/baseparser.rb CHANGED
@@ -50,7 +50,6 @@ module REXML
50
50
 
51
51
  DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
52
52
  DOCTYPE_END = /\A\s*\]\s*>/um
53
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
54
53
  ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
55
54
  COMMENT_START = /\A<!--/u
56
55
  COMMENT_PATTERN = /<!--(.*?)-->/um
@@ -61,15 +60,14 @@ module REXML
61
60
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
62
61
  INSTRUCTION_START = /\A<\?/u
63
62
  INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
64
- TAG_MATCH = /^<((?>#{QNAME_STR}))/um
65
- CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
63
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
64
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
66
65
 
67
66
  VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
68
67
  ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
69
68
  STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
70
69
 
71
70
  ENTITY_START = /\A\s*<!ENTITY/
72
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
73
71
  ELEMENTDECL_START = /\A\s*<!ELEMENT/um
74
72
  ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
75
73
  SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -83,9 +81,6 @@ module REXML
83
81
  ATTDEF_RE = /#{ATTDEF}/
84
82
  ATTLISTDECL_START = /\A\s*<!ATTLIST/um
85
83
  ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86
- NOTATIONDECL_START = /\A\s*<!NOTATION/um
87
- PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88
- SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
89
84
 
90
85
  TEXT_PATTERN = /\A([^<]*)/um
91
86
 
@@ -103,6 +98,11 @@ module REXML
103
98
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
104
99
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105
100
 
101
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
102
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
103
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
104
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
105
+
106
106
  EREFERENCE = /&(?!#{NAME};)/
107
107
 
108
108
  DEFAULT_ENTITIES = {
@@ -195,11 +195,9 @@ module REXML
195
195
  return [ :end_document ] if empty?
196
196
  return @stack.shift if @stack.size > 0
197
197
  #STDERR.puts @source.encoding
198
- @source.read if @source.buffer.size<2
199
198
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
200
199
  if @document_status == nil
201
- #@source.consume( /^\s*/um )
202
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
200
+ word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
203
201
  word = word[1] unless word.nil?
204
202
  #STDERR.puts "WORD = #{word.inspect}"
205
203
  case word
@@ -224,38 +222,49 @@ module REXML
224
222
  when INSTRUCTION_START
225
223
  return process_instruction
226
224
  when DOCTYPE_START
227
- md = @source.match( DOCTYPE_PATTERN, true )
225
+ base_error_message = "Malformed DOCTYPE"
226
+ @source.match(DOCTYPE_START, true)
228
227
  @nsstack.unshift(curr_ns=Set.new)
229
- identity = md[1]
230
- close = md[2]
231
- identity =~ IDENTITY
232
- name = $1
233
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
234
- pub_sys = $2.nil? ? nil : $2.strip
235
- long_name = $4.nil? ? nil : $4.strip
236
- uri = $6.nil? ? nil : $6.strip
237
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
238
- if close == ">"
228
+ name = parse_name(base_error_message)
229
+ if @source.match(/\A\s*\[/um, true)
230
+ id = [nil, nil, nil]
231
+ @document_status = :in_doctype
232
+ elsif @source.match(/\A\s*>/um, true)
233
+ id = [nil, nil, nil]
239
234
  @document_status = :after_doctype
240
- @source.read if @source.buffer.size<2
241
- md = @source.match(/^\s*/um, true)
242
- @stack << [ :end_doctype ]
243
235
  else
244
- @document_status = :in_doctype
236
+ id = parse_id(base_error_message,
237
+ accept_external_id: true,
238
+ accept_public_id: false)
239
+ if id[0] == "SYSTEM"
240
+ # For backward compatibility
241
+ id[1], id[2] = id[2], nil
242
+ end
243
+ if @source.match(/\A\s*\[/um, true)
244
+ @document_status = :in_doctype
245
+ elsif @source.match(/\A\s*>/um, true)
246
+ @document_status = :after_doctype
247
+ else
248
+ message = "#{base_error_message}: garbage after external ID"
249
+ raise REXML::ParseException.new(message, @source)
250
+ end
251
+ end
252
+ args = [:start_doctype, name, *id]
253
+ if @document_status == :after_doctype
254
+ @source.match(/\A\s*/um, true)
255
+ @stack << [ :end_doctype ]
245
256
  end
246
257
  return args
247
- when /^\s+/
258
+ when /\A\s+/
248
259
  else
249
260
  @document_status = :after_doctype
250
- @source.read if @source.buffer.size<2
251
- md = @source.match(/\s*/um, true)
252
261
  if @source.encoding == "UTF-8"
253
262
  @source.buffer.force_encoding(::Encoding::UTF_8)
254
263
  end
255
264
  end
256
265
  end
257
266
  if @document_status == :in_doctype
258
- md = @source.match(/\s*(.*?>)/um)
267
+ md = @source.match(/\A\s*(.*?>)/um)
259
268
  case md[1]
260
269
  when SYSTEMENTITY
261
270
  match = @source.match( SYSTEMENTITY, true )[1]
@@ -312,24 +321,35 @@ module REXML
312
321
  end
313
322
  return [ :attlistdecl, element, pairs, contents ]
314
323
  when NOTATIONDECL_START
315
- md = nil
316
- if @source.match( PUBLIC )
317
- md = @source.match( PUBLIC, true )
318
- vals = [md[1],md[2],md[4],md[6]]
319
- elsif @source.match( SYSTEM )
320
- md = @source.match( SYSTEM, true )
321
- vals = [md[1],md[2],nil,md[4]]
322
- else
323
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
324
+ base_error_message = "Malformed notation declaration"
325
+ unless @source.match(/\A\s*<!NOTATION\s+/um, true)
326
+ if @source.match(/\A\s*<!NOTATION\s*>/um)
327
+ message = "#{base_error_message}: name is missing"
328
+ else
329
+ message = "#{base_error_message}: invalid declaration name"
330
+ end
331
+ raise REXML::ParseException.new(message, @source)
324
332
  end
325
- return [ :notationdecl, *vals ]
333
+ name = parse_name(base_error_message)
334
+ id = parse_id(base_error_message,
335
+ accept_external_id: true,
336
+ accept_public_id: true)
337
+ unless @source.match(/\A\s*>/um, true)
338
+ message = "#{base_error_message}: garbage before end >"
339
+ raise REXML::ParseException.new(message, @source)
340
+ end
341
+ return [:notationdecl, name, *id]
326
342
  when DOCTYPE_END
327
343
  @document_status = :after_doctype
328
344
  @source.match( DOCTYPE_END, true )
329
345
  return [ :end_doctype ]
330
346
  end
331
347
  end
348
+ if @document_status == :after_doctype
349
+ @source.match(/\A\s*/um, true)
350
+ end
332
351
  begin
352
+ @source.read if @source.buffer.size<2
333
353
  if @source.buffer[0] == ?<
334
354
  if @source.buffer[1] == ?/
335
355
  @nsstack.shift
@@ -372,6 +392,7 @@ module REXML
372
392
  unless md
373
393
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
374
394
  end
395
+ @document_status = :in_element
375
396
  prefixes = Set.new
376
397
  prefixes << md[2] if md[2]
377
398
  @nsstack.unshift(curr_ns=Set.new)
@@ -477,6 +498,85 @@ module REXML
477
498
  true
478
499
  end
479
500
 
501
+ def parse_name(base_error_message)
502
+ md = @source.match(/\A\s*#{NAME}/um, true)
503
+ unless md
504
+ if @source.match(/\A\s*\S/um)
505
+ message = "#{base_error_message}: invalid name"
506
+ else
507
+ message = "#{base_error_message}: name is missing"
508
+ end
509
+ raise REXML::ParseException.new(message, @source)
510
+ end
511
+ md[1]
512
+ end
513
+
514
+ def parse_id(base_error_message,
515
+ accept_external_id:,
516
+ accept_public_id:)
517
+ if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
518
+ pubid = system = nil
519
+ pubid_literal = md[1]
520
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
521
+ system_literal = md[2]
522
+ system = system_literal[1..-2] if system_literal # Remove quote
523
+ ["PUBLIC", pubid, system]
524
+ elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
525
+ pubid = system = nil
526
+ pubid_literal = md[1]
527
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
528
+ ["PUBLIC", pubid, nil]
529
+ elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
530
+ system = nil
531
+ system_literal = md[1]
532
+ system = system_literal[1..-2] if system_literal # Remove quote
533
+ ["SYSTEM", nil, system]
534
+ else
535
+ details = parse_id_invalid_details(accept_external_id: accept_external_id,
536
+ accept_public_id: accept_public_id)
537
+ message = "#{base_error_message}: #{details}"
538
+ raise REXML::ParseException.new(message, @source)
539
+ end
540
+ end
541
+
542
+ def parse_id_invalid_details(accept_external_id:,
543
+ accept_public_id:)
544
+ public = /\A\s*PUBLIC/um
545
+ system = /\A\s*SYSTEM/um
546
+ if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
547
+ if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
548
+ return "public ID literal is missing"
549
+ end
550
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
551
+ return "invalid public ID literal"
552
+ end
553
+ if accept_public_id
554
+ if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
555
+ return "system ID literal is missing"
556
+ end
557
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
558
+ return "invalid system literal"
559
+ end
560
+ "garbage after system literal"
561
+ else
562
+ "garbage after public ID literal"
563
+ end
564
+ elsif accept_external_id and @source.match(/#{system}/um)
565
+ if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
566
+ return "system literal is missing"
567
+ end
568
+ unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
569
+ return "invalid system literal"
570
+ end
571
+ "garbage after system literal"
572
+ else
573
+ unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
574
+ return "invalid ID type"
575
+ end
576
+ "ID type is missing"
577
+ end
578
+ end
579
+
480
580
  def process_instruction
481
581
  match_data = @source.match(INSTRUCTION_PATTERN, true)
482
582
  unless match_data
data/lib/rexml/parsers/xpathparser.rb CHANGED
@@ -22,7 +22,13 @@ module REXML
22
22
  path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
23
23
  path.gsub!( /\s+([\]\)])/, '\1')
24
24
  parsed = []
25
- OrExpr(path, parsed)
25
+ rest = OrExpr(path, parsed)
26
+ if rest
27
+ unless rest.strip.empty?
28
+ raise ParseException.new("Garbage component exists at the end: " +
29
+ "<#{rest}>: <#{path}>")
30
+ end
31
+ end
26
32
  parsed
27
33
  end
28
34
 
@@ -229,24 +235,28 @@ module REXML
229
235
  path = path[1..-1]
230
236
  end
231
237
  else
238
+ path_before_axis_specifier = path
239
+ parsed_not_abberviated = []
232
240
  if path[0] == ?@
233
- parsed << :attribute
241
+ parsed_not_abberviated << :attribute
234
242
  path = path[1..-1]
235
243
  # Goto Nodetest
236
244
  elsif path =~ AXIS
237
- parsed << $1.tr('-','_').intern
245
+ parsed_not_abberviated << $1.tr('-','_').intern
238
246
  path = $'
239
247
  # Goto Nodetest
240
248
  else
241
- parsed << :child
249
+ parsed_not_abberviated << :child
242
250
  end
243
251
 
244
- n = []
245
- path = NodeTest( path, n)
246
-
247
- path = Predicate( path, n )
252
+ path_before_node_test = path
253
+ path = NodeTest(path, parsed_not_abberviated)
254
+ if path == path_before_node_test
255
+ return path_before_axis_specifier
256
+ end
257
+ path = Predicate(path, parsed_not_abberviated)
248
258
 
249
- parsed.concat(n)
259
+ parsed.concat(parsed_not_abberviated)
250
260
  end
251
261
 
252
262
  original_path = path
@@ -301,7 +311,9 @@ module REXML
301
311
  when PI
302
312
  path = $'
303
313
  literal = nil
304
- if path !~ /^\s*\)/
314
+ if path =~ /^\s*\)/
315
+ path = $'
316
+ else
305
317
  path =~ LITERAL
306
318
  literal = $1
307
319
  path = $'
@@ -545,7 +557,9 @@ module REXML
545
557
  #| PrimaryExpr
546
558
  def FilterExpr path, parsed
547
559
  n = []
548
- path = PrimaryExpr( path, n )
560
+ path_before_primary_expr = path
561
+ path = PrimaryExpr(path, n)
562
+ return path_before_primary_expr if path == path_before_primary_expr
549
563
  path = Predicate(path, n)
550
564
  parsed.concat(n)
551
565
  path
data/lib/rexml/rexml.rb CHANGED
@@ -1,30 +1,35 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # frozen_string_literal: false
3
- # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
4
- #
5
- # REXML is a _pure_ Ruby, XML 1.0 conforming,
6
- # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
7
- # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
8
- # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
9
- # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
10
- # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
11
- # Ruby 1.8, REXML is included in the standard Ruby distribution.
12
- #
13
- # Main page:: http://www.germane-software.com/software/rexml
14
- # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
15
- # Date:: 2008/019
16
- # Version:: 3.1.7.3
17
- #
18
- # This API documentation can be downloaded from the REXML home page, or can
19
- # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
- #
21
- # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
- # or can be accessed
23
- # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
3
+ #
4
+ # \Module \REXML provides classes and methods for parsing,
5
+ # editing, and generating XML.
6
+ #
7
+ # == Implementation
8
+ #
9
+ # \REXML:
10
+ # - Is pure Ruby.
11
+ # - Provides tree, stream, SAX2, pull, and lightweight APIs.
12
+ # - Conforms to {XML version 1.0}[https://www.w3.org/TR/REC-xml/].
13
+ # - Fully implements {XPath version 1.0}[http://www.w3c.org/tr/xpath].
14
+ # - Is {non-validating}[https://www.w3.org/TR/xml/].
15
+ # - Passes 100% of the non-validating {Oasis tests}[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml].
16
+ #
17
+ # == In a Hurry?
18
+ #
19
+ # If you're somewhat familiar with XML
20
+ # and have a particular task in mind,
21
+ # you may want to see {the tasks pages}[doc/rexml/tasks/tocs/master_toc_rdoc.html].
22
+ #
23
+ # == API
24
+ #
25
+ # Among the most important classes for using \REXML are:
26
+ # - REXML::Document.
27
+ # - REXML::Element.
28
+ #
24
29
  module REXML
25
30
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
26
31
  DATE = "2008/019"
27
- VERSION = "3.2.4"
32
+ VERSION = "3.2.5"
28
33
  REVISION = ""
29
34
 
30
35
  Copyright = COPYRIGHT
data/lib/rexml/xpath_parser.rb CHANGED
@@ -7,39 +7,45 @@ require_relative 'xmltokens'
7
7
  require_relative 'attribute'
8
8
  require_relative 'parsers/xpathparser'
9
9
 
10
- class Object
11
- # provides a unified +clone+ operation, for REXML::XPathParser
12
- # to use across multiple Object types
13
- def dclone
14
- clone
15
- end
16
- end
17
- class Symbol
18
- # provides a unified +clone+ operation, for REXML::XPathParser
19
- # to use across multiple Object types
20
- def dclone ; self ; end
21
- end
22
- class Integer
23
- # provides a unified +clone+ operation, for REXML::XPathParser
24
- # to use across multiple Object types
25
- def dclone ; self ; end
26
- end
27
- class Float
28
- # provides a unified +clone+ operation, for REXML::XPathParser
29
- # to use across multiple Object types
30
- def dclone ; self ; end
31
- end
32
- class Array
33
- # provides a unified +clone+ operation, for REXML::XPathParser
34
- # to use across multiple Object+ types
35
- def dclone
36
- klone = self.clone
37
- klone.clear
38
- self.each{|v| klone << v.dclone}
39
- klone
10
+ module REXML
11
+ module DClonable
12
+ refine Object do
13
+ # provides a unified +clone+ operation, for REXML::XPathParser
14
+ # to use across multiple Object types
15
+ def dclone
16
+ clone
17
+ end
18
+ end
19
+ refine Symbol do
20
+ # provides a unified +clone+ operation, for REXML::XPathParser
21
+ # to use across multiple Object types
22
+ def dclone ; self ; end
23
+ end
24
+ refine Integer do
25
+ # provides a unified +clone+ operation, for REXML::XPathParser
26
+ # to use across multiple Object types
27
+ def dclone ; self ; end
28
+ end
29
+ refine Float do
30
+ # provides a unified +clone+ operation, for REXML::XPathParser
31
+ # to use across multiple Object types
32
+ def dclone ; self ; end
33
+ end
34
+ refine Array do
35
+ # provides a unified +clone+ operation, for REXML::XPathParser
36
+ # to use across multiple Object+ types
37
+ def dclone
38
+ klone = self.clone
39
+ klone.clear
40
+ self.each{|v| klone << v.dclone}
41
+ klone
42
+ end
43
+ end
40
44
  end
41
45
  end
42
46
 
47
+ using REXML::DClonable
48
+
43
49
  module REXML
44
50
  # You don't want to use this class. Really. Use XPath, which is a wrapper
45
51
  # for this class. Believe me. You don't want to poke around in here.