rexml 3.2.4 → 3.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,6 @@
1
1
  # frozen_string_literal: false
2
2
  require_relative '../xmltokens'
3
3
 
4
- # [ :element, parent, name, attributes, children* ]
5
- # a = Node.new
6
- # a << "B" # => <a>B</a>
7
- # a.b # => <a>B<b/></a>
8
- # a.b[1] # => <a>B<b/><b/><a>
9
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
10
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
11
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
12
4
  module REXML
13
5
  module Light
14
6
  # Represents a tagged XML element. Elements are characterized by
@@ -50,7 +50,6 @@ module REXML
50
50
 
51
51
  DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
52
52
  DOCTYPE_END = /\A\s*\]\s*>/um
53
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
54
53
  ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
55
54
  COMMENT_START = /\A<!--/u
56
55
  COMMENT_PATTERN = /<!--(.*?)-->/um
@@ -61,15 +60,14 @@ module REXML
61
60
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
62
61
  INSTRUCTION_START = /\A<\?/u
63
62
  INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
64
- TAG_MATCH = /^<((?>#{QNAME_STR}))/um
65
- CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
63
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
64
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
66
65
 
67
66
  VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
68
67
  ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
69
68
  STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
70
69
 
71
70
  ENTITY_START = /\A\s*<!ENTITY/
72
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
73
71
  ELEMENTDECL_START = /\A\s*<!ELEMENT/um
74
72
  ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
75
73
  SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -83,9 +81,6 @@ module REXML
83
81
  ATTDEF_RE = /#{ATTDEF}/
84
82
  ATTLISTDECL_START = /\A\s*<!ATTLIST/um
85
83
  ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86
- NOTATIONDECL_START = /\A\s*<!NOTATION/um
87
- PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88
- SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
89
84
 
90
85
  TEXT_PATTERN = /\A([^<]*)/um
91
86
 
@@ -103,6 +98,11 @@ module REXML
103
98
  GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
104
99
  ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105
100
 
101
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
102
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
103
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
104
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
105
+
106
106
  EREFERENCE = /&(?!#{NAME};)/
107
107
 
108
108
  DEFAULT_ENTITIES = {
@@ -195,11 +195,9 @@ module REXML
195
195
  return [ :end_document ] if empty?
196
196
  return @stack.shift if @stack.size > 0
197
197
  #STDERR.puts @source.encoding
198
- @source.read if @source.buffer.size<2
199
198
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
200
199
  if @document_status == nil
201
- #@source.consume( /^\s*/um )
202
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
200
+ word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
203
201
  word = word[1] unless word.nil?
204
202
  #STDERR.puts "WORD = #{word.inspect}"
205
203
  case word
@@ -224,38 +222,49 @@ module REXML
224
222
  when INSTRUCTION_START
225
223
  return process_instruction
226
224
  when DOCTYPE_START
227
- md = @source.match( DOCTYPE_PATTERN, true )
225
+ base_error_message = "Malformed DOCTYPE"
226
+ @source.match(DOCTYPE_START, true)
228
227
  @nsstack.unshift(curr_ns=Set.new)
229
- identity = md[1]
230
- close = md[2]
231
- identity =~ IDENTITY
232
- name = $1
233
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
234
- pub_sys = $2.nil? ? nil : $2.strip
235
- long_name = $4.nil? ? nil : $4.strip
236
- uri = $6.nil? ? nil : $6.strip
237
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
238
- if close == ">"
228
+ name = parse_name(base_error_message)
229
+ if @source.match(/\A\s*\[/um, true)
230
+ id = [nil, nil, nil]
231
+ @document_status = :in_doctype
232
+ elsif @source.match(/\A\s*>/um, true)
233
+ id = [nil, nil, nil]
239
234
  @document_status = :after_doctype
240
- @source.read if @source.buffer.size<2
241
- md = @source.match(/^\s*/um, true)
242
- @stack << [ :end_doctype ]
243
235
  else
244
- @document_status = :in_doctype
236
+ id = parse_id(base_error_message,
237
+ accept_external_id: true,
238
+ accept_public_id: false)
239
+ if id[0] == "SYSTEM"
240
+ # For backward compatibility
241
+ id[1], id[2] = id[2], nil
242
+ end
243
+ if @source.match(/\A\s*\[/um, true)
244
+ @document_status = :in_doctype
245
+ elsif @source.match(/\A\s*>/um, true)
246
+ @document_status = :after_doctype
247
+ else
248
+ message = "#{base_error_message}: garbage after external ID"
249
+ raise REXML::ParseException.new(message, @source)
250
+ end
251
+ end
252
+ args = [:start_doctype, name, *id]
253
+ if @document_status == :after_doctype
254
+ @source.match(/\A\s*/um, true)
255
+ @stack << [ :end_doctype ]
245
256
  end
246
257
  return args
247
- when /^\s+/
258
+ when /\A\s+/
248
259
  else
249
260
  @document_status = :after_doctype
250
- @source.read if @source.buffer.size<2
251
- md = @source.match(/\s*/um, true)
252
261
  if @source.encoding == "UTF-8"
253
262
  @source.buffer.force_encoding(::Encoding::UTF_8)
254
263
  end
255
264
  end
256
265
  end
257
266
  if @document_status == :in_doctype
258
- md = @source.match(/\s*(.*?>)/um)
267
+ md = @source.match(/\A\s*(.*?>)/um)
259
268
  case md[1]
260
269
  when SYSTEMENTITY
261
270
  match = @source.match( SYSTEMENTITY, true )[1]
@@ -312,24 +321,35 @@ module REXML
312
321
  end
313
322
  return [ :attlistdecl, element, pairs, contents ]
314
323
  when NOTATIONDECL_START
315
- md = nil
316
- if @source.match( PUBLIC )
317
- md = @source.match( PUBLIC, true )
318
- vals = [md[1],md[2],md[4],md[6]]
319
- elsif @source.match( SYSTEM )
320
- md = @source.match( SYSTEM, true )
321
- vals = [md[1],md[2],nil,md[4]]
322
- else
323
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
324
+ base_error_message = "Malformed notation declaration"
325
+ unless @source.match(/\A\s*<!NOTATION\s+/um, true)
326
+ if @source.match(/\A\s*<!NOTATION\s*>/um)
327
+ message = "#{base_error_message}: name is missing"
328
+ else
329
+ message = "#{base_error_message}: invalid declaration name"
330
+ end
331
+ raise REXML::ParseException.new(message, @source)
324
332
  end
325
- return [ :notationdecl, *vals ]
333
+ name = parse_name(base_error_message)
334
+ id = parse_id(base_error_message,
335
+ accept_external_id: true,
336
+ accept_public_id: true)
337
+ unless @source.match(/\A\s*>/um, true)
338
+ message = "#{base_error_message}: garbage before end >"
339
+ raise REXML::ParseException.new(message, @source)
340
+ end
341
+ return [:notationdecl, name, *id]
326
342
  when DOCTYPE_END
327
343
  @document_status = :after_doctype
328
344
  @source.match( DOCTYPE_END, true )
329
345
  return [ :end_doctype ]
330
346
  end
331
347
  end
348
+ if @document_status == :after_doctype
349
+ @source.match(/\A\s*/um, true)
350
+ end
332
351
  begin
352
+ @source.read if @source.buffer.size<2
333
353
  if @source.buffer[0] == ?<
334
354
  if @source.buffer[1] == ?/
335
355
  @nsstack.shift
@@ -372,6 +392,7 @@ module REXML
372
392
  unless md
373
393
  raise REXML::ParseException.new("malformed XML: missing tag start", @source)
374
394
  end
395
+ @document_status = :in_element
375
396
  prefixes = Set.new
376
397
  prefixes << md[2] if md[2]
377
398
  @nsstack.unshift(curr_ns=Set.new)
@@ -477,6 +498,85 @@ module REXML
477
498
  true
478
499
  end
479
500
 
501
+ def parse_name(base_error_message)
502
+ md = @source.match(/\A\s*#{NAME}/um, true)
503
+ unless md
504
+ if @source.match(/\A\s*\S/um)
505
+ message = "#{base_error_message}: invalid name"
506
+ else
507
+ message = "#{base_error_message}: name is missing"
508
+ end
509
+ raise REXML::ParseException.new(message, @source)
510
+ end
511
+ md[1]
512
+ end
513
+
514
+ def parse_id(base_error_message,
515
+ accept_external_id:,
516
+ accept_public_id:)
517
+ if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
518
+ pubid = system = nil
519
+ pubid_literal = md[1]
520
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
521
+ system_literal = md[2]
522
+ system = system_literal[1..-2] if system_literal # Remove quote
523
+ ["PUBLIC", pubid, system]
524
+ elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
525
+ pubid = system = nil
526
+ pubid_literal = md[1]
527
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
528
+ ["PUBLIC", pubid, nil]
529
+ elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
530
+ system = nil
531
+ system_literal = md[1]
532
+ system = system_literal[1..-2] if system_literal # Remove quote
533
+ ["SYSTEM", nil, system]
534
+ else
535
+ details = parse_id_invalid_details(accept_external_id: accept_external_id,
536
+ accept_public_id: accept_public_id)
537
+ message = "#{base_error_message}: #{details}"
538
+ raise REXML::ParseException.new(message, @source)
539
+ end
540
+ end
541
+
542
+ def parse_id_invalid_details(accept_external_id:,
543
+ accept_public_id:)
544
+ public = /\A\s*PUBLIC/um
545
+ system = /\A\s*SYSTEM/um
546
+ if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
547
+ if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
548
+ return "public ID literal is missing"
549
+ end
550
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
551
+ return "invalid public ID literal"
552
+ end
553
+ if accept_public_id
554
+ if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
555
+ return "system ID literal is missing"
556
+ end
557
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
558
+ return "invalid system literal"
559
+ end
560
+ "garbage after system literal"
561
+ else
562
+ "garbage after public ID literal"
563
+ end
564
+ elsif accept_external_id and @source.match(/#{system}/um)
565
+ if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
566
+ return "system literal is missing"
567
+ end
568
+ unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
569
+ return "invalid system literal"
570
+ end
571
+ "garbage after system literal"
572
+ else
573
+ unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
574
+ return "invalid ID type"
575
+ end
576
+ "ID type is missing"
577
+ end
578
+ end
579
+
480
580
  def process_instruction
481
581
  match_data = @source.match(INSTRUCTION_PATTERN, true)
482
582
  unless match_data
@@ -22,7 +22,13 @@ module REXML
22
22
  path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
23
23
  path.gsub!( /\s+([\]\)])/, '\1')
24
24
  parsed = []
25
- OrExpr(path, parsed)
25
+ rest = OrExpr(path, parsed)
26
+ if rest
27
+ unless rest.strip.empty?
28
+ raise ParseException.new("Garbage component exists at the end: " +
29
+ "<#{rest}>: <#{path}>")
30
+ end
31
+ end
26
32
  parsed
27
33
  end
28
34
 
@@ -229,24 +235,28 @@ module REXML
229
235
  path = path[1..-1]
230
236
  end
231
237
  else
238
+ path_before_axis_specifier = path
239
+ parsed_not_abberviated = []
232
240
  if path[0] == ?@
233
- parsed << :attribute
241
+ parsed_not_abberviated << :attribute
234
242
  path = path[1..-1]
235
243
  # Goto Nodetest
236
244
  elsif path =~ AXIS
237
- parsed << $1.tr('-','_').intern
245
+ parsed_not_abberviated << $1.tr('-','_').intern
238
246
  path = $'
239
247
  # Goto Nodetest
240
248
  else
241
- parsed << :child
249
+ parsed_not_abberviated << :child
242
250
  end
243
251
 
244
- n = []
245
- path = NodeTest( path, n)
246
-
247
- path = Predicate( path, n )
252
+ path_before_node_test = path
253
+ path = NodeTest(path, parsed_not_abberviated)
254
+ if path == path_before_node_test
255
+ return path_before_axis_specifier
256
+ end
257
+ path = Predicate(path, parsed_not_abberviated)
248
258
 
249
- parsed.concat(n)
259
+ parsed.concat(parsed_not_abberviated)
250
260
  end
251
261
 
252
262
  original_path = path
@@ -301,7 +311,9 @@ module REXML
301
311
  when PI
302
312
  path = $'
303
313
  literal = nil
304
- if path !~ /^\s*\)/
314
+ if path =~ /^\s*\)/
315
+ path = $'
316
+ else
305
317
  path =~ LITERAL
306
318
  literal = $1
307
319
  path = $'
@@ -545,7 +557,9 @@ module REXML
545
557
  #| PrimaryExpr
546
558
  def FilterExpr path, parsed
547
559
  n = []
548
- path = PrimaryExpr( path, n )
560
+ path_before_primary_expr = path
561
+ path = PrimaryExpr(path, n)
562
+ return path_before_primary_expr if path == path_before_primary_expr
549
563
  path = Predicate(path, n)
550
564
  parsed.concat(n)
551
565
  path
data/lib/rexml/rexml.rb CHANGED
@@ -1,30 +1,35 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # frozen_string_literal: false
3
- # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
4
- #
5
- # REXML is a _pure_ Ruby, XML 1.0 conforming,
6
- # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
7
- # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
8
- # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
9
- # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
10
- # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
11
- # Ruby 1.8, REXML is included in the standard Ruby distribution.
12
- #
13
- # Main page:: http://www.germane-software.com/software/rexml
14
- # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
15
- # Date:: 2008/019
16
- # Version:: 3.1.7.3
17
- #
18
- # This API documentation can be downloaded from the REXML home page, or can
19
- # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
- #
21
- # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
- # or can be accessed
23
- # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
3
+ #
4
+ # \Module \REXML provides classes and methods for parsing,
5
+ # editing, and generating XML.
6
+ #
7
+ # == Implementation
8
+ #
9
+ # \REXML:
10
+ # - Is pure Ruby.
11
+ # - Provides tree, stream, SAX2, pull, and lightweight APIs.
12
+ # - Conforms to {XML version 1.0}[https://www.w3.org/TR/REC-xml/].
13
+ # - Fully implements {XPath version 1.0}[http://www.w3c.org/tr/xpath].
14
+ # - Is {non-validating}[https://www.w3.org/TR/xml/].
15
+ # - Passes 100% of the non-validating {Oasis tests}[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml].
16
+ #
17
+ # == In a Hurry?
18
+ #
19
+ # If you're somewhat familiar with XML
20
+ # and have a particular task in mind,
21
+ # you may want to see {the tasks pages}[doc/rexml/tasks/tocs/master_toc_rdoc.html].
22
+ #
23
+ # == API
24
+ #
25
+ # Among the most important classes for using \REXML are:
26
+ # - REXML::Document.
27
+ # - REXML::Element.
28
+ #
24
29
  module REXML
25
30
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
26
31
  DATE = "2008/019"
27
- VERSION = "3.2.4"
32
+ VERSION = "3.2.5"
28
33
  REVISION = ""
29
34
 
30
35
  Copyright = COPYRIGHT
@@ -7,39 +7,45 @@ require_relative 'xmltokens'
7
7
  require_relative 'attribute'
8
8
  require_relative 'parsers/xpathparser'
9
9
 
10
- class Object
11
- # provides a unified +clone+ operation, for REXML::XPathParser
12
- # to use across multiple Object types
13
- def dclone
14
- clone
15
- end
16
- end
17
- class Symbol
18
- # provides a unified +clone+ operation, for REXML::XPathParser
19
- # to use across multiple Object types
20
- def dclone ; self ; end
21
- end
22
- class Integer
23
- # provides a unified +clone+ operation, for REXML::XPathParser
24
- # to use across multiple Object types
25
- def dclone ; self ; end
26
- end
27
- class Float
28
- # provides a unified +clone+ operation, for REXML::XPathParser
29
- # to use across multiple Object types
30
- def dclone ; self ; end
31
- end
32
- class Array
33
- # provides a unified +clone+ operation, for REXML::XPathParser
34
- # to use across multiple Object+ types
35
- def dclone
36
- klone = self.clone
37
- klone.clear
38
- self.each{|v| klone << v.dclone}
39
- klone
10
+ module REXML
11
+ module DClonable
12
+ refine Object do
13
+ # provides a unified +clone+ operation, for REXML::XPathParser
14
+ # to use across multiple Object types
15
+ def dclone
16
+ clone
17
+ end
18
+ end
19
+ refine Symbol do
20
+ # provides a unified +clone+ operation, for REXML::XPathParser
21
+ # to use across multiple Object types
22
+ def dclone ; self ; end
23
+ end
24
+ refine Integer do
25
+ # provides a unified +clone+ operation, for REXML::XPathParser
26
+ # to use across multiple Object types
27
+ def dclone ; self ; end
28
+ end
29
+ refine Float do
30
+ # provides a unified +clone+ operation, for REXML::XPathParser
31
+ # to use across multiple Object types
32
+ def dclone ; self ; end
33
+ end
34
+ refine Array do
35
+ # provides a unified +clone+ operation, for REXML::XPathParser
36
+ # to use across multiple Object+ types
37
+ def dclone
38
+ klone = self.clone
39
+ klone.clear
40
+ self.each{|v| klone << v.dclone}
41
+ klone
42
+ end
43
+ end
40
44
  end
41
45
  end
42
46
 
47
+ using REXML::DClonable
48
+
43
49
  module REXML
44
50
  # You don't want to use this class. Really. Use XPath, which is a wrapper
45
51
  # for this class. Believe me. You don't want to poke around in here.