rexml 3.2.3 → 3.3.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/NEWS.md +502 -0
  3. data/README.md +11 -14
  4. data/doc/rexml/context.rdoc +143 -0
  5. data/doc/rexml/tasks/rdoc/child.rdoc +87 -0
  6. data/doc/rexml/tasks/rdoc/document.rdoc +276 -0
  7. data/doc/rexml/tasks/rdoc/element.rdoc +602 -0
  8. data/doc/rexml/tasks/rdoc/node.rdoc +97 -0
  9. data/doc/rexml/tasks/rdoc/parent.rdoc +267 -0
  10. data/doc/rexml/tasks/tocs/child_toc.rdoc +12 -0
  11. data/doc/rexml/tasks/tocs/document_toc.rdoc +30 -0
  12. data/doc/rexml/tasks/tocs/element_toc.rdoc +55 -0
  13. data/doc/rexml/tasks/tocs/master_toc.rdoc +135 -0
  14. data/doc/rexml/tasks/tocs/node_toc.rdoc +16 -0
  15. data/doc/rexml/tasks/tocs/parent_toc.rdoc +25 -0
  16. data/doc/rexml/tutorial.rdoc +1358 -0
  17. data/lib/rexml/attribute.rb +17 -11
  18. data/lib/rexml/doctype.rb +55 -31
  19. data/lib/rexml/document.rb +199 -35
  20. data/lib/rexml/element.rb +1802 -487
  21. data/lib/rexml/entity.rb +10 -39
  22. data/lib/rexml/formatters/pretty.rb +3 -3
  23. data/lib/rexml/functions.rb +1 -2
  24. data/lib/rexml/light/node.rb +0 -8
  25. data/lib/rexml/namespace.rb +8 -4
  26. data/lib/rexml/node.rb +8 -4
  27. data/lib/rexml/parseexception.rb +1 -0
  28. data/lib/rexml/parsers/baseparser.rb +513 -250
  29. data/lib/rexml/parsers/pullparser.rb +12 -0
  30. data/lib/rexml/parsers/sax2parser.rb +16 -19
  31. data/lib/rexml/parsers/streamparser.rb +16 -10
  32. data/lib/rexml/parsers/treeparser.rb +9 -21
  33. data/lib/rexml/parsers/xpathparser.rb +161 -97
  34. data/lib/rexml/rexml.rb +29 -22
  35. data/lib/rexml/source.rb +128 -98
  36. data/lib/rexml/text.rb +46 -22
  37. data/lib/rexml/xpath_parser.rb +43 -33
  38. data/lib/rexml.rb +3 -0
  39. metadata +42 -46
  40. data/.gitignore +0 -9
  41. data/.travis.yml +0 -24
  42. data/Gemfile +0 -6
  43. data/Rakefile +0 -8
  44. data/rexml.gemspec +0 -84
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: false
1
+ # frozen_string_literal: true
2
2
  require_relative "namespace"
3
3
  require_relative 'text'
4
4
 
@@ -13,9 +13,6 @@ module REXML
13
13
 
14
14
  # The element to which this attribute belongs
15
15
  attr_reader :element
16
- # The normalized value of this attribute. That is, the attribute with
17
- # entities intact.
18
- attr_writer :normalized
19
16
  PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
20
17
 
21
18
  NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
@@ -122,10 +119,13 @@ module REXML
122
119
  # b = Attribute.new( "ns:x", "y" )
123
120
  # b.to_string # -> "ns:x='y'"
124
121
  def to_string
122
+ value = to_s
125
123
  if @element and @element.context and @element.context[:attribute_quote] == :quote
126
- %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
124
+ value = value.gsub('"', '&quot;') if value.include?('"')
125
+ %Q^#@expanded_name="#{value}"^
127
126
  else
128
- "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
127
+ value = value.gsub("'", '&apos;') if value.include?("'")
128
+ "#@expanded_name='#{value}'"
129
129
  end
130
130
  end
131
131
 
@@ -141,7 +141,6 @@ module REXML
141
141
  return @normalized if @normalized
142
142
 
143
143
  @normalized = Text::normalize( @unnormalized, doctype )
144
- @unnormalized = nil
145
144
  @normalized
146
145
  end
147
146
 
@@ -149,9 +148,16 @@ module REXML
149
148
  # have been expanded to their values
150
149
  def value
151
150
  return @unnormalized if @unnormalized
152
- @unnormalized = Text::unnormalize( @normalized, doctype )
153
- @normalized = nil
154
- @unnormalized
151
+
152
+ @unnormalized = Text::unnormalize(@normalized, doctype,
153
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
154
+ end
155
+
156
+ # The normalized value of this attribute. That is, the attribute with
157
+ # entities intact.
158
+ def normalized=(new_normalized)
159
+ @normalized = new_normalized
160
+ @unnormalized = nil
155
161
  end
156
162
 
157
163
  # Returns a copy of this attribute
@@ -190,7 +196,7 @@ module REXML
190
196
  end
191
197
 
192
198
  def inspect
193
- rv = ""
199
+ rv = +""
194
200
  write( rv )
195
201
  rv
196
202
  end
data/lib/rexml/doctype.rb CHANGED
@@ -7,6 +7,44 @@ require_relative 'attlistdecl'
7
7
  require_relative 'xmltokens'
8
8
 
9
9
  module REXML
10
+ class ReferenceWriter
11
+ def initialize(id_type,
12
+ public_id_literal,
13
+ system_literal,
14
+ context=nil)
15
+ @id_type = id_type
16
+ @public_id_literal = public_id_literal
17
+ @system_literal = system_literal
18
+ if context and context[:prologue_quote] == :apostrophe
19
+ @default_quote = "'"
20
+ else
21
+ @default_quote = "\""
22
+ end
23
+ end
24
+
25
+ def write(output)
26
+ output << " #{@id_type}"
27
+ if @public_id_literal
28
+ if @public_id_literal.include?("'")
29
+ quote = "\""
30
+ else
31
+ quote = @default_quote
32
+ end
33
+ output << " #{quote}#{@public_id_literal}#{quote}"
34
+ end
35
+ if @system_literal
36
+ if @system_literal.include?("'")
37
+ quote = "\""
38
+ elsif @system_literal.include?("\"")
39
+ quote = "'"
40
+ else
41
+ quote = @default_quote
42
+ end
43
+ output << " #{quote}#{@system_literal}#{quote}"
44
+ end
45
+ end
46
+ end
47
+
10
48
  # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
11
49
  # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
12
50
  # being used to declare entities used in the document.
@@ -50,6 +88,8 @@ module REXML
50
88
  super( parent )
51
89
  @name = first.name
52
90
  @external_id = first.external_id
91
+ @long_name = first.instance_variable_get(:@long_name)
92
+ @uri = first.instance_variable_get(:@uri)
53
93
  elsif first.kind_of? Array
54
94
  super( parent )
55
95
  @name = first[0]
@@ -108,19 +148,17 @@ module REXML
108
148
  # Ignored
109
149
  def write( output, indent=0, transitive=false, ie_hack=false )
110
150
  f = REXML::Formatters::Default.new
111
- c = context
112
- if c and c[:prologue_quote] == :apostrophe
113
- quote = "'"
114
- else
115
- quote = "\""
116
- end
117
151
  indent( output, indent )
118
152
  output << START
119
153
  output << ' '
120
154
  output << @name
121
- output << " #{@external_id}" if @external_id
122
- output << " #{quote}#{@long_name}#{quote}" if @long_name
123
- output << " #{quote}#{@uri}#{quote}" if @uri
155
+ if @external_id
156
+ reference_writer = ReferenceWriter.new(@external_id,
157
+ @long_name,
158
+ @uri,
159
+ context)
160
+ reference_writer.write(output)
161
+ end
124
162
  unless @children.empty?
125
163
  output << ' ['
126
164
  @children.each { |child|
@@ -159,7 +197,7 @@ module REXML
159
197
  when "SYSTEM"
160
198
  nil
161
199
  when "PUBLIC"
162
- strip_quotes(@long_name)
200
+ @long_name
163
201
  end
164
202
  end
165
203
 
@@ -169,9 +207,9 @@ module REXML
169
207
  def system
170
208
  case @external_id
171
209
  when "SYSTEM"
172
- strip_quotes(@long_name)
210
+ @long_name
173
211
  when "PUBLIC"
174
- @uri.kind_of?(String) ? strip_quotes(@uri) : nil
212
+ @uri.kind_of?(String) ? @uri : nil
175
213
  end
176
214
  end
177
215
 
@@ -193,15 +231,6 @@ module REXML
193
231
  notation_decl.name == name
194
232
  }
195
233
  end
196
-
197
- private
198
-
199
- # Method contributed by Henrik Martensson
200
- def strip_quotes(quoted_string)
201
- quoted_string =~ /^[\'\"].*[\'\"]$/ ?
202
- quoted_string[1, quoted_string.length-2] :
203
- quoted_string
204
- end
205
234
  end
206
235
 
207
236
  # We don't really handle any of these since we're not a validating
@@ -259,16 +288,11 @@ module REXML
259
288
  end
260
289
 
261
290
  def to_s
262
- c = nil
263
- c = parent.context if parent
264
- if c and c[:prologue_quote] == :apostrophe
265
- quote = "'"
266
- else
267
- quote = "\""
268
- end
269
- notation = "<!NOTATION #{@name} #{@middle}"
270
- notation << " #{quote}#{@public}#{quote}" if @public
271
- notation << " #{quote}#{@system}#{quote}" if @system
291
+ context = nil
292
+ context = parent.context if parent
293
+ notation = "<!NOTATION #{@name}"
294
+ reference_writer = ReferenceWriter.new(@middle, @public, @system, context)
295
+ reference_writer.write(notation)
272
296
  notation << ">"
273
297
  notation
274
298
  end
@@ -14,27 +14,85 @@ require_relative "parsers/streamparser"
14
14
  require_relative "parsers/treeparser"
15
15
 
16
16
  module REXML
17
- # Represents a full XML document, including PIs, a doctype, etc. A
18
- # Document has a single child that can be accessed by root().
19
- # Note that if you want to have an XML declaration written for a document
20
- # you create, you must add one; REXML documents do not write a default
21
- # declaration for you. See |DECLARATION| and |write|.
17
+ # Represents an XML document.
18
+ #
19
+ # A document may have:
20
+ #
21
+ # - A single child that may be accessed via method #root.
22
+ # - An XML declaration.
23
+ # - A document type.
24
+ # - Processing instructions.
25
+ #
26
+ # == In a Hurry?
27
+ #
28
+ # If you're somewhat familiar with XML
29
+ # and have a particular task in mind,
30
+ # you may want to see the
31
+ # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html],
32
+ # and in particular, the
33
+ # {tasks page for documents}[../doc/rexml/tasks/tocs/document_toc_rdoc.html].
34
+ #
22
35
  class Document < Element
23
- # A convenient default XML declaration. If you want an XML declaration,
24
- # the easiest way to add one is mydoc << Document::DECLARATION
25
- # +DEPRECATED+
26
- # Use: mydoc << XMLDecl.default
36
+ # A convenient default XML declaration. Use:
37
+ #
38
+ # mydoc << XMLDecl.default
39
+ #
27
40
  DECLARATION = XMLDecl.default
28
41
 
29
- # Constructor
30
- # @param source if supplied, must be a Document, String, or IO.
31
- # Documents have their context and Element attributes cloned.
32
- # Strings are expected to be valid XML documents. IOs are expected
33
- # to be sources of valid XML documents.
34
- # @param context if supplied, contains the context of the document;
35
- # this should be a Hash.
42
+ # :call-seq:
43
+ # new(string = nil, context = {}) -> new_document
44
+ # new(io_stream = nil, context = {}) -> new_document
45
+ # new(document = nil, context = {}) -> new_document
46
+ #
47
+ # Returns a new \REXML::Document object.
48
+ #
49
+ # When no arguments are given,
50
+ # returns an empty document:
51
+ #
52
+ # d = REXML::Document.new
53
+ # d.to_s # => ""
54
+ #
55
+ # When argument +string+ is given, it must be a string
56
+ # containing a valid XML document:
57
+ #
58
+ # xml_string = '<root><foo>Foo</foo><bar>Bar</bar></root>'
59
+ # d = REXML::Document.new(xml_string)
60
+ # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
61
+ #
62
+ # When argument +io_stream+ is given, it must be an \IO object
63
+ # that is opened for reading, and when read must return a valid XML document:
64
+ #
65
+ # File.write('t.xml', xml_string)
66
+ # d = File.open('t.xml', 'r') do |io|
67
+ # REXML::Document.new(io)
68
+ # end
69
+ # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
70
+ #
71
+ # When argument +document+ is given, it must be an existing
72
+ # document object, whose context and attributes (but not children)
73
+ # are cloned into the new document:
74
+ #
75
+ # d = REXML::Document.new(xml_string)
76
+ # d.children # => [<root> ... </>]
77
+ # d.context = {raw: :all, compress_whitespace: :all}
78
+ # d.add_attributes({'bar' => 0, 'baz' => 1})
79
+ # d1 = REXML::Document.new(d)
80
+ # d1.children # => []
81
+ # d1.context # => {:raw=>:all, :compress_whitespace=>:all}
82
+ # d1.attributes # => {"bar"=>bar='0', "baz"=>baz='1'}
83
+ #
84
+ # When argument +context+ is given, it must be a hash
85
+ # containing context entries for the document;
86
+ # see {Element Context}[../doc/rexml/context_rdoc.html]:
87
+ #
88
+ # context = {raw: :all, compress_whitespace: :all}
89
+ # d = REXML::Document.new(xml_string, context)
90
+ # d.context # => {:raw=>:all, :compress_whitespace=>:all}
91
+ #
36
92
  def initialize( source = nil, context = {} )
37
93
  @entity_expansion_count = 0
94
+ @entity_expansion_limit = Security.entity_expansion_limit
95
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
38
96
  super()
39
97
  @context = context
40
98
  return if source.nil?
@@ -46,26 +104,71 @@ module REXML
46
104
  end
47
105
  end
48
106
 
107
+ # :call-seq:
108
+ # node_type -> :document
109
+ #
110
+ # Returns the symbol +:document+.
111
+ #
49
112
  def node_type
50
113
  :document
51
114
  end
52
115
 
53
- # Should be obvious
116
+ # :call-seq:
117
+ # clone -> new_document
118
+ #
119
+ # Returns the new document resulting from executing
120
+ # <tt>Document.new(self)</tt>. See Document.new.
121
+ #
54
122
  def clone
55
123
  Document.new self
56
124
  end
57
125
 
58
- # According to the XML spec, a root node has no expanded name
126
+ # :call-seq:
127
+ # expanded_name -> empty_string
128
+ #
129
+ # Returns an empty string.
130
+ #
59
131
  def expanded_name
60
132
  ''
61
133
  #d = doc_type
62
134
  #d ? d.name : "UNDEFINED"
63
135
  end
64
-
65
136
  alias :name :expanded_name
66
137
 
67
- # We override this, because XMLDecls and DocTypes must go at the start
68
- # of the document
138
+ # :call-seq:
139
+ # add(xml_decl) -> self
140
+ # add(doc_type) -> self
141
+ # add(object) -> self
142
+ #
143
+ # Adds an object to the document; returns +self+.
144
+ #
145
+ # When argument +xml_decl+ is given,
146
+ # it must be an REXML::XMLDecl object,
147
+ # which becomes the XML declaration for the document,
148
+ # replacing the previous XML declaration if any:
149
+ #
150
+ # d = REXML::Document.new
151
+ # d.xml_decl.to_s # => ""
152
+ # d.add(REXML::XMLDecl.new('2.0'))
153
+ # d.xml_decl.to_s # => "<?xml version='2.0'?>"
154
+ #
155
+ # When argument +doc_type+ is given,
156
+ # it must be an REXML::DocType object,
157
+ # which becomes the document type for the document,
158
+ # replacing the previous document type, if any:
159
+ #
160
+ # d = REXML::Document.new
161
+ # d.doctype.to_s # => ""
162
+ # d.add(REXML::DocType.new('foo'))
163
+ # d.doctype.to_s # => "<!DOCTYPE foo>"
164
+ #
165
+ # When argument +object+ (not an REXML::XMLDecl or REXML::DocType object)
166
+ # is given it is added as the last child:
167
+ #
168
+ # d = REXML::Document.new
169
+ # d.add(REXML::Element.new('foo'))
170
+ # d.to_s # => "<foo/>"
171
+ #
69
172
  def add( child )
70
173
  if child.kind_of? XMLDecl
71
174
  if @children[0].kind_of? XMLDecl
@@ -99,49 +202,108 @@ module REXML
99
202
  end
100
203
  alias :<< :add
101
204
 
205
+ # :call-seq:
206
+ # add_element(name_or_element = nil, attributes = nil) -> new_element
207
+ #
208
+ # Adds an element to the document by calling REXML::Element#add_element:
209
+ #
210
+ # REXML::Element.add_element(name_or_element, attributes)
102
211
  def add_element(arg=nil, arg2=nil)
103
212
  rv = super
104
213
  raise "attempted adding second root element to document" if @elements.size > 1
105
214
  rv
106
215
  end
107
216
 
108
- # @return the root Element of the document, or nil if this document
109
- # has no children.
217
+ # :call-seq:
218
+ # root -> root_element or nil
219
+ #
220
+ # Returns the root element of the document, if it exists, otherwise +nil+:
221
+ #
222
+ # d = REXML::Document.new('<root></root>')
223
+ # d.root # => <root/>
224
+ # d = REXML::Document.new('')
225
+ # d.root # => nil
226
+ #
110
227
  def root
111
228
  elements[1]
112
229
  #self
113
230
  #@children.find { |item| item.kind_of? Element }
114
231
  end
115
232
 
116
- # @return the DocType child of the document, if one exists,
117
- # and nil otherwise.
233
+ # :call-seq:
234
+ # doctype -> doc_type or nil
235
+ #
236
+ # Returns the DocType object for the document, if it exists, otherwise +nil+:
237
+ #
238
+ # d = REXML::Document.new('<!DOCTYPE document SYSTEM "subjects.dtd">')
239
+ # d.doctype.class # => REXML::DocType
240
+ # d = REXML::Document.new('')
241
+ # d.doctype.class # => NilClass
242
+ #
118
243
  def doctype
119
244
  @children.find { |item| item.kind_of? DocType }
120
245
  end
121
246
 
122
- # @return the XMLDecl of this document; if no XMLDecl has been
123
- # set, the default declaration is returned.
247
+ # :call-seq:
248
+ # xml_decl -> xml_decl
249
+ #
250
+ # Returns the XMLDecl object for the document, if it exists,
251
+ # otherwise the default XMLDecl object:
252
+ #
253
+ # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-8"?>')
254
+ # d.xml_decl.class # => REXML::XMLDecl
255
+ # d.xml_decl.to_s # => "<?xml version='1.0' encoding='UTF-8'?>"
256
+ # d = REXML::Document.new('')
257
+ # d.xml_decl.class # => REXML::XMLDecl
258
+ # d.xml_decl.to_s # => ""
259
+ #
124
260
  def xml_decl
125
261
  rv = @children[0]
126
262
  return rv if rv.kind_of? XMLDecl
127
263
  @children.unshift(XMLDecl.default)[0]
128
264
  end
129
265
 
130
- # @return the XMLDecl version of this document as a String.
131
- # If no XMLDecl has been set, returns the default version.
266
+ # :call-seq:
267
+ # version -> version_string
268
+ #
269
+ # Returns the XMLDecl version of this document as a string,
270
+ # if it has been set, otherwise the default version:
271
+ #
272
+ # d = REXML::Document.new('<?xml version="2.0" encoding="UTF-8"?>')
273
+ # d.version # => "2.0"
274
+ # d = REXML::Document.new('')
275
+ # d.version # => "1.0"
276
+ #
132
277
  def version
133
278
  xml_decl().version
134
279
  end
135
280
 
136
- # @return the XMLDecl encoding of this document as an
137
- # Encoding object.
138
- # If no XMLDecl has been set, returns the default encoding.
281
+ # :call-seq:
282
+ # encoding -> encoding_string
283
+ #
284
+ # Returns the XMLDecl encoding of the document,
285
+ # if it has been set, otherwise the default encoding:
286
+ #
287
+ # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-16"?>')
288
+ # d.encoding # => "UTF-16"
289
+ # d = REXML::Document.new('')
290
+ # d.encoding # => "UTF-8"
291
+ #
139
292
  def encoding
140
293
  xml_decl().encoding
141
294
  end
142
295
 
143
- # @return the XMLDecl standalone value of this document as a String.
144
- # If no XMLDecl has been set, returns the default setting.
296
+ # :call-seq:
297
+ # stand_alone? -> standalone_string or nil
298
+ #
299
+ # Returns the XMLDecl standalone value of the document as a string,
300
+ # if it has been set, otherwise the default standalone value:
301
+ #
302
+ # d = REXML::Document.new('<?xml standalone="yes"?>')
303
+ # d.stand_alone? # => "yes"
304
+ # d = REXML::Document.new('')
305
+ # d.stand_alone? # => nil
306
+ #
145
307
  def stand_alone?
146
308
  xml_decl().stand_alone?
147
309
  end
@@ -271,10 +433,12 @@ module REXML
271
433
  end
272
434
 
273
435
  attr_reader :entity_expansion_count
436
+ attr_writer :entity_expansion_limit
437
+ attr_accessor :entity_expansion_text_limit
274
438
 
275
439
  def record_entity_expansion
276
440
  @entity_expansion_count += 1
277
- if @entity_expansion_count > Security.entity_expansion_limit
441
+ if @entity_expansion_count > @entity_expansion_limit
278
442
  raise "number of entity expansions exceeded, processing aborted."
279
443
  end
280
444
  end