rexml 3.2.3 → 3.3.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

Files changed (44)
  1. checksums.yaml +4 -4
  2. data/NEWS.md +502 -0
  3. data/README.md +11 -14
  4. data/doc/rexml/context.rdoc +143 -0
  5. data/doc/rexml/tasks/rdoc/child.rdoc +87 -0
  6. data/doc/rexml/tasks/rdoc/document.rdoc +276 -0
  7. data/doc/rexml/tasks/rdoc/element.rdoc +602 -0
  8. data/doc/rexml/tasks/rdoc/node.rdoc +97 -0
  9. data/doc/rexml/tasks/rdoc/parent.rdoc +267 -0
  10. data/doc/rexml/tasks/tocs/child_toc.rdoc +12 -0
  11. data/doc/rexml/tasks/tocs/document_toc.rdoc +30 -0
  12. data/doc/rexml/tasks/tocs/element_toc.rdoc +55 -0
  13. data/doc/rexml/tasks/tocs/master_toc.rdoc +135 -0
  14. data/doc/rexml/tasks/tocs/node_toc.rdoc +16 -0
  15. data/doc/rexml/tasks/tocs/parent_toc.rdoc +25 -0
  16. data/doc/rexml/tutorial.rdoc +1358 -0
  17. data/lib/rexml/attribute.rb +17 -11
  18. data/lib/rexml/doctype.rb +55 -31
  19. data/lib/rexml/document.rb +199 -35
  20. data/lib/rexml/element.rb +1802 -487
  21. data/lib/rexml/entity.rb +10 -39
  22. data/lib/rexml/formatters/pretty.rb +3 -3
  23. data/lib/rexml/functions.rb +1 -2
  24. data/lib/rexml/light/node.rb +0 -8
  25. data/lib/rexml/namespace.rb +8 -4
  26. data/lib/rexml/node.rb +8 -4
  27. data/lib/rexml/parseexception.rb +1 -0
  28. data/lib/rexml/parsers/baseparser.rb +513 -250
  29. data/lib/rexml/parsers/pullparser.rb +12 -0
  30. data/lib/rexml/parsers/sax2parser.rb +16 -19
  31. data/lib/rexml/parsers/streamparser.rb +16 -10
  32. data/lib/rexml/parsers/treeparser.rb +9 -21
  33. data/lib/rexml/parsers/xpathparser.rb +161 -97
  34. data/lib/rexml/rexml.rb +29 -22
  35. data/lib/rexml/source.rb +128 -98
  36. data/lib/rexml/text.rb +46 -22
  37. data/lib/rexml/xpath_parser.rb +43 -33
  38. data/lib/rexml.rb +3 -0
  39. metadata +42 -46
  40. data/.gitignore +0 -9
  41. data/.travis.yml +0 -24
  42. data/Gemfile +0 -6
  43. data/Rakefile +0 -8
  44. data/rexml.gemspec +0 -84
@@ -47,6 +47,18 @@ module REXML
47
47
  @listeners << listener
48
48
  end
49
49
 
50
+ def entity_expansion_count
51
+ @parser.entity_expansion_count
52
+ end
53
+
54
+ def entity_expansion_limit=( limit )
55
+ @parser.entity_expansion_limit = limit
56
+ end
57
+
58
+ def entity_expansion_text_limit=( limit )
59
+ @parser.entity_expansion_text_limit = limit
60
+ end
61
+
50
62
  def each
51
63
  while has_next?
52
64
  yield self.pull
@@ -22,6 +22,18 @@ module REXML
22
22
  @parser.source
23
23
  end
24
24
 
25
+ def entity_expansion_count
26
+ @parser.entity_expansion_count
27
+ end
28
+
29
+ def entity_expansion_limit=( limit )
30
+ @parser.entity_expansion_limit = limit
31
+ end
32
+
33
+ def entity_expansion_text_limit=( limit )
34
+ @parser.entity_expansion_text_limit = limit
35
+ end
36
+
25
37
  def add_listener( listener )
26
38
  @parser.add_listener( listener )
27
39
  end
@@ -157,25 +169,8 @@ module REXML
157
169
  end
158
170
  end
159
171
  when :text
160
- #normalized = @parser.normalize( event[1] )
161
- #handle( :characters, normalized )
162
- copy = event[1].clone
163
-
164
- esub = proc { |match|
165
- if @entities.has_key?($1)
166
- @entities[$1].gsub(Text::REFERENCE, &esub)
167
- else
168
- match
169
- end
170
- }
171
-
172
- copy.gsub!( Text::REFERENCE, &esub )
173
- copy.gsub!( Text::NUMERICENTITY ) {|m|
174
- m=$1
175
- m = "0#{m}" if m[0] == ?x
176
- [Integer(m)].pack('U*')
177
- }
178
- handle( :characters, copy )
172
+ unnormalized = @parser.unnormalize( event[1], @entities )
173
+ handle( :characters, unnormalized )
179
174
  when :entitydecl
180
175
  handle_entitydecl( event )
181
176
  when :processing_instruction, :comment, :attlistdecl,
@@ -264,6 +259,8 @@ module REXML
264
259
  end
265
260
 
266
261
  def get_namespace( prefix )
262
+ return nil if @namespace_stack.empty?
263
+
267
264
  uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
268
265
  (@namespace_stack.find { |ns| not ns[nil].nil? })
269
266
  uris[-1][prefix] unless uris.nil? or 0 == uris.size
@@ -7,37 +7,42 @@ module REXML
7
7
  def initialize source, listener
8
8
  @listener = listener
9
9
  @parser = BaseParser.new( source )
10
- @tag_stack = []
10
+ @entities = {}
11
11
  end
12
12
 
13
13
  def add_listener( listener )
14
14
  @parser.add_listener( listener )
15
15
  end
16
16
 
17
+ def entity_expansion_count
18
+ @parser.entity_expansion_count
19
+ end
20
+
21
+ def entity_expansion_limit=( limit )
22
+ @parser.entity_expansion_limit = limit
23
+ end
24
+
25
+ def entity_expansion_text_limit=( limit )
26
+ @parser.entity_expansion_text_limit = limit
27
+ end
28
+
17
29
  def parse
18
30
  # entity string
19
31
  while true
20
32
  event = @parser.pull
21
33
  case event[0]
22
34
  when :end_document
23
- unless @tag_stack.empty?
24
- tag_path = "/" + @tag_stack.join("/")
25
- raise ParseException.new("Missing end tag for '#{tag_path}'",
26
- @parser.source)
27
- end
28
35
  return
29
36
  when :start_element
30
- @tag_stack << event[1]
31
37
  attrs = event[2].each do |n, v|
32
38
  event[2][n] = @parser.unnormalize( v )
33
39
  end
34
40
  @listener.tag_start( event[1], attrs )
35
41
  when :end_element
36
42
  @listener.tag_end( event[1] )
37
- @tag_stack.pop
38
43
  when :text
39
- normalized = @parser.unnormalize( event[1] )
40
- @listener.text( normalized )
44
+ unnormalized = @parser.unnormalize( event[1], @entities )
45
+ @listener.text( unnormalized )
41
46
  when :processing_instruction
42
47
  @listener.instruction( *event[1,2] )
43
48
  when :start_doctype
@@ -48,6 +53,7 @@ module REXML
48
53
  when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
49
54
  @listener.send( event[0].to_s, *event[1..-1] )
50
55
  when :entitydecl, :notationdecl
56
+ @entities[ event[1] ] = event[2] if event.size == 3
51
57
  @listener.send( event[0].to_s, event[1..-1] )
52
58
  when :externalentity
53
59
  entity_reference = event[1]
@@ -15,8 +15,6 @@ module REXML
15
15
  end
16
16
 
17
17
  def parse
18
- tag_stack = []
19
- in_doctype = false
20
18
  entities = nil
21
19
  begin
22
20
  while true
@@ -24,32 +22,24 @@ module REXML
24
22
  #STDERR.puts "TREEPARSER GOT #{event.inspect}"
25
23
  case event[0]
26
24
  when :end_document
27
- unless tag_stack.empty?
28
- raise ParseException.new("No close tag for #{@build_context.xpath}",
29
- @parser.source, @parser)
30
- end
31
25
  return
32
26
  when :start_element
33
- tag_stack.push(event[1])
34
27
  el = @build_context = @build_context.add_element( event[1] )
35
28
  event[2].each do |key, value|
36
29
  el.attributes[key]=Attribute.new(key,value,self)
37
30
  end
38
31
  when :end_element
39
- tag_stack.pop
40
32
  @build_context = @build_context.parent
41
33
  when :text
42
- if not in_doctype
43
- if @build_context[-1].instance_of? Text
44
- @build_context[-1] << event[1]
45
- else
46
- @build_context.add(
47
- Text.new(event[1], @build_context.whitespace, nil, true)
48
- ) unless (
49
- @build_context.ignore_whitespace_nodes and
50
- event[1].strip.size==0
51
- )
52
- end
34
+ if @build_context[-1].instance_of? Text
35
+ @build_context[-1] << event[1]
36
+ else
37
+ @build_context.add(
38
+ Text.new(event[1], @build_context.whitespace, nil, true)
39
+ ) unless (
40
+ @build_context.ignore_whitespace_nodes and
41
+ event[1].strip.size==0
42
+ )
53
43
  end
54
44
  when :comment
55
45
  c = Comment.new( event[1] )
@@ -60,14 +50,12 @@ module REXML
60
50
  when :processing_instruction
61
51
  @build_context.add( Instruction.new( event[1], event[2] ) )
62
52
  when :end_doctype
63
- in_doctype = false
64
53
  entities.each { |k,v| entities[k] = @build_context.entities[k].value }
65
54
  @build_context = @build_context.parent
66
55
  when :start_doctype
67
56
  doctype = DocType.new( event[1..-1], @build_context )
68
57
  @build_context = doctype
69
58
  entities = {}
70
- in_doctype = true
71
59
  when :attlistdecl
72
60
  n = AttlistDecl.new( event[1..-1] )
73
61
  @build_context.add( n )
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: false
2
+
2
3
  require_relative '../namespace'
3
4
  require_relative '../xmltokens'
4
5
 
@@ -22,7 +23,13 @@ module REXML
22
23
  path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
23
24
  path.gsub!( /\s+([\]\)])/, '\1')
24
25
  parsed = []
25
- OrExpr(path, parsed)
26
+ rest = OrExpr(path, parsed)
27
+ if rest
28
+ unless rest.strip.empty?
29
+ raise ParseException.new("Garbage component exists at the end: " +
30
+ "<#{rest}>: <#{path}>")
31
+ end
32
+ end
26
33
  parsed
27
34
  end
28
35
 
@@ -32,108 +39,143 @@ module REXML
32
39
  parsed
33
40
  end
34
41
 
35
- def abbreviate( path )
36
- path = path.kind_of?(String) ? parse( path ) : path
37
- string = ""
38
- document = false
39
- while path.size > 0
40
- op = path.shift
42
+ def abbreviate(path_or_parsed)
43
+ if path_or_parsed.kind_of?(String)
44
+ parsed = parse(path_or_parsed)
45
+ else
46
+ parsed = path_or_parsed
47
+ end
48
+ components = []
49
+ component = nil
50
+ while parsed.size > 0
51
+ op = parsed.shift
41
52
  case op
42
53
  when :node
54
+ component << "node()"
43
55
  when :attribute
44
- string << "/" if string.size > 0
45
- string << "@"
56
+ component = "@"
57
+ components << component
46
58
  when :child
47
- string << "/" if string.size > 0
59
+ component = ""
60
+ components << component
48
61
  when :descendant_or_self
49
- string << "/"
62
+ next_op = parsed[0]
63
+ if next_op == :node
64
+ parsed.shift
65
+ component = ""
66
+ components << component
67
+ else
68
+ component = "descendant-or-self::"
69
+ components << component
70
+ end
50
71
  when :self
51
- string << "."
72
+ next_op = parsed[0]
73
+ if next_op == :node
74
+ parsed.shift
75
+ components << "."
76
+ else
77
+ component = "self::"
78
+ components << component
79
+ end
52
80
  when :parent
53
- string << ".."
81
+ next_op = parsed[0]
82
+ if next_op == :node
83
+ parsed.shift
84
+ components << ".."
85
+ else
86
+ component = "parent::"
87
+ components << component
88
+ end
54
89
  when :any
55
- string << "*"
90
+ component << "*"
56
91
  when :text
57
- string << "text()"
92
+ component << "text()"
58
93
  when :following, :following_sibling,
59
94
  :ancestor, :ancestor_or_self, :descendant,
60
95
  :namespace, :preceding, :preceding_sibling
61
- string << "/" unless string.size == 0
62
- string << op.to_s.tr("_", "-")
63
- string << "::"
96
+ component = op.to_s.tr("_", "-") << "::"
97
+ components << component
64
98
  when :qname
65
- prefix = path.shift
66
- name = path.shift
67
- string << prefix+":" if prefix.size > 0
68
- string << name
99
+ prefix = parsed.shift
100
+ name = parsed.shift
101
+ component << prefix+":" if prefix.size > 0
102
+ component << name
69
103
  when :predicate
70
- string << '['
71
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
72
- string << ']'
104
+ component << '['
105
+ component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
106
+ component << ']'
73
107
  when :document
74
- document = true
108
+ components << ""
75
109
  when :function
76
- string << path.shift
77
- string << "( "
78
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
79
- string << " )"
110
+ component << parsed.shift
111
+ component << "( "
112
+ component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
113
+ component << " )"
80
114
  when :literal
81
- string << %Q{ "#{path.shift}" }
115
+ component << quote_literal(parsed.shift)
82
116
  else
83
- string << "/" unless string.size == 0
84
- string << "UNKNOWN("
85
- string << op.inspect
86
- string << ")"
117
+ component << "UNKNOWN("
118
+ component << op.inspect
119
+ component << ")"
87
120
  end
88
121
  end
89
- string = "/"+string if document
90
- return string
122
+ case components
123
+ when [""]
124
+ "/"
125
+ when ["", ""]
126
+ "//"
127
+ else
128
+ components.join("/")
129
+ end
91
130
  end
92
131
 
93
- def expand( path )
94
- path = path.kind_of?(String) ? parse( path ) : path
95
- string = ""
132
+ def expand(path_or_parsed)
133
+ if path_or_parsed.kind_of?(String)
134
+ parsed = parse(path_or_parsed)
135
+ else
136
+ parsed = path_or_parsed
137
+ end
138
+ path = ""
96
139
  document = false
97
- while path.size > 0
98
- op = path.shift
140
+ while parsed.size > 0
141
+ op = parsed.shift
99
142
  case op
100
143
  when :node
101
- string << "node()"
144
+ path << "node()"
102
145
  when :attribute, :child, :following, :following_sibling,
103
146
  :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
104
147
  :namespace, :preceding, :preceding_sibling, :self, :parent
105
- string << "/" unless string.size == 0
106
- string << op.to_s.tr("_", "-")
107
- string << "::"
148
+ path << "/" unless path.size == 0
149
+ path << op.to_s.tr("_", "-")
150
+ path << "::"
108
151
  when :any
109
- string << "*"
152
+ path << "*"
110
153
  when :qname
111
- prefix = path.shift
112
- name = path.shift
113
- string << prefix+":" if prefix.size > 0
114
- string << name
154
+ prefix = parsed.shift
155
+ name = parsed.shift
156
+ path << prefix+":" if prefix.size > 0
157
+ path << name
115
158
  when :predicate
116
- string << '['
117
- string << predicate_to_string( path.shift ) { |x| expand(x) }
118
- string << ']'
159
+ path << '['
160
+ path << predicate_to_path( parsed.shift ) { |x| expand(x) }
161
+ path << ']'
119
162
  when :document
120
163
  document = true
121
164
  else
122
- string << "/" unless string.size == 0
123
- string << "UNKNOWN("
124
- string << op.inspect
125
- string << ")"
165
+ path << "UNKNOWN("
166
+ path << op.inspect
167
+ path << ")"
126
168
  end
127
169
  end
128
- string = "/"+string if document
129
- return string
170
+ path = "/"+path if document
171
+ path
130
172
  end
131
173
 
132
- def predicate_to_string( path, &block )
133
- string = ""
134
- case path[0]
174
+ def predicate_to_path(parsed, &block)
175
+ path = ""
176
+ case parsed[0]
135
177
  when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
136
- op = path.shift
178
+ op = parsed.shift
137
179
  case op
138
180
  when :eq
139
181
  op = "="
@@ -150,36 +192,50 @@ module REXML
150
192
  when :union
151
193
  op = "|"
152
194
  end
153
- left = predicate_to_string( path.shift, &block )
154
- right = predicate_to_string( path.shift, &block )
155
- string << " "
156
- string << left
157
- string << " "
158
- string << op.to_s
159
- string << " "
160
- string << right
161
- string << " "
195
+ left = predicate_to_path( parsed.shift, &block )
196
+ right = predicate_to_path( parsed.shift, &block )
197
+ path << left
198
+ path << " "
199
+ path << op.to_s
200
+ path << " "
201
+ path << right
162
202
  when :function
163
- path.shift
164
- name = path.shift
165
- string << name
166
- string << "( "
167
- string << predicate_to_string( path.shift, &block )
168
- string << " )"
203
+ parsed.shift
204
+ name = parsed.shift
205
+ path << name
206
+ path << "("
207
+ parsed.shift.each_with_index do |argument, i|
208
+ path << ", " if i > 0
209
+ path << predicate_to_path(argument, &block)
210
+ end
211
+ path << ")"
169
212
  when :literal
170
- path.shift
171
- string << " "
172
- string << path.shift.inspect
173
- string << " "
213
+ parsed.shift
214
+ path << quote_literal(parsed.shift)
174
215
  else
175
- string << " "
176
- string << yield( path )
177
- string << " "
216
+ path << yield( parsed )
178
217
  end
179
- return string.squeeze(" ")
218
+ return path.squeeze(" ")
180
219
  end
220
+ # For backward compatibility
221
+ alias_method :preciate_to_string, :predicate_to_path
181
222
 
182
223
  private
224
+ def quote_literal( literal )
225
+ case literal
226
+ when String
227
+ # XPath 1.0 does not support escape characters.
228
+ # Assumes literal does not contain both single and double quotes.
229
+ if literal.include?("'")
230
+ "\"#{literal}\""
231
+ else
232
+ "'#{literal}'"
233
+ end
234
+ else
235
+ literal.inspect
236
+ end
237
+ end
238
+
183
239
  #LocationPath
184
240
  # | RelativeLocationPath
185
241
  # | '/' RelativeLocationPath?
@@ -229,24 +285,28 @@ module REXML
229
285
  path = path[1..-1]
230
286
  end
231
287
  else
288
+ path_before_axis_specifier = path
289
+ parsed_not_abberviated = []
232
290
  if path[0] == ?@
233
- parsed << :attribute
291
+ parsed_not_abberviated << :attribute
234
292
  path = path[1..-1]
235
293
  # Goto Nodetest
236
294
  elsif path =~ AXIS
237
- parsed << $1.tr('-','_').intern
295
+ parsed_not_abberviated << $1.tr('-','_').intern
238
296
  path = $'
239
297
  # Goto Nodetest
240
298
  else
241
- parsed << :child
299
+ parsed_not_abberviated << :child
242
300
  end
243
301
 
244
- n = []
245
- path = NodeTest( path, n)
246
-
247
- path = Predicate( path, n )
302
+ path_before_node_test = path
303
+ path = NodeTest(path, parsed_not_abberviated)
304
+ if path == path_before_node_test
305
+ return path_before_axis_specifier
306
+ end
307
+ path = Predicate(path, parsed_not_abberviated)
248
308
 
249
- parsed.concat(n)
309
+ parsed.concat(parsed_not_abberviated)
250
310
  end
251
311
 
252
312
  original_path = path
@@ -301,7 +361,9 @@ module REXML
301
361
  when PI
302
362
  path = $'
303
363
  literal = nil
304
- if path !~ /^\s*\)/
364
+ if path =~ /^\s*\)/
365
+ path = $'
366
+ else
305
367
  path =~ LITERAL
306
368
  literal = $1
307
369
  path = $'
@@ -545,7 +607,9 @@ module REXML
545
607
  #| PrimaryExpr
546
608
  def FilterExpr path, parsed
547
609
  n = []
548
- path = PrimaryExpr( path, n )
610
+ path_before_primary_expr = path
611
+ path = PrimaryExpr(path, n)
612
+ return path_before_primary_expr if path == path_before_primary_expr
549
613
  path = Predicate(path, n)
550
614
  parsed.concat(n)
551
615
  path
data/lib/rexml/rexml.rb CHANGED
@@ -1,30 +1,37 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # frozen_string_literal: false
3
- # REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
4
- #
5
- # REXML is a _pure_ Ruby, XML 1.0 conforming,
6
- # non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
7
- # toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
8
- # tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
9
- # and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
10
- # includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
11
- # Ruby 1.8, REXML is included in the standard Ruby distribution.
12
- #
13
- # Main page:: http://www.germane-software.com/software/rexml
14
- # Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
15
- # Date:: 2008/019
16
- # Version:: 3.1.7.3
17
- #
18
- # This API documentation can be downloaded from the REXML home page, or can
19
- # be accessed online[http://www.germane-software.com/software/rexml_doc]
20
- #
21
- # A tutorial is available in the REXML distribution in docs/tutorial.html,
22
- # or can be accessed
23
- # online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
3
+ #
4
+ # \Module \REXML provides classes and methods for parsing,
5
+ # editing, and generating XML.
6
+ #
7
+ # == Implementation
8
+ #
9
+ # \REXML:
10
+ # - Is pure Ruby.
11
+ # - Provides tree, stream, SAX2, pull, and lightweight APIs.
12
+ # - Conforms to {XML version 1.0}[https://www.w3.org/TR/REC-xml/].
13
+ # - Fully implements {XPath version 1.0}[http://www.w3c.org/tr/xpath].
14
+ # - Is {non-validating}[https://www.w3.org/TR/xml/].
15
+ # - Passes 100% of the non-validating {Oasis tests}[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml].
16
+ #
17
+ # == In a Hurry?
18
+ #
19
+ # If you're somewhat familiar with XML
20
+ # and have a particular task in mind,
21
+ # you may want to see {the tasks pages}[doc/rexml/tasks/tocs/master_toc_rdoc.html].
22
+ #
23
+ # == API
24
+ #
25
+ # Among the most important classes for using \REXML are:
26
+ # - REXML::Document.
27
+ # - REXML::Element.
28
+ #
29
+ # There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
30
+ #
24
31
  module REXML
25
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
26
33
  DATE = "2008/019"
27
- VERSION = "3.2.3"
34
+ VERSION = "3.3.8"
28
35
  REVISION = ""
29
36
 
30
37
  Copyright = COPYRIGHT